You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
272 lines
8.7 KiB
272 lines
8.7 KiB
#[macro_use] |
|
extern crate clap; |
|
|
|
use reqwest; |
|
use scraper::{Html, Selector, ElementRef}; |
|
use serde_json::{Value, self}; |
|
use itertools::izip; |
|
use chrono::{NaiveDateTime, DateTime, Local, Utc, offset::TimeZone, Datelike}; |
|
use std::collections::HashMap; |
|
use std::path::Path; |
|
use std::fs::File; |
|
use std::io::{self, Write}; |
|
use clap::{Arg, App, ArgMatches}; |
|
|
|
/// Output directory used when `--outdir` is not supplied.
const OUT_DIR_DEFAULT: &str = "out";

/// Number of months fetched when `--fetch` is not supplied.
const FETCH_MONTHS_DEFAULT: u32 = 3;

/// URL that `get_events_html` sends its form POST to.
const OTAKU_URL: &str = "https://otakulounge.com/wp-admin/admin-ajax.php";
|
|
|
struct Config { |
|
out_dir: String, |
|
ym_start: (u32, u32), |
|
fetch_months: u32, |
|
} |
|
|
|
impl Config { |
|
fn from_arg_matches<'a>(matches: ArgMatches<'a>) -> Self { |
|
let out_dir = matches.value_of("outdir").unwrap_or(OUT_DIR_DEFAULT).to_string(); |
|
let fetch_months = value_t!(matches, "fetch", u32).unwrap_or(FETCH_MONTHS_DEFAULT); |
|
let date_start = matches.value_of("date") |
|
.map_or_else( |
|
|| Local::now(), |
|
|date_str| { |
|
Local |
|
.datetime_from_str(&format!("{}-01 00:00", date_str), "%Y-%m-%d %H:%M") |
|
.expect("Unable to parse date") |
|
}); |
|
|
|
let ym_start = (date_start.year() as u32, date_start.month()); |
|
|
|
Config { |
|
out_dir, |
|
ym_start, |
|
fetch_months |
|
} |
|
} |
|
|
|
} |
|
|
|
fn parse_args<'a>() -> ArgMatches<'a> { |
|
App::new("Otaku Event Scraper") |
|
.version("0.1") |
|
.about("Scrap events from otakulounge.com and create corresponding iCal files") |
|
.arg(Arg::with_name("outdir") |
|
.short("o") |
|
.long("outdir") |
|
.value_name("DIR") |
|
.help(&format!{"Set output directory for *.isc files, default to `{}`", OUT_DIR_DEFAULT}) |
|
.takes_value(true)) |
|
.arg(Arg::with_name("fetch") |
|
.short("f") |
|
.long("fetch") |
|
.value_name("MONTHS") |
|
.help("Number of months to fetch the events from, default to 3") |
|
.takes_value(true) |
|
.min_values(1) |
|
.max_values(12)) |
|
.arg(Arg::with_name("date") |
|
.short("d") |
|
.long("date") |
|
.value_name("DATE") |
|
.help("Start date to fetch the events, default for today. Format YYYY-MM") |
|
.takes_value(true)) |
|
.get_matches() |
|
} |
|
|
|
fn get_config() -> Config { |
|
Config::from_arg_matches(parse_args()) |
|
} |
|
|
|
|
|
fn get_events_html((year, month): (u32, u32)) -> Result<String, reqwest::Error> { |
|
println!("querying otakulounge.com for month {}-{}", year, month); |
|
let mut res = reqwest::Client::new() |
|
.post(OTAKU_URL) |
|
.form(&vec![ |
|
("action", "mec_monthly_view_load_month"), |
|
("mec_year", &year.to_string()), |
|
("mec_month", &month.to_string()), |
|
("atts[label]", ""), |
|
("atts[category]", ""), |
|
("atts[location]", ""), |
|
("atts[organizer]", ""), |
|
("atts[tag]", ""), |
|
("atts[author]", ""), |
|
("atts[skin]", "full_calendar"), |
|
("atts[show_past_events]", "1"), |
|
("atts[sf_status]", "0"), |
|
("atts[id]", "1019"), |
|
("atts[s]", ""), |
|
("atts[append_js_codes]", "1"), |
|
("atts[sed_method]", "0"), |
|
("atts[image_popup]", "0"), |
|
("apply_sf_date", "0") |
|
]) |
|
.send()?; |
|
|
|
println!("status: {}", res.status()); |
|
|
|
let body: Value = res.json()?; |
|
let body_text = body["events_side"].as_str().unwrap().to_string(); |
|
Ok(body_text) |
|
} |
|
|
|
/// Returns the `(year, month)` lying `n` months after the given one.
///
/// Months are 1-based on both input and output.
fn add_to_month((year, month): (u32, u32), n: u32) -> (u32, u32) {
    // Shift to a zero-based month index so that division and remainder by 12
    // line up with year boundaries.
    let zero_based = month + n - 1;
    let years_forward = zero_based / 12;
    let month_of_year = zero_based % 12 + 1;
    (year + years_forward, month_of_year)
}
|
|
|
fn get_inner_text(el: ElementRef) -> String { |
|
el.text().collect::<Vec<_>>().join("") |
|
} |
|
|
|
fn get_description_from_bad_json_ld(el: ElementRef) -> Result<String, serde_json::Error> { |
|
let inner_text = get_inner_text(el); |
|
let json_event: Value = serde_json::from_str(&inner_text.replace('\n', ""))?; |
|
let serialized_description = json_event["description"].as_str().unwrap(); |
|
let html_src = get_inner_text(Html::parse_fragment(serialized_description).root_element()); |
|
let html_description = Html::parse_fragment(&html_src); |
|
let span_selector = Selector::parse("span").unwrap(); |
|
Ok(html_description |
|
.select(&span_selector) |
|
.map(get_inner_text) |
|
.collect::<Vec<_>>() |
|
.join("\\n")) |
|
} |
|
|
|
fn parse_to_utc(date: &str, time: &str) -> DateTime<Utc> { |
|
let dt = NaiveDateTime::parse_from_str(&format!("{} {}", date, time), "%Y%m%d %l:%M %P").unwrap(); |
|
let local = Local.from_local_datetime(&dt).single().unwrap(); |
|
local.with_timezone(&Utc) |
|
} |
|
|
|
fn create_event(title: &str, description: &str, date: &str, time: String, id: u8) -> String { |
|
println!("{:#?} -> {:#?}", title, time); |
|
let (start, end) = split_time(&time); |
|
let date_start = parse_to_utc(date, start); |
|
let date_end = parse_to_utc(date, end); |
|
let timestamp = Utc::now(); |
|
let uid = format!("day{}event{}@otaku-event.bksp.space", date, id); |
|
|
|
format!(r#"BEGIN:VEVENT |
|
DTSTART:{} |
|
DTEND:{} |
|
DTSTAMP:{} |
|
UID:{} |
|
SUMMARY:{} |
|
DESCRIPTION:{} |
|
LOCATION:Otaku Manga Lounge |
|
END:VEVENT"#, |
|
date_start.format("%Y%m%dT%H%M%SZ"), |
|
date_end.format("%Y%m%dT%H%M%SZ"), |
|
timestamp.format("%Y%m%dT%H%M%SZ"), |
|
uid, |
|
title, |
|
description |
|
) |
|
} |
|
|
|
/// Splits an event time label into `(start, end)`.
///
/// A label such as `" 7:00 pm - 10:00 pm"` is split at the first dash and
/// both halves are trimmed. A label without a dash is taken as the start
/// time and the end defaults to `"10:00 pm"`.
///
/// Trimming replaces the fixed byte offsets used previously, so an empty
/// label, a label starting with a dash, or one with different padding around
/// the dash no longer panics or loses characters.
fn split_time(time: &str) -> (&str, &str) {
    match time.find('-') {
        // '-' is one byte long, so `dash_idx + 1` is always a char boundary.
        Some(dash_idx) => (time[..dash_idx].trim(), time[dash_idx + 1..].trim()),
        None => (time.trim(), "10:00 pm"),
    }
}
|
|
|
/// Writes `events` as one calendar file inside `folder` and returns the file
/// name (without the folder) that was written.
///
/// The file is named `<name>.isc`. The extension is appended rather than set
/// with `Path::with_extension`, which would cut a name such as
/// `"Cours niv. 2"` at its last dot and let different names collide. Path
/// separators in `name` are replaced with `_` so the file always lands
/// directly in `folder`. `folder` is created if it does not exist.
///
/// NOTE(review): iCalendar files conventionally use the `.ics` extension;
/// `.isc` is kept because the CLI help text advertises it and existing links
/// may rely on it.
///
/// # Errors
///
/// Returns any I/O error raised while creating the directory or writing the
/// file.
fn write_cal(folder: &str, name: &str, events: &[String]) -> io::Result<String> {
    std::fs::create_dir_all(folder)?;

    let file_name = format!("{}.isc", name.replace(&['/', '\\'][..], "_"));
    let path = Path::new(folder).join(&file_name);
    println!("writing {:?}", path);

    let mut file = io::BufWriter::new(File::create(&path)?);
    file.write_all(b"BEGIN:VCALENDAR\nVERSION:2.0\nPRODID:-//bksp.space - EventScraper//\n")?;
    for event in events {
        writeln!(file, "{}", event)?;
    }
    file.write_all(b"END:VCALENDAR")?;
    // Flush explicitly: errors during the implicit flush on drop are lost.
    file.flush()?;

    Ok(file_name)
}
|
|
|
/// Writes `index.html` into `folder`, listing one link per calendar.
///
/// Each entry of `events` is a `(file_name, label)` pair. The file name is
/// percent-encoded for use as a relative `href`, and the label is
/// HTML-escaped, because both originate from scraped event titles. `folder`
/// is created if it does not exist.
///
/// # Errors
///
/// Returns any I/O error raised while creating the directory or writing the
/// file.
fn write_index(folder: &str, events: Vec<(String, String)>) -> io::Result<()> {
    std::fs::create_dir_all(folder)?;
    let mut index = io::BufWriter::new(File::create(Path::new(folder).join("index.html"))?);

    index.write_all(
        br#"<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>Otaku Event Calendars</title>
</head>
<body>
<h1>Otaku Event Calendars</h1>
<ul>
"#,
    )?;

    for (url, name) in events {
        writeln!(
            index,
            r#"<li><a href="{}">{}</a></li>"#,
            encode_href(&url),
            escape_html(&name)
        )?;
    }

    index.write_all(
        br#"</ul>
</body>
</html>
"#,
    )?;

    // Flush explicitly: errors during the implicit flush on drop are lost.
    index.flush()
}

/// Percent-encodes every byte of `file_name` that is not an RFC 3986
/// unreserved character, so the result is safe as a relative URL.
fn encode_href(file_name: &str) -> String {
    let mut encoded = String::with_capacity(file_name.len());
    for byte in file_name.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                encoded.push(byte as char)
            }
            other => encoded.push_str(&format!("%{:02X}", other)),
        }
    }
    encoded
}

/// Escapes the characters that are significant in HTML text and attributes.
fn escape_html(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
|
|
|
fn main() -> io::Result<()> { |
|
let config = get_config(); |
|
|
|
let frags_iter = (0..config.fetch_months).map(|n| add_to_month(config.ym_start, n)) |
|
.filter_map(|ym| get_events_html(ym).ok()) |
|
.map(|body| Html::parse_fragment(&body)); |
|
|
|
let days_selector = Selector::parse(".mec-calendar-events-sec").unwrap(); |
|
let title_selector = Selector::parse("h4.mec-event-title").unwrap(); |
|
let time_selector = Selector::parse(".mec-event-time").unwrap(); |
|
let json_ld_selector = Selector::parse(r#"script[type="application/ld+json"]"#).unwrap(); |
|
|
|
|
|
let mut activities = Vec::new(); |
|
let mut all_courses = Vec::new(); |
|
let mut courses = HashMap::new(); |
|
|
|
for fragment in frags_iter { |
|
let days = fragment.select(&days_selector); |
|
|
|
for day in days { |
|
let date = day.value().attr("data-mec-cell").unwrap_or_default(); |
|
|
|
let titles = day.select(&title_selector).map(get_inner_text); |
|
let times = day.select(&time_selector).map(get_inner_text); |
|
let descriptions = day.select(&json_ld_selector).map(get_description_from_bad_json_ld); |
|
|
|
let mut idx = 0u8; |
|
for (title, time, description) in izip!(titles, times, descriptions) { |
|
let event = create_event(&title, &description.unwrap_or_default(), &date, time, idx); |
|
|
|
if let Some(0) = title.find("Cours") { |
|
all_courses.push(event.clone()); |
|
let this_course = courses.entry(title).or_insert(Vec::<String>::new()); |
|
this_course.push(event); |
|
} else { |
|
activities.push(event); |
|
} |
|
idx += 1; |
|
} |
|
} |
|
} |
|
|
|
let mut event_names = Vec::new(); |
|
|
|
event_names.push((write_cal(&config.out_dir, "activities", &activities)?, "Activities".into())); |
|
event_names.push((write_cal(&config.out_dir, "all_courses", &activities)?, "All courses".into())); |
|
|
|
for (title, events) in courses.iter() { |
|
event_names.push((write_cal(&config.out_dir, title, &events)?, title.into())); |
|
} |
|
|
|
write_index(&config.out_dir, event_names)?; |
|
Ok(()) |
|
}
|
|
|