You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
273 lines
8.7 KiB
273 lines
8.7 KiB
#[macro_use]
|
|
extern crate clap;
|
|
|
|
use reqwest;
|
|
use scraper::{Html, Selector, ElementRef};
|
|
use serde_json::{Value, self};
|
|
use itertools::izip;
|
|
use chrono::{NaiveDateTime, DateTime, Local, Utc, offset::TimeZone, Datelike};
|
|
use std::collections::HashMap;
|
|
use std::path::Path;
|
|
use std::fs::File;
|
|
use std::io::{self, Write};
|
|
use clap::{Arg, App, ArgMatches};
|
|
|
|
/// Output directory used when `--outdir` is not given on the command line.
const OUT_DIR_DEFAULT: &str = "out";

/// Number of months fetched when `--fetch` is not given (or cannot be parsed).
const FETCH_MONTHS_DEFAULT: u32 = 3;

/// Endpoint the month-view form requests are posted to.
const OTAKU_URL: &str = "https://otakulounge.com/wp-admin/admin-ajax.php";
|
|
|
|
/// Settings for a single scraper run, derived from the command-line arguments.
struct Config {
    /// Directory the generated calendar files and the index page are written to.
    out_dir: String,
    /// First month to fetch, as a `(year, month)` pair.
    ym_start: (u32, u32),
    /// Number of consecutive months to fetch, starting at `ym_start`.
    fetch_months: u32,
}
|
|
|
|
impl Config {
|
|
fn from_arg_matches<'a>(matches: ArgMatches<'a>) -> Self {
|
|
let out_dir = matches.value_of("outdir").unwrap_or(OUT_DIR_DEFAULT).to_string();
|
|
let fetch_months = value_t!(matches, "fetch", u32).unwrap_or(FETCH_MONTHS_DEFAULT);
|
|
let date_start = matches.value_of("date")
|
|
.map_or_else(
|
|
|| Local::now(),
|
|
|date_str| {
|
|
Local
|
|
.datetime_from_str(&format!("{}-01 00:00", date_str), "%Y-%m-%d %H:%M")
|
|
.expect("Unable to parse date")
|
|
});
|
|
|
|
let ym_start = (date_start.year() as u32, date_start.month());
|
|
|
|
Config {
|
|
out_dir,
|
|
ym_start,
|
|
fetch_months
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
fn parse_args<'a>() -> ArgMatches<'a> {
|
|
App::new("Otaku Event Scraper")
|
|
.version("0.1")
|
|
.about("Scrap events from otakulounge.com and create corresponding iCal files")
|
|
.arg(Arg::with_name("outdir")
|
|
.short("o")
|
|
.long("outdir")
|
|
.value_name("DIR")
|
|
.help(&format!{"Set output directory for *.isc files, default to `{}`", OUT_DIR_DEFAULT})
|
|
.takes_value(true))
|
|
.arg(Arg::with_name("fetch")
|
|
.short("f")
|
|
.long("fetch")
|
|
.value_name("MONTHS")
|
|
.help("Number of months to fetch the events from, default to 3")
|
|
.takes_value(true)
|
|
.min_values(1)
|
|
.max_values(12))
|
|
.arg(Arg::with_name("date")
|
|
.short("d")
|
|
.long("date")
|
|
.value_name("DATE")
|
|
.help("Start date to fetch the events, default for today. Format YYYY-MM")
|
|
.takes_value(true))
|
|
.get_matches()
|
|
}
|
|
|
|
fn get_config() -> Config {
|
|
Config::from_arg_matches(parse_args())
|
|
}
|
|
|
|
|
|
fn get_events_html((year, month): (u32, u32)) -> Result<String, reqwest::Error> {
|
|
println!("querying otakulounge.com for month {}-{}", year, month);
|
|
let mut res = reqwest::Client::new()
|
|
.post(OTAKU_URL)
|
|
.form(&vec![
|
|
("action", "mec_monthly_view_load_month"),
|
|
("mec_year", &year.to_string()),
|
|
("mec_month", &month.to_string()),
|
|
("atts[label]", ""),
|
|
("atts[category]", ""),
|
|
("atts[location]", ""),
|
|
("atts[organizer]", ""),
|
|
("atts[tag]", ""),
|
|
("atts[author]", ""),
|
|
("atts[skin]", "full_calendar"),
|
|
("atts[show_past_events]", "1"),
|
|
("atts[sf_status]", "0"),
|
|
("atts[id]", "1019"),
|
|
("atts[s]", ""),
|
|
("atts[append_js_codes]", "1"),
|
|
("atts[sed_method]", "0"),
|
|
("atts[image_popup]", "0"),
|
|
("apply_sf_date", "0")
|
|
])
|
|
.send()?;
|
|
|
|
println!("status: {}", res.status());
|
|
|
|
let body: Value = res.json()?;
|
|
let body_text = body["events_side"].as_str().unwrap().to_string();
|
|
Ok(body_text)
|
|
}
|
|
|
|
/// Returns the `(year, month)` that lies `n` months after the given one.
///
/// `month` is 1-based (1 = January), both on input and in the result.
fn add_to_month((year, month): (u32, u32), n: u32) -> (u32, u32) {
    // Work with a 0-based month index so division and remainder by 12 line up.
    let zero_based = month + n - 1;
    let carried_years = zero_based / 12;
    let wrapped_month = zero_based % 12 + 1;
    (year + carried_years, wrapped_month)
}
|
|
|
|
fn get_inner_text(el: ElementRef) -> String {
|
|
el.text().collect::<Vec<_>>().join("")
|
|
}
|
|
|
|
fn get_description_from_bad_json_ld(el: ElementRef) -> Result<String, serde_json::Error> {
|
|
let inner_text = get_inner_text(el);
|
|
let json_event: Value = serde_json::from_str(&inner_text.replace('\n', ""))?;
|
|
let serialized_description = json_event["description"].as_str().unwrap();
|
|
let html_src = get_inner_text(Html::parse_fragment(serialized_description).root_element());
|
|
let html_description = Html::parse_fragment(&html_src);
|
|
let span_selector = Selector::parse("span").unwrap();
|
|
Ok(html_description
|
|
.select(&span_selector)
|
|
.map(get_inner_text)
|
|
.collect::<Vec<_>>()
|
|
.join("\\n"))
|
|
}
|
|
|
|
fn parse_to_utc(date: &str, time: &str) -> DateTime<Utc> {
|
|
let dt = NaiveDateTime::parse_from_str(&format!("{} {}", date, time), "%Y%m%d %l:%M %P").unwrap();
|
|
let local = Local.from_local_datetime(&dt).single().unwrap();
|
|
local.with_timezone(&Utc)
|
|
}
|
|
|
|
fn create_event(title: &str, description: &str, date: &str, time: String, id: u8) -> String {
|
|
println!("{:#?} -> {:#?}", title, time);
|
|
let (start, end) = split_time(&time);
|
|
let date_start = parse_to_utc(date, start);
|
|
let date_end = parse_to_utc(date, end);
|
|
let timestamp = Utc::now();
|
|
let uid = format!("day{}event{}@otaku-event.bksp.space", date, id);
|
|
|
|
format!(r#"BEGIN:VEVENT
|
|
DTSTART:{}
|
|
DTEND:{}
|
|
DTSTAMP:{}
|
|
UID:{}
|
|
SUMMARY:{}
|
|
DESCRIPTION:{}
|
|
LOCATION:Otaku Manga Lounge
|
|
END:VEVENT"#,
|
|
date_start.format("%Y%m%dT%H%M%SZ"),
|
|
date_end.format("%Y%m%dT%H%M%SZ"),
|
|
timestamp.format("%Y%m%dT%H%M%SZ"),
|
|
uid,
|
|
title,
|
|
description
|
|
)
|
|
}
|
|
|
|
/// Splits a time label such as `" 7:00 pm - 10:00 pm"` into `(start, end)`.
///
/// The label is cut at the first `-`, and both halves are trimmed of surrounding
/// whitespace. When there is no `-`, the whole trimmed label is the start and the end
/// defaults to `"10:00 pm"`.
///
/// Trimming replaces fixed byte offsets, so empty or unusually spaced labels no longer
/// panic or lose their first character.
fn split_time(time: &str) -> (&str, &str) {
    const DEFAULT_END: &str = "10:00 pm";

    match time.find('-') {
        // `-` is a single byte, so `dash_idx + 1` is always a valid char boundary.
        Some(dash_idx) => (time[..dash_idx].trim(), time[dash_idx + 1..].trim()),
        None => (time.trim(), DEFAULT_END),
    }
}
|
|
|
|
/// Writes `events` into the calendar file `<folder>/<name>.isc` and returns the file name.
///
/// The file holds a `VCALENDAR` header, each event followed by a newline, and the
/// `END:VCALENDAR` footer.
///
/// The `.isc` suffix is appended to `name` rather than set with `Path::with_extension`,
/// which would replace everything after the last dot of a name like `"Cours niv. 2"`.
///
/// NOTE(review): the conventional iCalendar extension is `.ics`; `.isc` is kept here
/// because the command-line help text advertises `*.isc`. Confirm which is intended.
///
/// # Errors
///
/// Returns any I/O error raised while creating or writing the file.
fn write_cal(folder: &str, name: &str, events: &[String]) -> io::Result<String> {
    let filename = Path::new(folder).join(format!("{}.isc", name));
    println!("writing {:?}", filename);

    let mut file = File::create(&filename)?;
    file.write_all(b"BEGIN:VCALENDAR\nVERSION:2.0\nPRODID:-//bksp.space - EventScraper//\n")?;
    for event in events {
        write!(file, "{}\n", event)?;
    }
    file.write_all(b"END:VCALENDAR")?;

    // The path was built from `&str` pieces, so the lossy conversion never alters it.
    let written_name = filename
        .file_name()
        .map(|os_name| os_name.to_string_lossy().into_owned())
        .unwrap_or_default();
    Ok(written_name)
}
|
|
|
|
/// Writes `<folder>/index.html`, a page with one link per `(url, name)` pair in `events`.
///
/// Both the link target and the link text are HTML-escaped, because they are derived
/// from scraped page content.
///
/// # Errors
///
/// Returns any I/O error raised while creating or writing the file.
fn write_index(folder: &str, events: Vec<(String, String)>) -> io::Result<()> {
    let mut index = File::create(Path::new(folder).join("index.html"))?;

    index.write_all(br#"
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>Otaku Event Calendars</title>
</head>
<body>
<h1>Otaku Event Calendars</h1>
<ul>
"#)?;

    for (url, name) in events {
        write!(
            index,
            r#"<li><a href="{}">{}</a></li>"#,
            escape_html(&url),
            escape_html(&name)
        )?;
    }

    index.write_all(br#"
</ul>
</body>
</html>
"#)?;

    Ok(())
}

/// Replaces the characters that are special in HTML text and attribute values.
fn escape_html(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
|
|
|
|
fn main() -> io::Result<()> {
|
|
let config = get_config();
|
|
|
|
let frags_iter = (0..config.fetch_months).map(|n| add_to_month(config.ym_start, n))
|
|
.filter_map(|ym| get_events_html(ym).ok())
|
|
.map(|body| Html::parse_fragment(&body));
|
|
|
|
let days_selector = Selector::parse(".mec-calendar-events-sec").unwrap();
|
|
let title_selector = Selector::parse("h4.mec-event-title").unwrap();
|
|
let time_selector = Selector::parse(".mec-event-time").unwrap();
|
|
let json_ld_selector = Selector::parse(r#"script[type="application/ld+json"]"#).unwrap();
|
|
|
|
|
|
let mut activities = Vec::new();
|
|
let mut all_courses = Vec::new();
|
|
let mut courses = HashMap::new();
|
|
|
|
for fragment in frags_iter {
|
|
let days = fragment.select(&days_selector);
|
|
|
|
for day in days {
|
|
let date = day.value().attr("data-mec-cell").unwrap_or_default();
|
|
|
|
let titles = day.select(&title_selector).map(get_inner_text);
|
|
let times = day.select(&time_selector).map(get_inner_text);
|
|
let descriptions = day.select(&json_ld_selector).map(get_description_from_bad_json_ld);
|
|
|
|
let mut idx = 0u8;
|
|
for (title, time, description) in izip!(titles, times, descriptions) {
|
|
let event = create_event(&title, &description.unwrap_or_default(), &date, time, idx);
|
|
|
|
if let Some(0) = title.find("Cours") {
|
|
all_courses.push(event.clone());
|
|
let this_course = courses.entry(title).or_insert(Vec::<String>::new());
|
|
this_course.push(event);
|
|
} else {
|
|
activities.push(event);
|
|
}
|
|
idx += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
let mut event_names = Vec::new();
|
|
|
|
event_names.push((write_cal(&config.out_dir, "activities", &activities)?, "Activities".into()));
|
|
event_names.push((write_cal(&config.out_dir, "all_courses", &activities)?, "All courses".into()));
|
|
|
|
for (title, events) in courses.iter() {
|
|
event_names.push((write_cal(&config.out_dir, title, &events)?, title.into()));
|
|
}
|
|
|
|
write_index(&config.out_dir, event_names)?;
|
|
Ok(())
|
|
}
|