hisako-arato/fetch_wiki.py

69 lines
1.9 KiB
Python

from pathlib import Path
import json
import requests
import logging
import config
# Base URL of the wiki's HTTP API; endpoint paths such as "/chapters/<id>"
# and "/pages/<id>" are appended to it by the fetch functions below.
WIKI_BASE = "https://wiki.ppsfleet.navy/api"
# API token credentials, read from the project-local ``config`` module. They
# are sent together as "Token <id>:<secret>" in the Authorization header.
TOKEN_ID = config.BOOKSTACK_TOKEN_ID
TOKEN_SECRET = config.BOOKSTACK_TOKEN_SECRET
def fetch_chapter(chapter_id, timeout=30):
    """Fetch one chapter from the wiki API and return its decoded JSON.

    Args:
        chapter_id: Identifier of the chapter; it is appended to the
            ``/chapters/`` endpoint of ``WIKI_BASE``.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The response body decoded by ``Response.json()``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(
        f"{WIKI_BASE}/chapters/{chapter_id}",
        headers={"Authorization": f"Token {TOKEN_ID}:{TOKEN_SECRET}"},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of handing an error payload to the
    # caller, which would only surface later as a confusing KeyError.
    response.raise_for_status()
    return response.json()
def get_page_id_list_of_chapter(chapter_id):
    """Return the ids of the pages listed in a chapter.

    The chapter is retrieved with ``fetch_chapter``; the ``id`` field of
    every entry under its ``pages`` key is collected in order.
    """
    chapter = fetch_chapter(chapter_id)
    return [entry['id'] for entry in chapter['pages']]
def fetch_page(page_id, timeout=30):
    """Fetch one page from the wiki API and return its decoded JSON.

    Args:
        page_id: Identifier of the page; it is appended to the ``/pages/``
            endpoint of ``WIKI_BASE``.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The response body decoded by ``Response.json()``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(
        f"{WIKI_BASE}/pages/{page_id}",
        headers={"Authorization": f"Token {TOKEN_ID}:{TOKEN_SECRET}"},
        timeout=timeout,
    )
    # Reject HTTP errors here so that an error payload is never mistaken for
    # page content (and, in the cached path, never written to disk).
    response.raise_for_status()
    return response.json()
def fetch_page_with_cache(page_id):
    """Return a page's JSON, served from the on-disk cache when possible.

    Cached pages are stored as ``~/.cache/hisako/pages/<page_id>.json``. On a
    cache miss, or when the cached file cannot be read or parsed, the page is
    fetched with ``fetch_page`` and the cache file is (re)written.

    Args:
        page_id: Identifier of the page; also used as the cache file name.

    Returns:
        The decoded JSON content of the page.
    """
    cache_dir = Path.home().joinpath('.cache', 'hisako', 'pages')
    cache_dir.mkdir(parents=True, exist_ok=True)
    cache_file = cache_dir.joinpath(str(page_id) + ".json")

    if cache_file.is_file():
        logging.info("Trying to use cache for page %s", page_id)
        try:
            with open(cache_file, encoding="utf-8") as cached:
                return json.load(cached)
        except (OSError, ValueError):
            # Unreadable or corrupt cache entry (json.JSONDecodeError is a
            # ValueError): fall through and refetch. Anything else, such as
            # KeyboardInterrupt, is deliberately not swallowed.
            logging.warning(
                "Cache for page %s is unusable, refetching", page_id)

    logging.info("Fetching page %s", page_id)
    content = fetch_page(page_id)
    # Serialise before opening the file so a serialisation failure cannot
    # leave a truncated cache entry behind.
    serialized = json.dumps(content, indent=4)
    with open(cache_file, "w", encoding="utf-8") as outfile:
        outfile.write(serialized)
    return content
def fetch_all_page_of_chapter(chapter_id, cache=True):
    """Return the JSON content of every page of a chapter.

    Page ids come from ``get_page_id_list_of_chapter``. When ``cache`` is
    true each page is loaded through ``fetch_page_with_cache``; otherwise
    ``fetch_page`` is called directly for every page.
    """
    logging.info("Fetching all page of chapter " + str(chapter_id))
    loader = fetch_page_with_cache if cache else fetch_page
    return [loader(pid) for pid in get_page_id_list_of_chapter(chapter_id)]
if __name__ == "__main__":
    # Script entry point: log at DEBUG level (numeric value 10) and fetch
    # every page of chapter 9, using the cache (the default).
    logging.basicConfig(
        format="%(asctime)s %(filename)s:%(lineno)s %(levelname)s %(message)s",
        level=logging.DEBUG,
    )
    fetch_all_page_of_chapter(9)