diff --git a/statements_manager/main.py b/statements_manager/main.py index ef4646b..437cd77 100644 --- a/statements_manager/main.py +++ b/statements_manager/main.py @@ -4,6 +4,7 @@ import shutil from logging import Logger, basicConfig, getLogger +from statements_manager.src.output_file_kind import OutputFileKind from statements_manager.src.project import Project from statements_manager.src.utils import ask_ok, create_token @@ -57,8 +58,8 @@ def get_parser() -> argparse.ArgumentParser: subparser.add_argument( "-o", "--output", - default="html", - choices=["html", "md", "pdf"], + default=OutputFileKind.HTML.value, + choices=OutputFileKind.values(), help="output format (defaults to 'html')", ) subparser.add_argument( diff --git a/statements_manager/src/convert_task_runner.py b/statements_manager/src/convert_task_runner.py index 28470e5..2226a7a 100644 --- a/statements_manager/src/convert_task_runner.py +++ b/statements_manager/src/convert_task_runner.py @@ -1,9 +1,6 @@ from __future__ import annotations -import glob import hashlib -import json -import os import pathlib import shutil from enum import Enum @@ -15,10 +12,12 @@ from googleapiclient.discovery import build from statements_manager.src.execute_config import ProblemSetConfig +from statements_manager.src.output_file_kind import OutputFileKind from statements_manager.src.params_maker.lang_to_class import lang_to_class +from statements_manager.src.render_result_cache import RenderResultCache from statements_manager.src.renderer import Renderer from statements_manager.src.statement_location_mode import StatementLocationMode -from statements_manager.src.utils import create_token, dict_merge +from statements_manager.src.utils import create_token logger: Logger = getLogger(__name__) @@ -27,17 +26,6 @@ class ContentsStatus(Enum): OK, NG = range(2) -def need_to_save( - cache: dict[str, Any], - reference_cache: dict[str, Any], - force_dump: bool, - output_path: str, -): - return ( - cache != reference_cache or 
force_dump or not pathlib.Path(output_path).exists() - ) - - class ConvertTaskRunner: def __init__(self, problemset_config: ProblemSetConfig): self._cwd = Path.cwd() @@ -185,35 +173,29 @@ def make_pdf_attr(self, is_problemset: bool) -> dict[str, Any]: def run_rendering( self, output_dir: Path, - output_ext: str, + output_ext: OutputFileKind, problem_ids: List[str], is_problemset: bool, force_dump: bool, - cache: dict[str, Any], - reference_cache: dict[str, Any], - ) -> dict[str, Any]: + cache: RenderResultCache, + ) -> None: if is_problemset: - output_path = str(output_dir / ("problemset." + output_ext)) + output_path = str(output_dir / ("problemset." + output_ext.value)) else: - output_path = str(output_dir / (problem_ids[0] + "." + output_ext)) - logger.info(f"saving replaced {output_ext}") - if output_ext == "html": + output_path = str(output_dir / (problem_ids[0] + "." + output_ext.value)) + logger.info(f"saving replaced {output_ext.value}") + if output_ext == OutputFileKind.HTML: html = self.renderer.generate_html( problemset_config=self.problemset_config, problem_ids=problem_ids, is_problemset=is_problemset, ) - cache["contents"] = hashlib.sha256(html.encode()).hexdigest() - if need_to_save( - cache, - reference_cache, - force_dump, - output_path, - ): + cache.set_content(html) + if cache.need_to_save(force_dump): self.save_file(html, output_path) else: logger.warning("skip dumping html: same result as before") - elif output_ext == "pdf": + elif output_ext == OutputFileKind.PDF: pdf_attr = self.make_pdf_attr(is_problemset) html = self.renderer.generate_html_for_pdf( problemset_config=self.problemset_config, @@ -221,51 +203,40 @@ def run_rendering( is_problemset=is_problemset, pdf_path=output_path, ) - cache["contents"] = hashlib.sha256(html.encode()).hexdigest() - if need_to_save( - cache, - reference_cache, - force_dump, - output_path, - ): + cache.set_content(html) + if cache.need_to_save(force_dump): wait_second = int(cast(int, 
pdf_attr["javascript-delay"])) if wait_second > 0: logger.info(f"please wait... ({wait_second} [msec] or greater)") pdfkit.from_string(html, output_path, verbose=True, options=pdf_attr) else: logger.warning("skip dumping pdf: same result as before") - elif output_ext == "md": + elif output_ext == OutputFileKind.MARKDOWN: md = self.renderer.generate_markdown( problemset_config=self.problemset_config, problem_ids=problem_ids, is_problemset=is_problemset, ) - cache["contents"] = hashlib.sha256(md.encode()).hexdigest() - if need_to_save( - cache, - reference_cache, - force_dump, - output_path, - ): + cache.set_content(md) + if cache.need_to_save(force_dump): self.save_file(md, output_path) else: logger.warning("skip dumping md: same result as before") else: - logger.error(f"invalid extension '{output_ext}'") - raise ValueError(f"invalid extension '{output_ext}'") - return cache + logger.error(f"invalid extension '{output_ext.value}'") + raise ValueError(f"invalid extension '{output_ext.value}'") def run( self, problem_ids: List[str], - output_ext: str, + output_ext: OutputFileKind, make_problemset: bool, force_dump: bool, constraints_only: bool, ) -> None: # 問題文を取ってきて変換 valid_problem_ids = [] - problemset_cache: dict[str, Any] = {} + has_diff = False for problem_id in problem_ids: logger.info(f"rendering [problem id: {problem_id}]") problem_config = self.problemset_config.get_problem(problem_id) @@ -290,56 +261,22 @@ def run( output_dir.mkdir() # キャッシュの記録 - problem_cache: dict[str, Any] = {"assets": {}} - reference_cache: dict[str, Any] = {} - if Path(output_dir / "cache.json").exists(): - reference_cache = json.load(open(output_dir / "cache.json")) - reference_cache.setdefault(output_ext, {}) - reference_cache[output_ext].setdefault(problem_id, {}) - problem_group = self.problemset_config.get_problem_group(problem_id) - for ext in reference_cache.keys(): - obsoleted_ids = list( - filter( - lambda id: id not in problem_group, reference_cache[ext].keys() - ) - ) - for 
id in obsoleted_ids: - reference_cache[ext].pop(id) - - problem_cache["assets"] = self.copy_assets( - problem_id, output_dir / "assets" + cache = RenderResultCache( + output_dir, + output_ext, + problem_id, + self.problemset_config.get_problem_group(problem_id), ) - reference_cache[output_ext][problem_id] = self.run_rendering( + cache.set_assets(self.copy_assets(problem_id, output_dir / "assets")) + self.run_rendering( output_dir=output_dir, output_ext=output_ext, problem_ids=[problem_id], is_problemset=False, force_dump=force_dump, - cache=problem_cache, - reference_cache=reference_cache[output_ext][problem_id], + cache=cache, ) - json.dump( - reference_cache, - open(output_dir / "cache.json", "w"), - indent=4, - sort_keys=True, - ) - dict_merge(problemset_cache, reference_cache[output_ext]) - - filenames = list( - filter( - lambda filename: pathlib.Path(filename).stem not in problem_group, - sum( - [ - list(glob.glob(str(output_dir) + f"/*.{ext}")) - for ext in ["html", "pdf", "md"] - ], - [], - ), - ) - ) - for filename in filenames: - os.remove(filename) + has_diff |= cache.save_and_check_diff() logger.info("") # 問題セットに対応するものを出力 @@ -360,24 +297,13 @@ def run( self.problemset_dir / "assets" / problem_id, ) logger.info("rendering problemset") - reference_problemset_cache: dict[str, Any] = {} - if Path(self.problemset_dir / "cache.json").exists(): - reference_problemset_cache = json.load( - open(self.problemset_dir / "cache.json") - ) - reference_problemset_cache.setdefault(output_ext, {}) - reference_problemset_cache[output_ext] = self.run_rendering( + cache = RenderResultCache(self.problemset_dir, output_ext) + self.run_rendering( output_dir=self.problemset_dir, output_ext=output_ext, problem_ids=valid_problem_ids, is_problemset=True, - force_dump=force_dump, - cache=problemset_cache, - reference_cache=reference_problemset_cache[output_ext], - ) - json.dump( - reference_problemset_cache, - open(self.problemset_dir / "cache.json", "w"), - indent=4, - 
@enum.unique
class OutputFileKind(enum.Enum):
    """Supported output formats for rendered statements.

    The enum value is the file extension used on disk ("md" / "html" / "pdf").
    """

    MARKDOWN = "md"
    HTML = "html"
    PDF = "pdf"

    @staticmethod
    def values() -> list[str]:
        """Return every known extension string, in declaration order."""
        return [file_kind.value for file_kind in OutputFileKind]


@enum.unique
class CacheKey(enum.Enum):
    """JSON keys used inside a per-problem entry of cache.json."""

    ASSETS = "assets"
    CONTENTS = "contents"
class RenderResultCache:
    """Tracks render results in ``<output_dir>/cache.json`` to skip unchanged dumps.

    One instance handles a single (output format, problem) slot; the
    problemset-level cache uses the "problemset" defaults.
    """

    def __init__(
        self,
        output_dir: pathlib.Path,
        output_ext: OutputFileKind,
        problem_id: Optional[str] = None,
        problem_group: Optional[list[str]] = None,
    ):
        """Load cache.json (if present), prune stale entries, prepare this slot.

        Args:
            output_dir: directory holding the rendered files and cache.json.
            output_ext: output format this cache slot belongs to.
            problem_id: problem the slot belongs to; None means the combined
                problemset slot.
            problem_group: problem ids considered alive; cached entries and
                rendered files for ids outside this group are removed.
                None means the problemset group.
        """
        self.output_dir = output_dir
        self.output_ext = output_ext.value
        self.cache_path = output_dir / "cache.json"
        self.problem_id = "problemset" if problem_id is None else problem_id
        self.problem_group = ["problemset"] if problem_group is None else problem_group
        self.cache = self._load_and_setup_cache()
        # Snapshot used by need_to_save()/save_and_check_diff() to detect changes.
        self.prev_cache = copy.deepcopy(self.cache)

    def _load_cache(self) -> dict:
        """Read cache.json and drop obsolete entries; return {} when absent."""
        if not self.cache_path.exists():
            return {}
        with open(self.cache_path, "r") as f:
            return self._cleanup(json.load(f))

    def _setup_cache(self, cache: dict) -> dict:
        """Ensure the (format, problem) slot exists and carries an assets map."""
        cache.setdefault(self.output_ext, {})
        cache[self.output_ext].setdefault(self.problem_id, {})
        cache[self.output_ext][self.problem_id].setdefault(CacheKey.ASSETS.value, {})
        return cache

    def _load_and_setup_cache(self) -> dict:
        """Load the persisted cache and initialize this instance's slot."""
        cache = self._load_cache()
        return self._setup_cache(cache)

    def _cleanup(self, cache: dict) -> dict:
        """Remove unknown formats, dead problem ids, and their output files."""
        # BUGFIX: iterate over a snapshot of the keys. Popping from the dict
        # while iterating cache.keys() directly raises
        # "RuntimeError: dictionary changed size during iteration" as soon as
        # an unknown extension key is present — the very case handled here.
        for ext in list(cache.keys()):
            if ext not in OutputFileKind.values():
                cache.pop(ext)
            else:
                obsoleted_ids = [
                    id for id in cache[ext].keys() if id not in self.problem_group
                ]
                for id in obsoleted_ids:
                    cache[ext].pop(id)
        # Also delete rendered files whose stem no longer belongs to the group.
        obsolete_filenames = [
            filename
            for ext in OutputFileKind.values()
            for filename in glob.glob(str(self.output_dir) + f"/*.{ext}")
            if pathlib.Path(filename).stem not in self.problem_group
        ]
        for filename in obsolete_filenames:
            os.remove(filename)
        return cache

    def get_current(self) -> dict[str, Any]:
        """Return the live cache slot for this (format, problem)."""
        return self.cache[self.output_ext][self.problem_id]

    def get_previous(self) -> dict[str, Any]:
        """Return the slot as it was when the cache was loaded."""
        return self.prev_cache[self.output_ext][self.problem_id]

    def set_assets(self, assets_dict: dict[str, Any]) -> None:
        """Record the asset mapping for this slot."""
        self.cache[self.output_ext][self.problem_id][
            CacheKey.ASSETS.value
        ] = assets_dict

    def set_content(self, content: str) -> None:
        """Record the SHA-256 hex digest of the rendered document."""
        self.cache[self.output_ext][self.problem_id][CacheKey.CONTENTS.value] = (
            hashlib.sha256(content.encode()).hexdigest()
        )

    def need_to_save(self, force_dump: bool) -> bool:
        """Return True when the output must be (re)written: forced, or changed."""
        return force_dump or self.get_current() != self.get_previous()

    def save_and_check_diff(self) -> bool:
        """Persist cache.json; return whether this slot changed since load.

        Also refreshes the change-detection snapshot, so a second call with
        no further mutation reports no diff.
        """
        # Use a context manager so the file is flushed and closed
        # deterministically (the original left the handle to the GC).
        with open(self.cache_path, "w") as f:
            json.dump(self.cache, f, indent=4, sort_keys=True)
        has_diff = self.get_current() != self.get_previous()
        self.prev_cache = copy.deepcopy(self.cache)
        return has_diff