Skip to content

Commit

Permalink
Refactor cache system (#154)
Browse files Browse the repository at this point in the history
  • Loading branch information
tsutaj authored May 17, 2024
1 parent 984459c commit bccb12f
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 113 deletions.
5 changes: 3 additions & 2 deletions statements_manager/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import shutil
from logging import Logger, basicConfig, getLogger

from statements_manager.src.output_file_kind import OutputFileKind
from statements_manager.src.project import Project
from statements_manager.src.utils import ask_ok, create_token

Expand Down Expand Up @@ -57,8 +58,8 @@ def get_parser() -> argparse.ArgumentParser:
subparser.add_argument(
"-o",
"--output",
default="html",
choices=["html", "md", "pdf"],
default=OutputFileKind.HTML.value,
choices=OutputFileKind.values(),
help="output format (defaults to 'html')",
)
subparser.add_argument(
Expand Down
146 changes: 36 additions & 110 deletions statements_manager/src/convert_task_runner.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from __future__ import annotations

import glob
import hashlib
import json
import os
import pathlib
import shutil
from enum import Enum
Expand All @@ -15,10 +12,12 @@
from googleapiclient.discovery import build

from statements_manager.src.execute_config import ProblemSetConfig
from statements_manager.src.output_file_kind import OutputFileKind
from statements_manager.src.params_maker.lang_to_class import lang_to_class
from statements_manager.src.render_result_cache import RenderResultCache
from statements_manager.src.renderer import Renderer
from statements_manager.src.statement_location_mode import StatementLocationMode
from statements_manager.src.utils import create_token, dict_merge
from statements_manager.src.utils import create_token

logger: Logger = getLogger(__name__)

Expand All @@ -27,17 +26,6 @@ class ContentsStatus(Enum):
OK, NG = range(2)


def need_to_save(
    cache: dict[str, Any],
    reference_cache: dict[str, Any],
    force_dump: bool,
    output_path: str,
):
    """Decide whether the rendered output has to be written again.

    The result is truthy when any of the following holds, checked in
    this order:

    * ``cache`` is not equal to ``reference_cache``;
    * ``force_dump`` is truthy;
    * nothing exists at ``output_path``.

    The filesystem is only consulted when the first two checks fail.
    """
    if cache != reference_cache:
        return True
    if force_dump:
        # Hand back the caller's own value, exactly as an ``or`` chain would.
        return force_dump
    target = pathlib.Path(output_path)
    return not target.exists()


class ConvertTaskRunner:
def __init__(self, problemset_config: ProblemSetConfig):
self._cwd = Path.cwd()
Expand Down Expand Up @@ -185,87 +173,70 @@ def make_pdf_attr(self, is_problemset: bool) -> dict[str, Any]:
def run_rendering(
self,
output_dir: Path,
output_ext: str,
output_ext: OutputFileKind,
problem_ids: List[str],
is_problemset: bool,
force_dump: bool,
cache: dict[str, Any],
reference_cache: dict[str, Any],
) -> dict[str, Any]:
cache: RenderResultCache,
) -> None:
if is_problemset:
output_path = str(output_dir / ("problemset." + output_ext))
output_path = str(output_dir / ("problemset." + output_ext.value))
else:
output_path = str(output_dir / (problem_ids[0] + "." + output_ext))
logger.info(f"saving replaced {output_ext}")
if output_ext == "html":
output_path = str(output_dir / (problem_ids[0] + "." + output_ext.value))
logger.info(f"saving replaced {output_ext.value}")
if output_ext == OutputFileKind.HTML:
html = self.renderer.generate_html(
problemset_config=self.problemset_config,
problem_ids=problem_ids,
is_problemset=is_problemset,
)
cache["contents"] = hashlib.sha256(html.encode()).hexdigest()
if need_to_save(
cache,
reference_cache,
force_dump,
output_path,
):
cache.set_content(html)
if cache.need_to_save(force_dump):
self.save_file(html, output_path)
else:
logger.warning("skip dumping html: same result as before")
elif output_ext == "pdf":
elif output_ext == OutputFileKind.PDF:
pdf_attr = self.make_pdf_attr(is_problemset)
html = self.renderer.generate_html_for_pdf(
problemset_config=self.problemset_config,
problem_ids=problem_ids,
is_problemset=is_problemset,
pdf_path=output_path,
)
cache["contents"] = hashlib.sha256(html.encode()).hexdigest()
if need_to_save(
cache,
reference_cache,
force_dump,
output_path,
):
cache.set_content(html)
if cache.need_to_save(force_dump):
wait_second = int(cast(int, pdf_attr["javascript-delay"]))
if wait_second > 0:
logger.info(f"please wait... ({wait_second} [msec] or greater)")
pdfkit.from_string(html, output_path, verbose=True, options=pdf_attr)
else:
logger.warning("skip dumping pdf: same result as before")
elif output_ext == "md":
elif output_ext == OutputFileKind.MARKDOWN:
md = self.renderer.generate_markdown(
problemset_config=self.problemset_config,
problem_ids=problem_ids,
is_problemset=is_problemset,
)
cache["contents"] = hashlib.sha256(md.encode()).hexdigest()
if need_to_save(
cache,
reference_cache,
force_dump,
output_path,
):
cache.set_content(md)
if cache.need_to_save(force_dump):
self.save_file(md, output_path)
else:
logger.warning("skip dumping md: same result as before")
else:
logger.error(f"invalid extension '{output_ext}'")
raise ValueError(f"invalid extension '{output_ext}'")
return cache
logger.error(f"invalid extension '{output_ext.value}'")
raise ValueError(f"invalid extension '{output_ext.value}'")

def run(
self,
problem_ids: List[str],
output_ext: str,
output_ext: OutputFileKind,
make_problemset: bool,
force_dump: bool,
constraints_only: bool,
) -> None:
# Fetch each problem statement and convert it
valid_problem_ids = []
problemset_cache: dict[str, Any] = {}
has_diff = False
for problem_id in problem_ids:
logger.info(f"rendering [problem id: {problem_id}]")
problem_config = self.problemset_config.get_problem(problem_id)
Expand All @@ -290,56 +261,22 @@ def run(
output_dir.mkdir()

# Record the cache
problem_cache: dict[str, Any] = {"assets": {}}
reference_cache: dict[str, Any] = {}
if Path(output_dir / "cache.json").exists():
reference_cache = json.load(open(output_dir / "cache.json"))
reference_cache.setdefault(output_ext, {})
reference_cache[output_ext].setdefault(problem_id, {})
problem_group = self.problemset_config.get_problem_group(problem_id)
for ext in reference_cache.keys():
obsoleted_ids = list(
filter(
lambda id: id not in problem_group, reference_cache[ext].keys()
)
)
for id in obsoleted_ids:
reference_cache[ext].pop(id)

problem_cache["assets"] = self.copy_assets(
problem_id, output_dir / "assets"
cache = RenderResultCache(
output_dir,
output_ext,
problem_id,
self.problemset_config.get_problem_group(problem_id),
)
reference_cache[output_ext][problem_id] = self.run_rendering(
cache.set_assets(self.copy_assets(problem_id, output_dir / "assets"))
self.run_rendering(
output_dir=output_dir,
output_ext=output_ext,
problem_ids=[problem_id],
is_problemset=False,
force_dump=force_dump,
cache=problem_cache,
reference_cache=reference_cache[output_ext][problem_id],
cache=cache,
)
json.dump(
reference_cache,
open(output_dir / "cache.json", "w"),
indent=4,
sort_keys=True,
)
dict_merge(problemset_cache, reference_cache[output_ext])

filenames = list(
filter(
lambda filename: pathlib.Path(filename).stem not in problem_group,
sum(
[
list(glob.glob(str(output_dir) + f"/*.{ext}"))
for ext in ["html", "pdf", "md"]
],
[],
),
)
)
for filename in filenames:
os.remove(filename)
has_diff |= cache.save_and_check_diff()
logger.info("")

# Output the file corresponding to the problem set
Expand All @@ -360,24 +297,13 @@ def run(
self.problemset_dir / "assets" / problem_id,
)
logger.info("rendering problemset")
reference_problemset_cache: dict[str, Any] = {}
if Path(self.problemset_dir / "cache.json").exists():
reference_problemset_cache = json.load(
open(self.problemset_dir / "cache.json")
)
reference_problemset_cache.setdefault(output_ext, {})
reference_problemset_cache[output_ext] = self.run_rendering(
cache = RenderResultCache(self.problemset_dir, output_ext)
self.run_rendering(
output_dir=self.problemset_dir,
output_ext=output_ext,
problem_ids=valid_problem_ids,
is_problemset=True,
force_dump=force_dump,
cache=problemset_cache,
reference_cache=reference_problemset_cache[output_ext],
)
json.dump(
reference_problemset_cache,
open(self.problemset_dir / "cache.json", "w"),
indent=4,
sort_keys=True,
force_dump=force_dump or has_diff,
cache=cache,
)
cache.save_and_check_diff()
14 changes: 14 additions & 0 deletions statements_manager/src/output_file_kind.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from __future__ import annotations

import enum


@enum.unique
class OutputFileKind(enum.Enum):
    """Closed set of output formats, keyed by their short string form.

    ``enum.unique`` guarantees that no two members share a value, so
    ``OutputFileKind(value)`` maps each string back to exactly one member.
    """

    MARKDOWN = "md"
    HTML = "html"
    PDF = "pdf"

    @staticmethod
    def values() -> list[str]:
        """Return the string value of every member, in definition order."""
        collected: list[str] = []
        for member in OutputFileKind:
            collected.append(member.value)
        return collected
3 changes: 2 additions & 1 deletion statements_manager/src/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from statements_manager.src.convert_task_runner import ConvertTaskRunner
from statements_manager.src.execute_config import ProblemSetConfig
from statements_manager.src.output_file_kind import OutputFileKind
from statements_manager.src.utils import read_toml_file

logger: Logger = getLogger(__name__)
Expand All @@ -13,7 +14,7 @@
class Project:
def __init__(self, working_dir: str, ext: str) -> None:
self._cwd: Path = Path(working_dir).resolve()
self._ext: str = ext
self._ext: OutputFileKind = OutputFileKind(ext)
self.problemset_config = self._fetch_problemset_config()
self.task_runner = ConvertTaskRunner(
problemset_config=self.problemset_config,
Expand Down
Loading

0 comments on commit bccb12f

Please sign in to comment.