Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor cache system #154

Merged
merged 5 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions statements_manager/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import shutil
from logging import Logger, basicConfig, getLogger

from statements_manager.src.output_file_kind import OutputFileKind
from statements_manager.src.project import Project
from statements_manager.src.utils import ask_ok, create_token

Expand Down Expand Up @@ -57,8 +58,8 @@ def get_parser() -> argparse.ArgumentParser:
subparser.add_argument(
"-o",
"--output",
default="html",
choices=["html", "md", "pdf"],
default=OutputFileKind.HTML.value,
choices=OutputFileKind.values(),
help="output format (defaults to 'html')",
)
subparser.add_argument(
Expand Down
146 changes: 36 additions & 110 deletions statements_manager/src/convert_task_runner.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from __future__ import annotations

import glob
import hashlib
import json
import os
import pathlib
import shutil
from enum import Enum
Expand All @@ -15,10 +12,12 @@
from googleapiclient.discovery import build

from statements_manager.src.execute_config import ProblemSetConfig
from statements_manager.src.output_file_kind import OutputFileKind
from statements_manager.src.params_maker.lang_to_class import lang_to_class
from statements_manager.src.render_result_cache import RenderResultCache
from statements_manager.src.renderer import Renderer
from statements_manager.src.statement_location_mode import StatementLocationMode
from statements_manager.src.utils import create_token, dict_merge
from statements_manager.src.utils import create_token

logger: Logger = getLogger(__name__)

Expand All @@ -27,17 +26,6 @@ class ContentsStatus(Enum):
OK, NG = range(2)


def need_to_save(
    cache: dict[str, Any],
    reference_cache: dict[str, Any],
    force_dump: bool,
    output_path: str,
):
    """Decide whether the rendered output file must be (re)written.

    Saving is required when the freshly computed cache entry differs from
    the one recorded on disk, when the caller forces a dump, or when the
    output file itself is missing.
    """
    if force_dump:
        return True
    if cache != reference_cache:
        return True
    # Even with an up-to-date cache, regenerate if the file was deleted.
    return not pathlib.Path(output_path).exists()


class ConvertTaskRunner:
def __init__(self, problemset_config: ProblemSetConfig):
self._cwd = Path.cwd()
Expand Down Expand Up @@ -185,87 +173,70 @@ def make_pdf_attr(self, is_problemset: bool) -> dict[str, Any]:
def run_rendering(
self,
output_dir: Path,
output_ext: str,
output_ext: OutputFileKind,
problem_ids: List[str],
is_problemset: bool,
force_dump: bool,
cache: dict[str, Any],
reference_cache: dict[str, Any],
) -> dict[str, Any]:
cache: RenderResultCache,
) -> None:
if is_problemset:
output_path = str(output_dir / ("problemset." + output_ext))
output_path = str(output_dir / ("problemset." + output_ext.value))
else:
output_path = str(output_dir / (problem_ids[0] + "." + output_ext))
logger.info(f"saving replaced {output_ext}")
if output_ext == "html":
output_path = str(output_dir / (problem_ids[0] + "." + output_ext.value))
logger.info(f"saving replaced {output_ext.value}")
if output_ext == OutputFileKind.HTML:
html = self.renderer.generate_html(
problemset_config=self.problemset_config,
problem_ids=problem_ids,
is_problemset=is_problemset,
)
cache["contents"] = hashlib.sha256(html.encode()).hexdigest()
if need_to_save(
cache,
reference_cache,
force_dump,
output_path,
):
cache.set_content(html)
if cache.need_to_save(force_dump):
self.save_file(html, output_path)
else:
logger.warning("skip dumping html: same result as before")
elif output_ext == "pdf":
elif output_ext == OutputFileKind.PDF:
pdf_attr = self.make_pdf_attr(is_problemset)
html = self.renderer.generate_html_for_pdf(
problemset_config=self.problemset_config,
problem_ids=problem_ids,
is_problemset=is_problemset,
pdf_path=output_path,
)
cache["contents"] = hashlib.sha256(html.encode()).hexdigest()
if need_to_save(
cache,
reference_cache,
force_dump,
output_path,
):
cache.set_content(html)
if cache.need_to_save(force_dump):
wait_second = int(cast(int, pdf_attr["javascript-delay"]))
if wait_second > 0:
logger.info(f"please wait... ({wait_second} [msec] or greater)")
pdfkit.from_string(html, output_path, verbose=True, options=pdf_attr)
else:
logger.warning("skip dumping pdf: same result as before")
elif output_ext == "md":
elif output_ext == OutputFileKind.MARKDOWN:
md = self.renderer.generate_markdown(
problemset_config=self.problemset_config,
problem_ids=problem_ids,
is_problemset=is_problemset,
)
cache["contents"] = hashlib.sha256(md.encode()).hexdigest()
if need_to_save(
cache,
reference_cache,
force_dump,
output_path,
):
cache.set_content(md)
if cache.need_to_save(force_dump):
self.save_file(md, output_path)
else:
logger.warning("skip dumping md: same result as before")
else:
logger.error(f"invalid extension '{output_ext}'")
raise ValueError(f"invalid extension '{output_ext}'")
return cache
logger.error(f"invalid extension '{output_ext.value}'")
raise ValueError(f"invalid extension '{output_ext.value}'")

def run(
self,
problem_ids: List[str],
output_ext: str,
output_ext: OutputFileKind,
make_problemset: bool,
force_dump: bool,
constraints_only: bool,
) -> None:
# 問題文を取ってきて変換
valid_problem_ids = []
problemset_cache: dict[str, Any] = {}
has_diff = False
for problem_id in problem_ids:
logger.info(f"rendering [problem id: {problem_id}]")
problem_config = self.problemset_config.get_problem(problem_id)
Expand All @@ -290,56 +261,22 @@ def run(
output_dir.mkdir()

# キャッシュの記録
problem_cache: dict[str, Any] = {"assets": {}}
reference_cache: dict[str, Any] = {}
if Path(output_dir / "cache.json").exists():
reference_cache = json.load(open(output_dir / "cache.json"))
reference_cache.setdefault(output_ext, {})
reference_cache[output_ext].setdefault(problem_id, {})
problem_group = self.problemset_config.get_problem_group(problem_id)
for ext in reference_cache.keys():
obsoleted_ids = list(
filter(
lambda id: id not in problem_group, reference_cache[ext].keys()
)
)
for id in obsoleted_ids:
reference_cache[ext].pop(id)

problem_cache["assets"] = self.copy_assets(
problem_id, output_dir / "assets"
cache = RenderResultCache(
output_dir,
output_ext,
problem_id,
self.problemset_config.get_problem_group(problem_id),
)
reference_cache[output_ext][problem_id] = self.run_rendering(
cache.set_assets(self.copy_assets(problem_id, output_dir / "assets"))
self.run_rendering(
output_dir=output_dir,
output_ext=output_ext,
problem_ids=[problem_id],
is_problemset=False,
force_dump=force_dump,
cache=problem_cache,
reference_cache=reference_cache[output_ext][problem_id],
cache=cache,
)
json.dump(
reference_cache,
open(output_dir / "cache.json", "w"),
indent=4,
sort_keys=True,
)
dict_merge(problemset_cache, reference_cache[output_ext])

filenames = list(
filter(
lambda filename: pathlib.Path(filename).stem not in problem_group,
sum(
[
list(glob.glob(str(output_dir) + f"/*.{ext}"))
for ext in ["html", "pdf", "md"]
],
[],
),
)
)
for filename in filenames:
os.remove(filename)
has_diff |= cache.save_and_check_diff()
logger.info("")

# 問題セットに対応するものを出力
Expand All @@ -360,24 +297,13 @@ def run(
self.problemset_dir / "assets" / problem_id,
)
logger.info("rendering problemset")
reference_problemset_cache: dict[str, Any] = {}
if Path(self.problemset_dir / "cache.json").exists():
reference_problemset_cache = json.load(
open(self.problemset_dir / "cache.json")
)
reference_problemset_cache.setdefault(output_ext, {})
reference_problemset_cache[output_ext] = self.run_rendering(
cache = RenderResultCache(self.problemset_dir, output_ext)
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

問題セットのキャッシュファイルは構造が変わるが、許容する

self.run_rendering(
output_dir=self.problemset_dir,
output_ext=output_ext,
problem_ids=valid_problem_ids,
is_problemset=True,
force_dump=force_dump,
cache=problemset_cache,
reference_cache=reference_problemset_cache[output_ext],
)
json.dump(
reference_problemset_cache,
open(self.problemset_dir / "cache.json", "w"),
indent=4,
sort_keys=True,
force_dump=force_dump or has_diff,
cache=cache,
)
cache.save_and_check_diff()
14 changes: 14 additions & 0 deletions statements_manager/src/output_file_kind.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from __future__ import annotations

import enum


@enum.unique
class OutputFileKind(enum.Enum):
    """Output formats that a statement can be rendered to.

    Each member's value is the file extension used for the output file.
    """

    MARKDOWN = "md"
    HTML = "html"
    PDF = "pdf"

    @staticmethod
    def values() -> list[str]:
        """Return all supported extension strings, in declaration order."""
        return [kind.value for kind in OutputFileKind]
3 changes: 2 additions & 1 deletion statements_manager/src/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from statements_manager.src.convert_task_runner import ConvertTaskRunner
from statements_manager.src.execute_config import ProblemSetConfig
from statements_manager.src.output_file_kind import OutputFileKind
from statements_manager.src.utils import read_toml_file

logger: Logger = getLogger(__name__)
Expand All @@ -13,7 +14,7 @@
class Project:
def __init__(self, working_dir: str, ext: str) -> None:
self._cwd: Path = Path(working_dir).resolve()
self._ext: str = ext
self._ext: OutputFileKind = OutputFileKind(ext)
self.problemset_config = self._fetch_problemset_config()
self.task_runner = ConvertTaskRunner(
problemset_config=self.problemset_config,
Expand Down
Loading
Loading