Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor cache system #154

Merged
merged 5 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions statements_manager/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from statements_manager.src.project import Project
from statements_manager.src.utils import ask_ok, create_token
from statements_manager.src.output_file_kind import OutputFileKind

logger: Logger = getLogger(__name__)

Expand Down Expand Up @@ -57,8 +58,8 @@ def get_parser() -> argparse.ArgumentParser:
subparser.add_argument(
"-o",
"--output",
default="html",
choices=["html", "md", "pdf"],
default=OutputFileKind.HTML.value,
choices=OutputFileKind.values(),
help="output format (defaults to 'html')",
)
subparser.add_argument(
Expand Down
117 changes: 39 additions & 78 deletions statements_manager/src/convert_task_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
from googleapiclient.discovery import build

from statements_manager.src.execute_config import ProblemSetConfig
from statements_manager.src.output_file_kind import OutputFileKind
from statements_manager.src.render_result_cache import RenderResultCache
from statements_manager.src.params_maker.lang_to_class import lang_to_class
from statements_manager.src.renderer import Renderer
from statements_manager.src.statement_location_mode import StatementLocationMode
from statements_manager.src.utils import create_token, dict_merge
from statements_manager.src.utils import create_token

logger: Logger = getLogger(__name__)

Expand Down Expand Up @@ -185,19 +187,19 @@ def make_pdf_attr(self, is_problemset: bool) -> dict[str, Any]:
def run_rendering(
self,
output_dir: Path,
output_ext: str,
output_ext: OutputFileKind,
problem_ids: List[str],
is_problemset: bool,
force_dump: bool,
cache: dict[str, Any],
reference_cache: dict[str, Any],
) -> dict[str, Any]:
if is_problemset:
output_path = str(output_dir / ("problemset." + output_ext))
output_path = str(output_dir / ("problemset." + output_ext.value))
else:
output_path = str(output_dir / (problem_ids[0] + "." + output_ext))
logger.info(f"saving replaced {output_ext}")
if output_ext == "html":
output_path = str(output_dir / (problem_ids[0] + "." + output_ext.value))
logger.info(f"saving replaced {output_ext.value}")
if output_ext == OutputFileKind.HTML:
html = self.renderer.generate_html(
problemset_config=self.problemset_config,
problem_ids=problem_ids,
Expand All @@ -213,7 +215,7 @@ def run_rendering(
self.save_file(html, output_path)
else:
logger.warning("skip dumping html: same result as before")
elif output_ext == "pdf":
elif output_ext == OutputFileKind.PDF:
pdf_attr = self.make_pdf_attr(is_problemset)
html = self.renderer.generate_html_for_pdf(
problemset_config=self.problemset_config,
Expand All @@ -234,7 +236,7 @@ def run_rendering(
pdfkit.from_string(html, output_path, verbose=True, options=pdf_attr)
else:
logger.warning("skip dumping pdf: same result as before")
elif output_ext == "md":
elif output_ext == OutputFileKind.MARKDOWN:
md = self.renderer.generate_markdown(
problemset_config=self.problemset_config,
problem_ids=problem_ids,
Expand All @@ -251,21 +253,21 @@ def run_rendering(
else:
logger.warning("skip dumping md: same result as before")
else:
logger.error(f"invalid extension '{output_ext}'")
raise ValueError(f"invalid extension '{output_ext}'")
logger.error(f"invalid extension '{output_ext.value}'")
raise ValueError(f"invalid extension '{output_ext.value}'")
return cache

def run(
self,
problem_ids: List[str],
output_ext: str,
output_ext: OutputFileKind,
make_problemset: bool,
force_dump: bool,
constraints_only: bool,
) -> None:
# 問題文を取ってきて変換
valid_problem_ids = []
problemset_cache: dict[str, Any] = {}
has_diff = False
for problem_id in problem_ids:
logger.info(f"rendering [problem id: {problem_id}]")
problem_config = self.problemset_config.get_problem(problem_id)
Expand All @@ -290,56 +292,24 @@ def run(
output_dir.mkdir()

# キャッシュの記録
problem_cache: dict[str, Any] = {"assets": {}}
reference_cache: dict[str, Any] = {}
if Path(output_dir / "cache.json").exists():
reference_cache = json.load(open(output_dir / "cache.json"))
reference_cache.setdefault(output_ext, {})
reference_cache[output_ext].setdefault(problem_id, {})
problem_group = self.problemset_config.get_problem_group(problem_id)
for ext in reference_cache.keys():
obsoleted_ids = list(
filter(
lambda id: id not in problem_group, reference_cache[ext].keys()
)
)
for id in obsoleted_ids:
reference_cache[ext].pop(id)

problem_cache["assets"] = self.copy_assets(
problem_id, output_dir / "assets"
)
reference_cache[output_ext][problem_id] = self.run_rendering(
output_dir=output_dir,
output_ext=output_ext,
problem_ids=[problem_id],
is_problemset=False,
force_dump=force_dump,
cache=problem_cache,
reference_cache=reference_cache[output_ext][problem_id],
cache = RenderResultCache(
output_dir,
output_ext,
problem_id,
self.problemset_config.get_problem_group(problem_id),
)
json.dump(
reference_cache,
open(output_dir / "cache.json", "w"),
indent=4,
sort_keys=True,
)
dict_merge(problemset_cache, reference_cache[output_ext])

filenames = list(
filter(
lambda filename: pathlib.Path(filename).stem not in problem_group,
sum(
[
list(glob.glob(str(output_dir) + f"/*.{ext}"))
for ext in ["html", "pdf", "md"]
],
[],
),
cache.set_assets(self.copy_assets(problem_id, output_dir / "assets"))
has_diff |= cache.save_and_check_diff(
self.run_rendering(
output_dir=output_dir,
output_ext=output_ext,
problem_ids=[problem_id],
is_problemset=False,
force_dump=force_dump,
cache=cache.get_current(),
reference_cache=cache.get_previous(),
tsutaj marked this conversation as resolved.
Show resolved Hide resolved
)
)
for filename in filenames:
os.remove(filename)
logger.info("")

# 問題セットに対応するものを出力
Expand All @@ -360,24 +330,15 @@ def run(
self.problemset_dir / "assets" / problem_id,
)
logger.info("rendering problemset")
reference_problemset_cache: dict[str, Any] = {}
if Path(self.problemset_dir / "cache.json").exists():
reference_problemset_cache = json.load(
open(self.problemset_dir / "cache.json")
cache = RenderResultCache(self.problemset_dir, output_ext)
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

問題セットのキャッシュファイルは構造が変わるが、許容する

cache.save_and_check_diff(
self.run_rendering(
output_dir=self.problemset_dir,
output_ext=output_ext,
problem_ids=valid_problem_ids,
is_problemset=True,
force_dump=force_dump or has_diff,
Copy link
Owner Author

@tsutaj tsutaj May 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

どれかの問題で差分が発生していたら、problemset が更新される
(以前は cache の中身で判定していたが、判定方法を変更)

cache=cache.get_current(),
reference_cache=cache.get_previous(),
)
reference_problemset_cache.setdefault(output_ext, {})
reference_problemset_cache[output_ext] = self.run_rendering(
output_dir=self.problemset_dir,
output_ext=output_ext,
problem_ids=valid_problem_ids,
is_problemset=True,
force_dump=force_dump,
cache=problemset_cache,
reference_cache=reference_problemset_cache[output_ext],
)
json.dump(
reference_problemset_cache,
open(self.problemset_dir / "cache.json", "w"),
indent=4,
sort_keys=True,
)
13 changes: 13 additions & 0 deletions statements_manager/src/output_file_kind.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from __future__ import annotations
import enum


@enum.unique
class OutputFileKind(enum.Enum):
    """Supported output formats for rendered problem statements."""

    MARKDOWN = "md"
    HTML = "html"
    PDF = "pdf"

    @staticmethod
    def values() -> list[str]:
        """Return the raw extension strings of every member, in definition order."""
        extensions: list[str] = []
        for member in OutputFileKind:
            extensions.append(member.value)
        return extensions
3 changes: 2 additions & 1 deletion statements_manager/src/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from statements_manager.src.convert_task_runner import ConvertTaskRunner
from statements_manager.src.execute_config import ProblemSetConfig
from statements_manager.src.output_file_kind import OutputFileKind
from statements_manager.src.utils import read_toml_file

logger: Logger = getLogger(__name__)
Expand All @@ -13,7 +14,7 @@
class Project:
def __init__(self, working_dir: str, ext: str) -> None:
self._cwd: Path = Path(working_dir).resolve()
self._ext: str = ext
self._ext: OutputFileKind = OutputFileKind(ext)
self.problemset_config = self._fetch_problemset_config()
self.task_runner = ConvertTaskRunner(
problemset_config=self.problemset_config,
Expand Down
100 changes: 100 additions & 0 deletions statements_manager/src/render_result_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from __future__ import annotations
import enum
import pathlib
import json
import glob
import os
import copy
from typing import Any, Optional
from statements_manager.src.output_file_kind import OutputFileKind


@enum.unique
class CacheKey(enum.Enum):
    """JSON keys used inside a single per-problem cache entry."""

    ASSETS = "assets"
    CONTENTS = "contents"


class RenderResultCache:
    """Manages the render-result cache stored in ``<output_dir>/cache.json``.

    The JSON file maps output extension -> problem id -> cached data
    (asset digests and rendered contents). An instance exposes the slice
    for a single (extension, problem) pair, drops entries and output files
    belonging to ids no longer in the problem group, and reports on save
    whether the newly rendered result differs from the previous one.
    """

    def __init__(
        self,
        output_dir: pathlib.Path,
        output_ext: OutputFileKind,
        problem_id: Optional[str] = None,
        problem_group: Optional[list[str]] = None,
    ):
        """
        Args:
            output_dir: directory holding rendered files and ``cache.json``.
            output_ext: output format whose cache slice this instance manages.
            problem_id: target problem id; ``None`` means the whole problemset.
            problem_group: ids considered current; cache entries and output
                files for any other id are removed when the cache is loaded.
        """
        self.output_dir = output_dir
        self.output_ext = output_ext
        self.cache_path = output_dir / "cache.json"
        # "problemset" acts as the pseudo id/group when no problem is given.
        self.problem_id = "problemset" if problem_id is None else problem_id
        self.problem_group = ["problemset"] if problem_group is None else problem_group
        self.cache = self._load_and_setup_cache()
        # Deep copy so later mutations of self.cache cannot leak into the
        # snapshot used for diff detection.
        self.prev_cache = copy.deepcopy(self.cache)

    def _load_cache(self) -> dict[str, Any]:
        """Read cache.json, cleaned of obsolete entries; {} if the file is absent."""
        if not self.cache_path.exists():
            return {}
        with open(self.cache_path, "r") as f:
            return self._cleanup(json.load(f))

    def _setup_cache(self, cache: dict[str, Any]) -> dict[str, Any]:
        """Ensure the nested slots for (output_ext, problem_id) exist."""
        cache.setdefault(self.output_ext.value, {})
        cache[self.output_ext.value].setdefault(self.problem_id, {})
        cache[self.output_ext.value][self.problem_id].setdefault(
            CacheKey.ASSETS.value, {}
        )
        return cache

    def _load_and_setup_cache(self) -> dict[str, Any]:
        """Load the persisted cache and make sure this instance's slot exists."""
        return self._setup_cache(self._load_cache())

    def _cleanup(self, cache: dict[str, Any]) -> dict[str, Any]:
        """Drop cache entries and delete output files for ids outside the group.

        Two things happen here: ids no longer part of the problem group are
        removed from the loaded dictionary, and rendered files (one pass per
        known output extension) whose stem is not in the group are deleted
        from disk.
        """
        for ext in cache.keys():
            obsoleted_ids = [
                id for id in cache[ext].keys() if id not in self.problem_group
            ]
            for id in obsoleted_ids:
                cache[ext].pop(id)
        obsolete_filenames = [
            filename
            for ext in OutputFileKind.values()
            for filename in glob.glob(str(self.output_dir) + f"/*.{ext}")
            if pathlib.Path(filename).stem not in self.problem_group
        ]
        for filename in obsolete_filenames:
            os.remove(filename)
        return cache

    def get_current(self) -> dict[str, Any]:
        """Return the mutable cache slice for this (extension, problem) pair."""
        return self.cache[self.output_ext.value][self.problem_id]

    def get_previous(self) -> dict[str, Any]:
        """Return the cache slice as it was when loaded from disk."""
        return self.prev_cache[self.output_ext.value][self.problem_id]

    def set_assets(self, assets_dict: dict[str, Any]) -> None:
        """Record the asset digests for this problem in the current cache."""
        self.cache[self.output_ext.value][self.problem_id][
            CacheKey.ASSETS.value
        ] = assets_dict

    def save_and_check_diff(self, cache_dict: dict[str, Any]) -> bool:
        """Store ``cache_dict``, persist cache.json, and report whether it changed.

        Returns:
            True if the stored slice differs from the previously saved state.
        """
        self.cache[self.output_ext.value][self.problem_id] = cache_dict
        # Fix: the original passed a second, never-closed open(self.cache_path,
        # "w") to json.dump while already holding the `with` handle, leaking a
        # file object; write through the managed handle instead.
        with open(self.cache_path, "w") as f:
            json.dump(self.cache, f, indent=4, sort_keys=True)
        has_diff = (
            self.cache[self.output_ext.value][self.problem_id]
            != self.prev_cache[self.output_ext.value][self.problem_id]
        )
        self.prev_cache = copy.deepcopy(self.cache)
        return has_diff
Loading