Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Toughen ruff linting #313

Merged
merged 7 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,25 @@ $ pipx install --suffix=@next unihan-etl --pip-args '\--pre' --force

<!-- Maintainers, insert changes / features for the next release here -->

### Development

- Strengthen linting (#313)

- Add flake8-commas (COM)

- https://docs.astral.sh/ruff/rules/#flake8-commas-com
- https://pypi.org/project/flake8-commas/

- Add flake8-builtins (A)

- https://docs.astral.sh/ruff/rules/#flake8-builtins-a
- https://pypi.org/project/flake8-builtins/

- Add flake8-errmsg (EM)

- https://docs.astral.sh/ruff/rules/#flake8-errmsg-em
- https://pypi.org/project/flake8-errmsg/

## unihan-etl 0.32.0 (2024-02-05)

### Documentation
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
master_doc = "index"

project = about["__title__"]
copyright = about["__copyright__"]
project_copyright = about["__copyright__"]

version = "%s" % (".".join(about["__version__"].split("."))[:2])
release = "%s" % (about["__version__"])
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,11 @@ select = [
"F", # pyflakes
"I", # isort
"UP", # pyupgrade
"A", # flake8-builtins
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"COM", # flake8-commas
"EM", # flake8-errmsg
"Q", # flake8-quotes
"PTH", # flake8-use-pathlib
"SIM", # flake8-simplify
Expand All @@ -166,6 +169,9 @@ select = [
"RUF", # Ruff-specific rules
"D", # pydocstyle
]
ignore = [
"COM812", # missing trailing comma, ruff format conflict
]

[tool.ruff.lint.isort]
known-first-party = ["unihan_etl", "cihai"]
Expand Down
4 changes: 2 additions & 2 deletions src/unihan_etl/_internal/app_dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def __post_init__(self, _app_dirs: "BaseAppDirs") -> None:
pathlib.Path(
os.path.expanduser( # noqa: PTH111
os.path.expandvars(str(val)),
).format(**dir_mapping)
).format(**dir_mapping),
),
)

Expand All @@ -108,6 +108,6 @@ def __post_init__(self, _app_dirs: "BaseAppDirs") -> None:
pathlib.Path(
os.path.expanduser( # noqa: PTH111
os.path.expandvars(str(val)),
).format(**dir_mapping)
).format(**dir_mapping),
),
)
27 changes: 18 additions & 9 deletions src/unihan_etl/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,10 @@ def get_parser() -> argparse.ArgumentParser:
help=f"Output of .csv. Default: {DESTINATION_DIR}/unihan.{{json,csv,yaml}}",
)
parser.add_argument(
"-w", "--work-dir", dest="work_dir", help=f"Default: {WORK_DIR}"
"-w",
"--work-dir",
dest="work_dir",
help=f"Default: {WORK_DIR}",
)
parser.add_argument(
"-F",
Expand Down Expand Up @@ -315,7 +318,8 @@ def load_data(
"""
log.info(f"Loading data: {', '.join([str(s) for s in files])}")
raw_data = fileinput.FileInput(
files=files, openhook=fileinput.hook_encoded("utf-8")
files=files,
openhook=fileinput.hook_encoded("utf-8"),
)
log.info("Done loading data.")

Expand Down Expand Up @@ -412,7 +416,8 @@ def expand_delimiters(normalized_data: "UntypedNormalizedData") -> "ExpandedExpo


def listify(
data: "UntypedNormalizedData", fields: t.Sequence[str]
data: "UntypedNormalizedData",
fields: t.Sequence[str],
) -> "ListifiedExport":
"""Convert tabularized data to a CSV-friendly list.

Expand Down Expand Up @@ -467,20 +472,23 @@ def validate_options(
) -> "TypeGuard[Options]":
"""Validate unihan-etl options."""
if not is_default_option("input_files", options.input_files) and is_default_option(
"fields", options.fields
"fields",
options.fields,
):
# Filter fields when only files specified.
try:
options.fields = get_fields(filter_manifest(options.input_files))
except (KeyError, FieldNotFound) as e:
raise FileNotSupported(str(e)) from e
elif not is_default_option("fields", options.fields) and is_default_option(
"input_files", options.input_files
"input_files",
options.input_files,
):
# Filter files when only field specified.
options.input_files = get_files(options.fields)
elif not is_default_option("fields", options.fields) and not is_default_option(
"input_files", options.input_files
"input_files",
options.input_files,
):
# Filter fields when only files specified.
fields_in_files = get_fields(filter_manifest(options.input_files))
Expand Down Expand Up @@ -520,7 +528,8 @@ def __init__(
setup_logger(logger=None, level=options.log_level or DEFAULT_OPTIONS.log_level)

merged_options = dataclasses.replace(
DEFAULT_OPTIONS, **dataclasses.asdict(options)
DEFAULT_OPTIONS,
**dataclasses.asdict(options),
)

self.options = merged_options
Expand Down Expand Up @@ -561,7 +570,7 @@ def export(self) -> t.Union[None, "UntypedNormalizedData"]:

# Replace {ext} with extension to use.
self.options.destination = pathlib.Path(
str(self.options.destination).format(ext=self.options.format)
str(self.options.destination).format(ext=self.options.format),
)

if not self.options.destination.parent.exists():
Expand Down Expand Up @@ -613,7 +622,7 @@ def from_cli(cls, argv: t.Sequence[str]) -> "Packager":

try:
return cls(
Options(**{k: v for k, v in vars(args).items() if v is not None})
Options(**{k: v for k, v in vars(args).items() if v is not None}),
)
except Exception as e:
sys.exit(str(e))
Expand Down
30 changes: 16 additions & 14 deletions src/unihan_etl/expansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def expand_kAlternateTotalStrokes(
kAlternateTotalStrokesDict(
strokes=strokes,
sources=sources,
)
),
)
return expanded

Expand Down Expand Up @@ -293,13 +293,13 @@ def expand_kTGHZ2013(
page=int(g["page"]),
position=int(g["position"]),
entry_type=int(g["entry_type"]),
)
),
)
expanded.append(
kTGHZ2013Dict(
reading=reading,
locations=exploded_locations,
)
),
)
return expanded

Expand Down Expand Up @@ -353,7 +353,7 @@ def expand_kSMSZD2003Index(
kSMSZD2003IndexDict(
page=int(g["page"]),
position=int(g["position"]),
)
),
)
return expanded

Expand Down Expand Up @@ -395,7 +395,7 @@ def expand_kSMSZD2003Readings(
kSMSZD2003ReadingsDict(
mandarin=mandarin.split(","),
cantonese=cantonese.split(","),
)
),
)
return expanded

Expand Down Expand Up @@ -448,7 +448,8 @@ def expand_kHanyuPinyin(
virtual=int(g["virtual"]),
)
expanded[i] = kHanyuPinyinDict(
locations=expanded[i]["locations"], readings=expanded[i]["readings"]
locations=expanded[i]["locations"],
readings=expanded[i]["readings"],
)
return expanded

Expand Down Expand Up @@ -511,7 +512,8 @@ def expand_kXHC1983(
substituted=g["substituted"] == "*",
)
expanded[i] = kXHC1983Dict(
locations=expanded[i]["locations"], reading=expanded[i]["reading"]
locations=expanded[i]["locations"],
reading=expanded[i]["reading"],
)
return expanded

Expand Down Expand Up @@ -592,7 +594,7 @@ def expand_kRSAdobe_Japan1_6(value: t.List[str]) -> t.List[kRSAdobe_Japan1_6Dict
"radical": int(g["radical"]),
"strokes": int(g["strokes"]),
"strokes-residue": int(g["strokes_residue"]),
}
},
)
return expanded

Expand Down Expand Up @@ -630,7 +632,7 @@ def expand_kCihaiT(value: t.List[str]) -> t.List[kCihaiTDict]:
"page": int(m["page"]),
"row": int(m["row"]),
"character": int(m["character"]),
}
},
)
return expanded

Expand Down Expand Up @@ -731,7 +733,7 @@ def expand_kFenn(value: t.List[str]) -> t.List[kFennDict]:
assert g is not None

expanded[i] = kFennDict(
{"phonetic": g["phonetic"], "frequency": g["frequency"]}
{"phonetic": g["phonetic"], "frequency": g["frequency"]},
)
return expanded

Expand Down Expand Up @@ -762,7 +764,7 @@ def expand_kHanyuPinlu(value: t.List[str]) -> t.List[kHanyuPinluDict]:
assert g is not None

expanded[i] = kHanyuPinluDict(
{"phonetic": g["phonetic"], "frequency": int(g["frequency"])}
{"phonetic": g["phonetic"], "frequency": int(g["frequency"])},
)
return expanded

Expand Down Expand Up @@ -954,7 +956,7 @@ def expand_kGSR(value: t.List[str]) -> t.List[kGSRDict]:
"set": int(g["set"]),
"letter": g["letter"],
"apostrophe": g["apostrophe"] == "'",
}
},
)
return expanded

Expand Down Expand Up @@ -1071,7 +1073,7 @@ def expand_kStrange(
kStrangeDict(
property_type=property_type,
characters=characters,
)
),
)
return expanded

Expand Down Expand Up @@ -1134,7 +1136,7 @@ def expand_kMojiJoho(
serial_number=serial_number,
variation_sequence=variation_sequence,
standard=serial_number == default_serial,
)
),
)
return kMojiJohoDict(
serial_number=default_serial,
Expand Down
2 changes: 1 addition & 1 deletion src/unihan_etl/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class Options:
zip_path: pathlib.Path = UNIHAN_ZIP_PATH
work_dir: pathlib.Path = WORK_DIR
fields: t.Sequence[str] = dataclasses.field(
default_factory=lambda: INDEX_FIELDS + UNIHAN_FIELDS
default_factory=lambda: INDEX_FIELDS + UNIHAN_FIELDS,
)
format: t.Literal["json", "csv", "yaml", "python"] = "csv"
input_files: t.List[str] = dataclasses.field(default_factory=lambda: UNIHAN_FILES)
Expand Down
15 changes: 10 additions & 5 deletions src/unihan_etl/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def unihan_full_options(unihan_full_path: pathlib.Path) -> UnihanOptions:

@pytest.fixture(scope="session")
def unihan_full_packager(
unihan_full_path: pathlib.Path, unihan_full_options: "UnihanOptions"
unihan_full_path: pathlib.Path,
unihan_full_options: "UnihanOptions",
) -> "Packager":
"""Return Packager for "full" portion of UNIHAN, return a UnihanOptions."""
return Packager(unihan_full_options)
Expand Down Expand Up @@ -224,7 +225,8 @@ def unihan_quick_options(

@pytest.fixture(scope="session")
def unihan_quick_packager(
unihan_quick_path: pathlib.Path, unihan_quick_options: "UnihanOptions"
unihan_quick_path: pathlib.Path,
unihan_quick_options: "UnihanOptions",
) -> "Packager":
"""Bootstrap a small, but effective portion of UNIHAN, return a UnihanOptions."""
return Packager(unihan_quick_options)
Expand Down Expand Up @@ -358,7 +360,8 @@ def unihan_home_user_name() -> str:

@pytest.fixture(scope="session")
def unihan_user_path(
unihan_home_path: pathlib.Path, unihan_home_user_name: str
unihan_home_path: pathlib.Path,
unihan_home_user_name: str,
) -> pathlib.Path:
"""Return temporary user directory.

Expand Down Expand Up @@ -427,15 +430,17 @@ def unihan_mock_test_dir(tmp_path_factory: pytest.TempPathFactory) -> pathlib.Pa

@pytest.fixture(scope="session")
def unihan_mock_zip_path(
unihan_mock_test_dir: pathlib.Path, unihan_mock_zip_pathname: str
unihan_mock_test_dir: pathlib.Path,
unihan_mock_zip_pathname: str,
) -> pathlib.Path:
"""Return path to Unihan zipfile."""
return unihan_mock_test_dir / unihan_mock_zip_pathname


@pytest.fixture(scope="session")
def unihan_mock_zip(
unihan_mock_zip_path: pathlib.Path, unihan_quick_data: str
unihan_mock_zip_path: pathlib.Path,
unihan_quick_data: str,
) -> zipfile.ZipFile:
"""Return Unihan zipfile."""
zf = zipfile.ZipFile(str(unihan_mock_zip_path), "a")
Expand Down
6 changes: 5 additions & 1 deletion src/unihan_etl/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ class ReportHookFn(t.Protocol):
"""Progress bar callback for download()."""

def __call__(
self, count: int, block_size: int, total_size: int, out: t.IO[str] = sys.stdout
self,
count: int,
block_size: int,
total_size: int,
out: t.IO[str] = sys.stdout,
) -> object:
"""Print progress bar during download."""
...
Expand Down
7 changes: 5 additions & 2 deletions src/unihan_etl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ def ucnstring_to_unicode(ucn_string: str) -> str:


def _dl_progress(
count: int, block_size: int, total_size: int, out: t.IO[str] = sys.stdout
count: int,
block_size: int,
total_size: int,
out: t.IO[str] = sys.stdout,
) -> None:
"""
MIT License: https://github.com/okfn/dpm-old/blob/master/dpm/util.py.
Expand Down Expand Up @@ -112,7 +115,7 @@ def format_size(_bytes: int) -> str:
percent,
int(round(percent / 2)) * "=",
int(round(50 - percent / 2)) * " ",
)
),
)
out.flush()
if maxdownloaded >= total_size:
Expand Down
Loading
Loading