Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Toughen ruff linting #313

Merged
merged 7 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,25 @@ $ pipx install --suffix=@next unihan-etl --pip-args '\--pre' --force

<!-- Maintainers, insert changes / features for the next release here -->

### Development

- Strengthen linting (#313)

- Add flake8-commas (COM)

- https://docs.astral.sh/ruff/rules/#flake8-commas-com
- https://pypi.org/project/flake8-commas/

- Add flake8-builtins (A)

- https://docs.astral.sh/ruff/rules/#flake8-builtins-a
- https://pypi.org/project/flake8-builtins/

- Add flake8-errmsg (EM)

- https://docs.astral.sh/ruff/rules/#flake8-errmsg-em
- https://pypi.org/project/flake8-errmsg/

## unihan-etl 0.32.0 (2024-02-05)

### Documentation
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
master_doc = "index"

project = about["__title__"]
copyright = about["__copyright__"]
project_copyright = about["__copyright__"]

version = "%s" % (".".join(about["__version__"].split("."))[:2])
release = "%s" % (about["__version__"])
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,11 @@ select = [
"F", # pyflakes
"I", # isort
"UP", # pyupgrade
"A", # flake8-builtins
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"COM", # flake8-commas
"EM", # flake8-errmsg
"Q", # flake8-quotes
"PTH", # flake8-use-pathlib
"SIM", # flake8-simplify
Expand All @@ -166,6 +169,9 @@ select = [
"RUF", # Ruff-specific rules
"D", # pydocstyle
]
ignore = [
"COM812", # missing trailing comma, ruff format conflict
]

[tool.ruff.lint.isort]
known-first-party = ["unihan_etl", "cihai"]
Expand Down
4 changes: 2 additions & 2 deletions src/unihan_etl/_internal/app_dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def __post_init__(self, _app_dirs: "BaseAppDirs") -> None:
pathlib.Path(
os.path.expanduser( # noqa: PTH111
os.path.expandvars(str(val)),
).format(**dir_mapping)
).format(**dir_mapping),
),
)

Expand All @@ -108,6 +108,6 @@ def __post_init__(self, _app_dirs: "BaseAppDirs") -> None:
pathlib.Path(
os.path.expanduser( # noqa: PTH111
os.path.expandvars(str(val)),
).format(**dir_mapping)
).format(**dir_mapping),
),
)
27 changes: 18 additions & 9 deletions src/unihan_etl/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,10 @@ def get_parser() -> argparse.ArgumentParser:
help=f"Output of .csv. Default: {DESTINATION_DIR}/unihan.{{json,csv,yaml}}",
)
parser.add_argument(
"-w", "--work-dir", dest="work_dir", help=f"Default: {WORK_DIR}"
"-w",
"--work-dir",
dest="work_dir",
help=f"Default: {WORK_DIR}",
)
parser.add_argument(
"-F",
Expand Down Expand Up @@ -315,7 +318,8 @@ def load_data(
"""
log.info(f"Loading data: {', '.join([str(s) for s in files])}")
raw_data = fileinput.FileInput(
files=files, openhook=fileinput.hook_encoded("utf-8")
files=files,
openhook=fileinput.hook_encoded("utf-8"),
)
log.info("Done loading data.")

Expand Down Expand Up @@ -412,7 +416,8 @@ def expand_delimiters(normalized_data: "UntypedNormalizedData") -> "ExpandedExpo


def listify(
data: "UntypedNormalizedData", fields: t.Sequence[str]
data: "UntypedNormalizedData",
fields: t.Sequence[str],
) -> "ListifiedExport":
"""Convert tabularized data to a CSV-friendly list.

Expand Down Expand Up @@ -467,20 +472,23 @@ def validate_options(
) -> "TypeGuard[Options]":
"""Validate unihan-etl options."""
if not is_default_option("input_files", options.input_files) and is_default_option(
"fields", options.fields
"fields",
options.fields,
):
# Filter fields when only files specified.
try:
options.fields = get_fields(filter_manifest(options.input_files))
except (KeyError, FieldNotFound) as e:
raise FileNotSupported(str(e)) from e
elif not is_default_option("fields", options.fields) and is_default_option(
"input_files", options.input_files
"input_files",
options.input_files,
):
# Filter files when only field specified.
options.input_files = get_files(options.fields)
elif not is_default_option("fields", options.fields) and not is_default_option(
"input_files", options.input_files
"input_files",
options.input_files,
):
# Filter fields when only files specified.
fields_in_files = get_fields(filter_manifest(options.input_files))
Expand Down Expand Up @@ -520,7 +528,8 @@ def __init__(
setup_logger(logger=None, level=options.log_level or DEFAULT_OPTIONS.log_level)

merged_options = dataclasses.replace(
DEFAULT_OPTIONS, **dataclasses.asdict(options)
DEFAULT_OPTIONS,
**dataclasses.asdict(options),
)

self.options = merged_options
Expand Down Expand Up @@ -561,7 +570,7 @@ def export(self) -> t.Union[None, "UntypedNormalizedData"]:

# Replace {ext} with extension to use.
self.options.destination = pathlib.Path(
str(self.options.destination).format(ext=self.options.format)
str(self.options.destination).format(ext=self.options.format),
)

if not self.options.destination.parent.exists():
Expand Down Expand Up @@ -613,7 +622,7 @@ def from_cli(cls, argv: t.Sequence[str]) -> "Packager":

try:
return cls(
Options(**{k: v for k, v in vars(args).items() if v is not None})
Options(**{k: v for k, v in vars(args).items() if v is not None}),
)
except Exception as e:
sys.exit(str(e))
Expand Down
30 changes: 16 additions & 14 deletions src/unihan_etl/expansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def expand_kAlternateTotalStrokes(
kAlternateTotalStrokesDict(
strokes=strokes,
sources=sources,
)
),
)
return expanded

Expand Down Expand Up @@ -293,13 +293,13 @@ def expand_kTGHZ2013(
page=int(g["page"]),
position=int(g["position"]),
entry_type=int(g["entry_type"]),
)
),
)
expanded.append(
kTGHZ2013Dict(
reading=reading,
locations=exploded_locations,
)
),
)
return expanded

Expand Down Expand Up @@ -353,7 +353,7 @@ def expand_kSMSZD2003Index(
kSMSZD2003IndexDict(
page=int(g["page"]),
position=int(g["position"]),
)
),
)
return expanded

Expand Down Expand Up @@ -395,7 +395,7 @@ def expand_kSMSZD2003Readings(
kSMSZD2003ReadingsDict(
mandarin=mandarin.split(","),
cantonese=cantonese.split(","),
)
),
)
return expanded

Expand Down Expand Up @@ -448,7 +448,8 @@ def expand_kHanyuPinyin(
virtual=int(g["virtual"]),
)
expanded[i] = kHanyuPinyinDict(
locations=expanded[i]["locations"], readings=expanded[i]["readings"]
locations=expanded[i]["locations"],
readings=expanded[i]["readings"],
)
return expanded

Expand Down Expand Up @@ -511,7 +512,8 @@ def expand_kXHC1983(
substituted=g["substituted"] == "*",
)
expanded[i] = kXHC1983Dict(
locations=expanded[i]["locations"], reading=expanded[i]["reading"]
locations=expanded[i]["locations"],
reading=expanded[i]["reading"],
)
return expanded

Expand Down Expand Up @@ -592,7 +594,7 @@ def expand_kRSAdobe_Japan1_6(value: t.List[str]) -> t.List[kRSAdobe_Japan1_6Dict
"radical": int(g["radical"]),
"strokes": int(g["strokes"]),
"strokes-residue": int(g["strokes_residue"]),
}
},
)
return expanded

Expand Down Expand Up @@ -630,7 +632,7 @@ def expand_kCihaiT(value: t.List[str]) -> t.List[kCihaiTDict]:
"page": int(m["page"]),
"row": int(m["row"]),
"character": int(m["character"]),
}
},
)
return expanded

Expand Down Expand Up @@ -731,7 +733,7 @@ def expand_kFenn(value: t.List[str]) -> t.List[kFennDict]:
assert g is not None

expanded[i] = kFennDict(
{"phonetic": g["phonetic"], "frequency": g["frequency"]}
{"phonetic": g["phonetic"], "frequency": g["frequency"]},
)
return expanded

Expand Down Expand Up @@ -762,7 +764,7 @@ def expand_kHanyuPinlu(value: t.List[str]) -> t.List[kHanyuPinluDict]:
assert g is not None

expanded[i] = kHanyuPinluDict(
{"phonetic": g["phonetic"], "frequency": int(g["frequency"])}
{"phonetic": g["phonetic"], "frequency": int(g["frequency"])},
)
return expanded

Expand Down Expand Up @@ -954,7 +956,7 @@ def expand_kGSR(value: t.List[str]) -> t.List[kGSRDict]:
"set": int(g["set"]),
"letter": g["letter"],
"apostrophe": g["apostrophe"] == "'",
}
},
)
return expanded

Expand Down Expand Up @@ -1071,7 +1073,7 @@ def expand_kStrange(
kStrangeDict(
property_type=property_type,
characters=characters,
)
),
)
return expanded

Expand Down Expand Up @@ -1134,7 +1136,7 @@ def expand_kMojiJoho(
serial_number=serial_number,
variation_sequence=variation_sequence,
standard=serial_number == default_serial,
)
),
)
return kMojiJohoDict(
serial_number=default_serial,
Expand Down
2 changes: 1 addition & 1 deletion src/unihan_etl/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class Options:
zip_path: pathlib.Path = UNIHAN_ZIP_PATH
work_dir: pathlib.Path = WORK_DIR
fields: t.Sequence[str] = dataclasses.field(
default_factory=lambda: INDEX_FIELDS + UNIHAN_FIELDS
default_factory=lambda: INDEX_FIELDS + UNIHAN_FIELDS,
)
format: t.Literal["json", "csv", "yaml", "python"] = "csv"
input_files: t.List[str] = dataclasses.field(default_factory=lambda: UNIHAN_FILES)
Expand Down
15 changes: 10 additions & 5 deletions src/unihan_etl/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def unihan_full_options(unihan_full_path: pathlib.Path) -> UnihanOptions:

@pytest.fixture(scope="session")
def unihan_full_packager(
unihan_full_path: pathlib.Path, unihan_full_options: "UnihanOptions"
unihan_full_path: pathlib.Path,
unihan_full_options: "UnihanOptions",
) -> "Packager":
"""Return Packager for "full" portion of UNIHAN, return a UnihanOptions."""
return Packager(unihan_full_options)
Expand Down Expand Up @@ -224,7 +225,8 @@ def unihan_quick_options(

@pytest.fixture(scope="session")
def unihan_quick_packager(
unihan_quick_path: pathlib.Path, unihan_quick_options: "UnihanOptions"
unihan_quick_path: pathlib.Path,
unihan_quick_options: "UnihanOptions",
) -> "Packager":
"""Bootstrap a small, but effective portion of UNIHAN, return a UnihanOptions."""
return Packager(unihan_quick_options)
Expand Down Expand Up @@ -358,7 +360,8 @@ def unihan_home_user_name() -> str:

@pytest.fixture(scope="session")
def unihan_user_path(
unihan_home_path: pathlib.Path, unihan_home_user_name: str
unihan_home_path: pathlib.Path,
unihan_home_user_name: str,
) -> pathlib.Path:
"""Return temporary user directory.

Expand Down Expand Up @@ -427,15 +430,17 @@ def unihan_mock_test_dir(tmp_path_factory: pytest.TempPathFactory) -> pathlib.Pa

@pytest.fixture(scope="session")
def unihan_mock_zip_path(
unihan_mock_test_dir: pathlib.Path, unihan_mock_zip_pathname: str
unihan_mock_test_dir: pathlib.Path,
unihan_mock_zip_pathname: str,
) -> pathlib.Path:
"""Return path to Unihan zipfile."""
return unihan_mock_test_dir / unihan_mock_zip_pathname


@pytest.fixture(scope="session")
def unihan_mock_zip(
unihan_mock_zip_path: pathlib.Path, unihan_quick_data: str
unihan_mock_zip_path: pathlib.Path,
unihan_quick_data: str,
) -> zipfile.ZipFile:
"""Return Unihan zipfile."""
zf = zipfile.ZipFile(str(unihan_mock_zip_path), "a")
Expand Down
6 changes: 5 additions & 1 deletion src/unihan_etl/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ class ReportHookFn(t.Protocol):
"""Progress bar callback for download()."""

def __call__(
self, count: int, block_size: int, total_size: int, out: t.IO[str] = sys.stdout
self,
count: int,
block_size: int,
total_size: int,
out: t.IO[str] = sys.stdout,
) -> object:
"""Print progress bar during download."""
...
Expand Down
7 changes: 5 additions & 2 deletions src/unihan_etl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ def ucnstring_to_unicode(ucn_string: str) -> str:


def _dl_progress(
count: int, block_size: int, total_size: int, out: t.IO[str] = sys.stdout
count: int,
block_size: int,
total_size: int,
out: t.IO[str] = sys.stdout,
) -> None:
"""
MIT License: https://github.com/okfn/dpm-old/blob/master/dpm/util.py.
Expand Down Expand Up @@ -112,7 +115,7 @@ def format_size(_bytes: int) -> str:
percent,
int(round(percent / 2)) * "=",
int(round(50 - percent / 2)) * " ",
)
),
)
out.flush()
if maxdownloaded >= total_size:
Expand Down
Loading
Loading