From 008ee4df19400d80a00f9600ec1b1f9cf3608ec9 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Mon, 5 Feb 2024 07:11:17 -0600 Subject: [PATCH] !squash more --- src/unihan_etl/validator.py | 78 ++++++++++++++++++++++++------------- tests/test_validator.py | 9 ++++- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/src/unihan_etl/validator.py b/src/unihan_etl/validator.py index 91154cf7..5bb5c45b 100644 --- a/src/unihan_etl/validator.py +++ b/src/unihan_etl/validator.py @@ -18,38 +18,62 @@ class kTGHZ2013Location(pydantic.BaseModel): position: int # 0 for a main entry and greater than 0 for a parenthesized or bracketed variant # # of the main entry - entry_type: int + entry_type: int = pydantic.Field( + description="0 for a main entry and greater than 0 for a parenthesized or bracketed variant of the main entry" + ) -class kTGHZ2013(UCNBaseModel): +class kTGHZ2013Reading(pydantic.BaseModel): """kTGHZ2013 model.""" reading: str locations: list[kTGHZ2013Location] - model_config = pydantic.ConfigDict(validate_assignment=True) - - # @classmethod - # def validator(cls, raw: str) -> "kTGHZ2013": - # if len(raw) == 1: - # ucn, field, val = raw[0].split("\t") - # print(ucn, field, val) - # out = expand_kTGHZ2013([val]) - # - # return cls( - # ucn=ucn, reading=out[0]["reading"], locations=out[0]["locations"] - # ) - # - - def __init__(self, *args, **kwargs) -> None: - if args and len(args) == 1: - ucn, field, val = args[0].split("\t") - print(ucn, field, val) - - out = expand_kTGHZ2013([val]) - - return super().__init__( - ucn=ucn, reading=out[0]["reading"], locations=out[0]["locations"] - ) - return super().__init__(*args, **kwargs) +class kTGHZ2013(UCNBaseModel): + """kTGHZ2013 model.""" + + readings: list[kTGHZ2013Reading] + + model_config = pydantic.ConfigDict( + validate_assignment=True, + arbitrary_types_allowed=True, + ) + + @classmethod + def from_string(cls, value: str) -> "kTGHZ2013": + """Build the model from a tab-separated UNIHAN line (ucn, field, value) or a dict.""" + if isinstance(value, 
str): + ucn, field, val = value.split("\t") + outs = expand_kTGHZ2013(val.split(" ")) + + return cls( + ucn=ucn, + readings=[ + kTGHZ2013Reading( + reading=out["reading"], + locations=[ + kTGHZ2013Location( + page=loc["page"], + position=loc["position"], + entry_type=loc["entry_type"], + ) + for loc in out["locations"] + ], + ) + for out in outs + ], + ) + # reading=out["reading"], + # locations=[ + # kTGHZ2013Location( + # page=location["page"], + # position=location["position"], + # entry_type=location["entry_type"], + # ) + # for location in out["locations"] + # ], + # ) + elif isinstance(value, dict): + return pydantic.parse_obj_as(cls, value) + raise pydantic.ValidationError("Invalid input for kTGHZ2013 model.") # noqa: TRY003 diff --git a/tests/test_validator.py b/tests/test_validator.py index f70eb8e2..c1d89c82 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -10,5 +10,12 @@ def test_kTGHZ2013() -> None: - model = validator.kTGHZ2013("U+3447 kTGHZ2013 482.140:zhòu") + model = validator.kTGHZ2013.from_string("U+3447 kTGHZ2013 482.140:zhòu") assert model.ucn == "U+3447" + + model = validator.kTGHZ2013.from_string( + "U+4E07 kTGHZ2013 256.090:mò 379.160:wàn" + ) + assert model.ucn == "U+4E07" + + print(f"\n{model}\n")