From 4a709ab7bc48f9516c0569dfa6a0be212268b2e4 Mon Sep 17 00:00:00 2001
From: Tony Narlock
Date: Sun, 4 Feb 2024 20:26:09 -0600
Subject: [PATCH] !squash wip lets see

---
 src/unihan_etl/validator.py | 82 +++++++++++++++++++++++++++++++++++++
 tests/test_validator.py     | 16 ++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 src/unihan_etl/validator.py
 create mode 100644 tests/test_validator.py

diff --git a/src/unihan_etl/validator.py b/src/unihan_etl/validator.py
new file mode 100644
index 00000000..34d7f2c4
--- /dev/null
+++ b/src/unihan_etl/validator.py
@@ -0,0 +1,82 @@
+import typing as t
+
+import pydantic
+
+from unihan_etl.expansion import expand_kTGHZ2013
+
+
+class UCNBaseModel(pydantic.BaseModel):
+    """Core model for UCN data."""
+
+    ucn: str
+
+
+class kTGHZ2013Location(pydantic.BaseModel):
+    """Core model for location."""
+
+    page: int
+    position: int
+    # 0 for a main entry and greater than 0 for a parenthesized or bracketed variant
+    # of the main entry
+    entry_type: int
+
+
+class kTGHZ2013(UCNBaseModel):
+    """kTGHZ2013 model.
+
+    Build it from keyword fields, or from one raw UNIHAN line (UCN, field name
+    and value separated by tab characters) passed as the only positional argument.
+    """
+
+    reading: str
+    locations: list[kTGHZ2013Location]
+
+    model_config = pydantic.ConfigDict(validate_assignment=True)
+
+    @classmethod
+    def from_raw(cls, raw: str) -> "kTGHZ2013":
+        """Create a model from one raw, tab-delimited UNIHAN line.
+
+        This is a plain alternate constructor, not a pydantic validator: a
+        ``mode="before"`` validator receives the input data of every
+        validation and must not build the model itself.
+
+        Raises
+        ------
+        ValueError
+            If ``raw`` does not have exactly three tab-separated fields.
+        """
+        return cls(raw)
+
+    def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+        """Initialize from keyword fields or from a single raw UNIHAN line.
+
+        Raises
+        ------
+        ValueError
+            If more than one positional argument is given, if positional and
+            keyword arguments are mixed, or if the raw line is malformed.
+        """
+        if not args:
+            super().__init__(**kwargs)
+            return
+
+        # Reject ambiguous calls before parsing anything.
+        if len(args) > 1:
+            raise ValueError(
+                "If a position argument is used, only 1 is allowed (the raw line)."
+            )
+        if kwargs:
+            raise ValueError(
+                "If a position argument is used, keyword arguments cannot be used."
+            )
+
+        parts = args[0].split("\t")
+        if len(parts) != 3:
+            raise ValueError(
+                f"Expected 3 tab-separated fields, got {len(parts)}: {args[0]!r}"
+            )
+        ucn, _field, val = parts
+        # NOTE(review): the ``**`` unpacking assumes expand_kTGHZ2013 returns a
+        # mapping of model field names to values -- confirm against expansion.
+        super().__init__(ucn=ucn, **expand_kTGHZ2013([val]))
diff --git a/tests/test_validator.py b/tests/test_validator.py
new file mode 100644
index 00000000..17de6bb2
--- /dev/null
+++ b/tests/test_validator.py
@@ -0,0 +1,16 @@
+"""Test validator models for UNIHAN fields."""
+import typing as t
+
+import pytest
+
+from unihan_etl import constants, validator
+
+if t.TYPE_CHECKING:
+    from typing_extensions import TypeAlias
+
+
+def test_kTGHZ2013() -> None:
+    """A raw UNIHAN line is parsed into a model carrying its UCN."""
+    raw = "U+3447\tkTGHZ2013\t482.140:zhòu"
+    assert validator.kTGHZ2013.from_raw(raw).ucn == "U+3447"
+    assert validator.kTGHZ2013(raw).ucn == "U+3447"