From cf697a98adc4edb484081e5bb496311e84283a0b Mon Sep 17 00:00:00 2001 From: Emerson Rocha Date: Fri, 26 Nov 2021 15:51:11 -0300 Subject: [PATCH] bugfix: Tags like Ancient Greek, grc-Grek, should still have BCP47 (simplify templating) --- CHANGELOG.md | 4 +-- bin/hxltmcli.py | 10 ++++---- bin/hxltmdexml.py | 64 +++++++++++++++++++++++++++++++++++++++++++++-- setup.cfg | 2 +- 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08fc62e..c705013 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,12 +20,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Generate XML now have less white spaces - `hxltmdexml` now also export HXLTM with deterministic ordering (based on `--agendum-linguam`) - ### Fixed - Bugfixed issue with importing back term value (only on the generic XML importer; TMX/TBX was not affected) -- Bugfixed not previsoulu detected corner case of not exporting exactly the +- Bugfixed not previously detected corner case of not exporting exactly the first item (non HXLTM formats, and roundtrip) +- Tags like Ancient Greek, grc-Grek, should still have BCP47 (simplify templating) ## [v0.8.9] - 2020-11-17 ### Added diff --git a/bin/hxltmcli.py b/bin/hxltmcli.py index 828313c..710ede0 100755 --- a/bin/hxltmcli.py +++ b/bin/hxltmcli.py @@ -2431,10 +2431,6 @@ class HXLTMDatumNormam: # pylint: disable=too-many-instance-attributes >>> HXLTMDatumNormam('356_XZ_X_wadegile_private1@museum.icom.भारत').a() '+normam_356_xz_museum_icom_भारत_x_private1_wadegile' - ->>> HXLTMLinguam('rmf-Latn').v() -{'_typum': 'HXLTMLinguam', 'crudum': 'rmf-Latn', \ -'linguam': 'rmf-Latn', 'iso6393': 'rmf', 'iso115924': 'Latn'} """ _typum: InitVar[str] = None @@ -5472,7 +5468,7 @@ class HXLTMLinguam: # pylint: disable=too-many-instance-attributes >>> HXLTMLinguam('rmf-Latn').v() {'_typum': 'HXLTMLinguam', 'crudum': 'rmf-Latn', \ -'linguam': 'rmf-Latn', 'iso6393': 'rmf', 'iso115924': 'Latn'} +'linguam': 
'rmf-Latn', 'iso6393': 'rmf', 'iso115924': 'Latn', 'bcp47': 'rmf'} Kalo Finnish Romani, Latin script (no ISO 2 language, so no attr) @@ -5622,6 +5618,10 @@ def initialle(self, strictum: bool): # pylint: disable=too-many-branches if self.imperium: self.imperium = self.imperium.upper() + # Tags like Ancient Greek, grc-Grek, should still have BCP47 + if not self.bcp47 and self.iso6393: + self.bcp47 = self.iso6393 + if self.privatum is not None and len(self.privatum) > 0: # https://tools.ietf.org/search/bcp47#page-2-12 # '4.5. Canonicalization of Language Tags' diff --git a/bin/hxltmdexml.py b/bin/hxltmdexml.py index 2f48358..3ccc8f9 100755 --- a/bin/hxltmdexml.py +++ b/bin/hxltmdexml.py @@ -1537,7 +1537,7 @@ class HXLTMLinguam: # pylint: disable=too-many-instance-attributes >>> HXLTMLinguam('rmf-Latn').v() {'_typum': 'HXLTMLinguam', 'crudum': 'rmf-Latn', \ -'linguam': 'rmf-Latn', 'iso6393': 'rmf', 'iso115924': 'Latn'} +'linguam': 'rmf-Latn', 'iso6393': 'rmf', 'iso115924': 'Latn', 'bcp47': 'rmf'} Kalo Finnish Romani, Latin script (no ISO 2 language, so no attr) @@ -1617,7 +1617,7 @@ def __init__(self, linguam: str, else: self.vacuum = vacuum - def initialle(self, strictum: bool): + def initialle(self, strictum: bool): # pylint: disable=too-many-branches """ Trivia: initiāle, https://en.wiktionary.org/wiki/initialis#Latin """ @@ -1687,6 +1687,10 @@ def initialle(self, strictum: bool): if self.imperium: self.imperium = self.imperium.upper() + # Tags like Ancient Greek, grc-Grek, should still have BCP47 + if not self.bcp47 and self.iso6393: + self.bcp47 = self.iso6393 + if self.privatum is not None and len(self.privatum) > 0: # https://tools.ietf.org/search/bcp47#page-2-12 # '4.5. Canonicalization of Language Tags' @@ -1727,6 +1731,62 @@ def a(self): # pylint: disable=invalid-name return ''.join(resultatum).lower() + def aequale( + self, + clavem_et_linguam: Union[str, Type['HXLTMLinguam']]) -> int: + """aequāle crudum clavem? 
+ + Args: + clavem_et_linguam (str, HXLTMLinguam): Textum crudum et linguam + + Returns: + int: match score: 100, 95 or 90; -100 when no rule matches + """ + # @TODO: the numeric results on this function are still a usable + # draft. They can be used later to assert the closest + # option to return a viable result + + if clavem_et_linguam and isinstance(clavem_et_linguam, str): + neo = HXLTMLinguam(clavem_et_linguam) + else: + neo = clavem_et_linguam + + # print(neo.a(), self.a()) + + if neo.a() == self.a(): + return 100 + + if neo.iso6391a2 == self.iso6391a2 and \ + neo.iso6393 == self.iso6393 and \ + neo.iso115924 == self.iso115924 and \ + neo.imperium == self.imperium: + # non privatum + return 95 + + if neo.iso6391a2 == self.iso6391a2 and \ + neo.iso6393 == self.iso6393 and \ + neo.iso115924 == self.iso115924 and \ + neo.privatum == self.privatum: + # non imperium + return 95 + + if neo.iso6393 == self.iso6393 and \ + neo.iso115924 == self.iso115924 and \ + (neo.imperium == self.imperium or + neo.privatum == self.privatum): + # non iso6391a2 + # non imperium || non privatum + return 95 + + if neo.iso6393 == self.iso6393 and \ + neo.iso115924 == self.iso115924: + # non iso6391a2 + # non privatum + # non imperium + return 90 + + return -100 + def designo(self, clavem: str, rem: Any) -> Type['HXLTMLinguam']: """Designo clavem rem diff --git a/setup.cfg b/setup.cfg index be1d161..0293c61 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ # if forks do eventually exist. # The 'hxltm', as 2021-11-13, is actually not used on Python pip. name = hxltm-eticaai -version = 0.8.9 +version = 0.8.10 author = Emerson Rocha author_email = rocha@ieee.org description = Multilingual Terminology in Humanitarian Language Exchange (HXLTM) - reference implementation.