From dd6fa6340c788cb53cb241b072105bc723de4d1e Mon Sep 17 00:00:00 2001
From: Emerson Rocha <rocha@ieee.org>
Date: Wed, 1 Dec 2021 22:34:36 -0300
Subject: [PATCH] #11, #12: partial refactoring

---
 bin/hxltmcli.py             |  1 +
 ontologia/cor.hxltm.215.yml | 78 ++++++++++++++++++++++++-------------
 2 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/bin/hxltmcli.py b/bin/hxltmcli.py
index f8fb151..9e68f7e 100755
--- a/bin/hxltmcli.py
+++ b/bin/hxltmcli.py
@@ -5262,6 +5262,7 @@ def est_validum_ad_regula2(
         python_match = regulam_regex.match(textum_aut_regulam['hxl'])
 
         if not python_match:
+            # print('not python_match:', textum_aut_regulam, python_match)
             return False
 
         # TODO: _strictum could be used on the strategy to check if matches
diff --git a/ontologia/cor.hxltm.215.yml b/ontologia/cor.hxltm.215.yml
index 8d0e89a..f6ac4f1 100644
--- a/ontologia/cor.hxltm.215.yml
+++ b/ontologia/cor.hxltm.215.yml
@@ -2999,7 +2999,9 @@ ontologia_aliud_familiam:
 # Trivia: rēgulam, https://en.wiktionary.org/wiki/regula#Latin
 ontologia_regulam:
   # @see https://regex101.com/ (online regex tester, multiple engines)
+  # @see https://en.wikipedia.org/wiki/Comparison_of_regular_expression_engines
   # @see https://docs.python.org/3/howto/regex.html
+  # @see https://learnbyexample.github.io/py_regular_expressions/groupings-and-backreferences.html
   # @see https://developer.mozilla.org/en-US/docs/Web/JavaScript
   #      /Reference/Global_Objects/RegExp
   # @see https://pkg.go.dev/regexp/syntax
@@ -3030,33 +3032,51 @@ ontologia_regulam:
     # basim -> divisionem, classem, speciem
     basim:
       # https://regex101.com/r/XUOncM/5
+      # https://regex101.com/r/Ff27ID/3
       javascript: >-
         (?<divisionem>(#item|#meta))(?<classem>(\+conceptum|\+linguam|\+terminum))((?<linguam_de>(\+ix_de_[a-z_]*))|(?<linguam_est>(\+ix_est_[a-z_]*))|(?<linguam_i2a>(\+i_\w\w))?(?<linguam_i3a>(\+i_\w\w\w))(?<linguam_ig>(\+ig_\w\w\w\w\d\d\d\d))?((?<linguam_s4a>(\+is_\w{3,4})))(?<linguam_it>(\+it_[a-z0-9_]*))?)?(?<etcetera>(\+.*))?(?<datum_vocabularium>(\+v_[a-z_]*))?
       #  \#(?<divisionem>(item|meta)).+?(?<classem>(conceptum|linguam|terminum))(?<speciem>.*)
+      # https://learnbyexample.github.io/py_regular_expressions/groupings-and-backreferences.html
       python: |
+        # I: Abstractum (#meta) aut concretum (#item)
         (?P<divisionem>
           (\#item|\#meta)
         )
+        # II: Classem: +conceptum, +linguam aut +terminum
         (?P<classem>
           (\+conceptum|\+linguam|\+terminum)
         )
-        (
-          (?P<linguam_de>(\+ix_de_[a-z_]*))
-          |
-          (?P<linguam_est>(\+ix_est_[a-z_]*))
-          |
-          (?P<linguam_i2a>(\+i_\w\w))?
-          (?P<linguam_i3a>(\+i_\w\w\w))
-          (?P<linguam_ig>(\+ig_\w\w\w\w\d\d\d\d))?
-          ((?P<linguam_s4a>(\+is_\w{3,4})))
-          (?P<linguam_it>(\+it_[a-z0-9_]*))?
-        )?
-        (?P<etcetera>
-          (\+.*)
-        )?
-        (?P<datum_vocabularium>
-          (\+v_[a-z_]*)
+        # III: +linguam et +terminum; quod linguam?
+        # _[eng-Latn]Known non-enforcement: this will tolerate if classem = +conceptum[eng-Latn]_
+        ((?<!conceptum)
+          # III.I: implicitum aut explicitum
+          ## implicitum est
+          (
+            (?P<linguam_implicitum_de>(\+ii_de_linguam[a-z_]*))
+            |
+            (?P<linguam_implicitum_est>(\+ii_est_linguam[a-z_]*))
+          )
+          | # ...aut...
+          ## explicitum est
+          (
+            (?P<linguam_iso639_1_a>(\+i_\w\w))?
+            (?P<linguam_iso639_3_a>(\+i_\w\w\w))         # requisitum!
+            (?P<linguam_glotto>(\+ig_[a-z]{4}\d{4}))?
+            (  # Locum
+              (?P<linguam_iso3166_2_a>(\+ir_[a-z]{2}))
+              |
+              (?P<linguam_iso3166_3_a>(\+ir_[a-z]{3}))
+              |
+              (?P<linguam_unm49>(\+ir_[0-9]{3}))
+            )?
+            (  # scriptum codicem: requisitum!
+              ((?P<linguam_iso15924_a>(\+is_[a-z]{4})))
+              |
+              ((?P<linguam_iso15924_n>(\+is_[0-9]{3})))
+            )
+          )
         )?
+        .*
     # subspeciem:
     #   javascript: >-
     #     \(?<divisionem>(#item|#meta)).+?(?<classem>(conceptum|linguam|terminum))(?<speciem>.*)
@@ -3065,6 +3085,7 @@ ontologia_regulam:
 
     # https://regex101.com/r/ijNoTe/1
     # https://regex101.com/delete/nERE0vlhhSmLY2ircayaduP8
+    # https://regex101.com/r/Ff27ID/2
   # named group:
   #  (?P<hxltag>\#[a-zA-Z_]*)(?P<hxlattrs>\+\w*){0,20}
 
@@ -3137,23 +3158,24 @@ ontologia_regulam:
     python: '(^#item|^#meta)\+terminum'
 
 ### Example test cases
-#item+conceptum
-#meta+conceptum
+#meta+linguam+i_en+i_eng+ig_stan1293+ir_076+is_latn+it_en_por_latn+ib_x_ambigua
+#item+terminum+ii_de_linguam_fontem
+#meta+conceptum+i_en+i_eng+is_latn
 #item+conceptum+codicem
 #meta+linguam+i_en+i_eng+is_latn
-#meta+linguam+i_pt+i_por+ig_port1283+is_latn+it_en_por_latn+ib_x_ambigua
+#meta+linguam+i_en+i_eng+ig_stan1293+is_latn+it_en_por_latn+ib_x_ambigua
 #meta+linguam+i_en+i_eng+is_215
-#item+terminum+ib_h_est_linguam+v_linguam_maximum
-#item+terminum+ib_h_est_linguam+v_linguam_a
-#item+terminum+ib_h_est_linguam+v_linguam
-#item+terminum+ib_h_de_linguam
-#item+terminum+ib_h_est_linguam_fonte+v_lngam
-#item+terminum+ib_h_est_inguam_objectivum
-#item+terminum+ib_h_de_linguam_fontem
-#item+terminum+ib_h_de_linguam_objectivum
+#item+terminum+ii_est_linguam+v_linguam_maximum
+#item+terminum+ii_est_linguam+v_linguam_a
+#item+terminum+ii_est_linguam+v_linguam
+#item+terminum+ii_de_linguam
+#item+terminum+ii_est_linguam_fonte+v_lngam
+#item+terminum+ii_est_linguam_objectivum
+#item+terminum+ii_de_linguam_fontem
 #item+terminum+i_en+i_eng+is_latn+rem
-#meta+linguam+i_pt+i_por+ig_port1283+is_latn+izb47_t_en_por_latn+ib_x_ambigua
 
+## Dummy example I
+#item+terminum
 
 
 # Regexes test