From f7a48ae035bced4c6391420ce80ae0f29ea01a58 Mon Sep 17 00:00:00 2001 From: Stuart Chalk Date: Tue, 17 Dec 2024 05:32:14 -0500 Subject: [PATCH] updated 'wdu' code to save unit representations in the representations table and not the wdunits table --- Pipfile | 2 +- dashboard/example.py | 106 ++++++++++++++++++++--------------------- dashboard/functions.py | 62 +++++++++++++++--------- units/models.py | 1 + 4 files changed, 94 insertions(+), 77 deletions(-) diff --git a/Pipfile b/Pipfile index 9a528d3..4c439e2 100644 --- a/Pipfile +++ b/Pipfile @@ -7,7 +7,7 @@ name = "pypi" attrs = ">=24.2.0" bs4 = ">=0.0.2" charset-normalizer = ">=3.4.0" -django = ">=5.1.3" +django = ">=5.1.4" GTC = ">=1.5.1" gunicorn = ">=23.0.0" html5 = ">=0.0.9" diff --git a/dashboard/example.py b/dashboard/example.py index d20444c..12cad17 100644 --- a/dashboard/example.py +++ b/dashboard/example.py @@ -11,7 +11,6 @@ from units.functions import * from wdfunctions import * - choice = 'wdc' local = timezone("America/New_York") @@ -553,7 +552,6 @@ else: print("found '" + str(ent.value) + "' (" + str(ent.id) + ")") - # get list of unit of measurement subclasses on Wikidata if choice == 'wdc': classes = wdclasses() # call class to update if working on wikidata OR download from wd and parse below @@ -630,6 +628,9 @@ # units = wdunits() # call class to update wdunits if working on wikidata OR download from wd and parse below # query to server not working currently (11/14/24) + # define variables + repsysids = {"qudt": 10, "iev": 21, "igb": 3, "ncit": 9, "ucum": 2, "unece": 6, "uom": 13, "wolf": 20, "wur": 23} + units = None file = f'umis_units_query_111524.json' if os.path.exists(os.path.join(BASE_DIR, STATIC_URL, file)): @@ -641,74 +642,73 @@ cnt = 0 for unit in units: - # print(unit) - # exit() # add/update wikidata entry flds = ['curl', 'cls', 'uurl', 'unit', 'qurl', 'quant', 'factor', 'iev', 'igb', 'ncit', 'qudt', 'ucum', 'unece', 'uom', 'wolf', 'wur'] + uflds = ['iev', 'igb', 'ncit', 'qudt', 
'ucum', 'unece', 'uom', 'wolf', 'wur'] + dt = local.localize(datetime.now()) if not isinstance(unit['uurl'], str): found = Wdunits.objects.filter(uurl__exact=unit['uurl']['value']) else: found = Wdunits.objects.filter(uurl__exact=unit['uurl']) + + # add unit if not found if not found: # convert fields to be consistent across sources keys = unit.keys() for fld in flds: if fld not in keys: # only via qwikidata are fields not set - unit.update({fld: None}) + unit.update({fld: None}) if not isinstance(fld, str): setattr(unit, fld, getattr(unit, fld)) - # print(unit) - # exit() - wu = Wdunits( - cls=unit['cls'], - unit=unit['unit'], - quant=unit['quant'], - factor=unit['factor'], - curl=unit['curl'], - uurl=unit['uurl'], - qurl=unit['qurl'], - iev=unit['iev'], - igb=unit['igb'], - ncit=unit['ncit'], - qudt=unit['qudt'], - ucum=unit['ucum'], - unece=unit['unece'], - uom=unit['uom'], - wolf=unit['wolf'], - wur=unit['wur'], - added=date.today(), - updated=local.localize(datetime.now()) - ) - cls = Wdclasses.objects.filter(url__exact=unit['curl']) - if cls: - wu.wdclass_id = cls[0].id - wu.save() - print("added '" + wu.unit + "' (" + str(wu.id) + ")") - cnt += 1 - if cnt == 300: - exit() + # add unit + wu = Wdunits(cls=unit['cls'], unit=unit['unit'], quant=unit['quant'], factor=unit['factor'], + curl=unit['curl'], uurl=unit['uurl'], qurl=unit['qurl'], added=date.today(), updated=dt + ) else: # check for unit representation data and add if the field is empty - f = found[0] - # print(unit.keys()) - # print(f.__dict__) - # exit() - for fld in flds: - if fld in unit.keys() and getattr(f, fld) is None: - setattr(f, fld, unit.get(fld)) - f.save() - print("field " + fld + " updated for unit " + f.unit) - # check for unit class being set - if f.wdclass_id is None: - cls = Wdclasses.objects.filter(url__exact=unit['curl']) - if cls: - f.wdclass_id = cls[0].id - f.save() - print("added class '" + str(f.wdclass_id) + "'") - print("found unit '" + f.unit + "'") + wu = found[0] + + # get 
and add wd unit class if available + cls = Wdclasses.objects.filter(url__exact=unit['curl']) + if cls: + wu.wdclass_id = cls[0].id + # save to wdunits table + wu.save() + + # add any unit reps to the representations table + for ufld in uflds: + repsysid, strng = None, None + if unit[ufld]: + # find unit string in the strng table + strng = Strngs.objects.get(string=unit[ufld]) + if not strng: + # add new unit string + strng = Strngs(string=unit[ufld], status='current', autoadded='yes', updated=dt) + + repsysid = repsysids[ufld] + rep = Representations.objects.get(repsystem__id=repsysid, strng_id=strng.id) + if rep: + # update representation entry with wdunit_id + rep.wdunit_id = wu.id + rep.onwd = 'yes' + rep.save() + else: + # add representation entry + urlep = 'no' + if ufld in ["qudt", "iev", "igb", "ncit", "uom"]: + urlep = 'yes' + newrep = Representations(wdunit_id=wu.id, repsystem_id=repsysid, strng_id=strng.id, + url_endpoint=urlep, status='current', onwd='yes', checked='no', updated=dt) + newrep.save() + + print("added '" + wu.unit + "' (" + str(wu.id) + ")") + cnt += 1 + if cnt > 0: + exit() + # get a list of quantities on wikidata if choice == 'wdq': @@ -737,7 +737,7 @@ else: tmp = re.findall(r'alttext="\{(.+?)\}"', isq) isq = (tmp[0].replace("\\displaystyle", '').replace("\\mathsf", '').replace(' ', ''). 
- replace('{{', '').replace('}}', '')) + replace('{{', '').replace('}}', '')) quant['isq'] = isq else: quant['isq'] = None diff --git a/dashboard/functions.py b/dashboard/functions.py index c75ad12..a298bbb 100644 --- a/dashboard/functions.py +++ b/dashboard/functions.py @@ -10,10 +10,14 @@ local = timezone("America/New_York") +# move reps from wdunits table to the representations table as links def movereps(sys): - repsysids = {"qudt": 10} + repsysids = {"qudt": 10, "iev": 21, "igb": 3, "ncit": 9, "ucum": 2, "unece": 6, "uom": 13, "wolf": 20, "wur": 23} repsysid = repsysids[sys] - wtmp = Wdunits.objects.filter(qudt__isnull=False).order_by('qudt').values_list('id', 'qudt') + wtmp = None + fn__isnull = sys + "__isnull" + # the field name is held in a variable, so the filter uses the ** construct below + wtmp = Wdunits.objects.filter(**{fn__isnull: False}).order_by(sys).values_list('id', sys) wunts = {} for w in wtmp: wunts.update({w[0]: w[1]}) @@ -33,35 +37,47 @@ def movereps(sys): strid = rs[0]['strng_id'] rep = Representations.objects.get(repsystem__id=repsysid, strng_id=strid) rep.wdunit_id = wdid + rep.onwd = 'yes' rep.save() - print("linked wdunit " + wunt) - # remove entry (clean) from the wdunits table - w = Wdunits.objects.get(id=wdid) - if sys == "qudt": - w.qudt = None - w.save() + print(str(count) + " linked wdunit " + wunt) else: - print("no matches: " + str(wunt)) + # check if the string is already in the strngs table but associated with another repsystem + strngs = Strngs.objects.filter(string=wunt) dt = local.localize(datetime.now()) - # add to strngs table - strng = Strngs(string=wunt, status='current', autoadded='yes', updated=dt) - strng.save() + if not strngs: + print("no matches: " + wunt) + # add to strngs table + strng = Strngs(string=wunt, status='current', autoadded='yes', updated=dt) + strng.save() + if strng.id: + print("added string: " + wunt) + else: + print(wunt + " not added as string!") + exit() + else: + strng = strngs[0] # add to representations table - 
urlep = None - if sys == "qudt": + urlep = 'no' + if sys in ["qudt", "iev", "igb", "ncit", "uom"]: urlep = 'yes' newrep = Representations(wdunit_id=wdid, repsystem_id=repsysid, strng_id=strng.id, - url_endpoint=urlep, status='current', checked='no', updated=dt) + url_endpoint=urlep, status='current', onwd='yes', checked='no', updated=dt) newrep.save() - print("added wdunit " + wunt) - # remove entry (clean) from the wdunits table - w = Wdunits.objects.get(id=wdid) - if sys == "qudt": - w.qudt = None - w.save() - if count > 4: + if newrep.id: + print(str(count) + " added wdunit: " + wunt) + else: + print(wunt + " not added as representation!") + exit() + + # remove entry (clean) from the wdunits table + w = Wdunits.objects.get(id=wdid) + # update field based on variable for field name uses setattr function + setattr(w, sys, None) + w.save() + + if count > 299: exit() exit() -movereps("qudt") +movereps("wur") diff --git a/units/models.py b/units/models.py index 1124bb2..6ceb9ca 100644 --- a/units/models.py +++ b/units/models.py @@ -382,6 +382,7 @@ class Representations(models.Model): url_endpoint = models.CharField(max_length=3, blank=True, null=True) status = models.CharField(max_length=7, blank=True, null=True) checked = models.CharField(max_length=3) + onwd = models.CharField(max_length=3) updated = models.DateTimeField() class Meta: