Skip to content

Commit

Permalink
updated 'wdu' code to save unit representations in the representation…
Browse files Browse the repository at this point in the history
…s table and not the wdunits table
  • Loading branch information
stuchalk committed Dec 17, 2024
1 parent 563fbe0 commit f7a48ae
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 77 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "pypi"
attrs = ">=24.2.0"
bs4 = ">=0.0.2"
charset-normalizer = ">=3.4.0"
django = ">=5.1.3"
django = ">=5.1.4"
GTC = ">=1.5.1"
gunicorn = ">=23.0.0"
html5 = ">=0.0.9"
Expand Down
106 changes: 53 additions & 53 deletions dashboard/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from units.functions import *
from wdfunctions import *


choice = 'wdc'

local = timezone("America/New_York")
Expand Down Expand Up @@ -553,7 +552,6 @@
else:
print("found '" + str(ent.value) + "' (" + str(ent.id) + ")")


# get list of unit of measurement subclasses on Wikidata
if choice == 'wdc':
classes = wdclasses() # call class to update if working on wikidata OR download from wd and parse below
Expand Down Expand Up @@ -630,6 +628,9 @@
# units = wdunits() # call class to update wdunits if working on wikidata OR download from wd and parse below
# query to server not working currently (11/14/24)

# define variables
repsysids = {"qudt": 10, "iev": 21, "igb": 3, "ncit": 9, "ucum": 2, "unece": 6, "uom": 13, "wolf": 20, "wur": 23}

units = None
file = f'umis_units_query_111524.json'
if os.path.exists(os.path.join(BASE_DIR, STATIC_URL, file)):
Expand All @@ -641,74 +642,73 @@

cnt = 0
for unit in units:
# print(unit)
# exit()
# add/update wikidata entry
flds = ['curl', 'cls', 'uurl', 'unit', 'qurl', 'quant', 'factor', 'iev', 'igb', 'ncit',
'qudt', 'ucum', 'unece', 'uom', 'wolf', 'wur']
uflds = ['iev', 'igb', 'ncit', 'qudt', 'ucum', 'unece', 'uom', 'wolf', 'wur']
dt = local.localize(datetime.now())
if not isinstance(unit['uurl'], str):
found = Wdunits.objects.filter(uurl__exact=unit['uurl']['value'])
else:
found = Wdunits.objects.filter(uurl__exact=unit['uurl'])

# add unit if not found
if not found:
# convert fields to be consistent across sources
keys = unit.keys()
for fld in flds:
if fld not in keys:
# only via qwikidata are fields not set
unit.update({fld: None})
unit.update({fld: None})
if not isinstance(fld, str):
setattr(unit, fld, getattr(unit, fld))
# print(unit)
# exit()

wu = Wdunits(
cls=unit['cls'],
unit=unit['unit'],
quant=unit['quant'],
factor=unit['factor'],
curl=unit['curl'],
uurl=unit['uurl'],
qurl=unit['qurl'],
iev=unit['iev'],
igb=unit['igb'],
ncit=unit['ncit'],
qudt=unit['qudt'],
ucum=unit['ucum'],
unece=unit['unece'],
uom=unit['uom'],
wolf=unit['wolf'],
wur=unit['wur'],
added=date.today(),
updated=local.localize(datetime.now())
)
cls = Wdclasses.objects.filter(url__exact=unit['curl'])
if cls:
wu.wdclass_id = cls[0].id
wu.save()
print("added '" + wu.unit + "' (" + str(wu.id) + ")")
cnt += 1
if cnt == 300:
exit()
# add unit
wu = Wdunits(cls=unit['cls'], unit=unit['unit'], quant=unit['quant'], factor=unit['factor'],
curl=unit['curl'], uurl=unit['uurl'], qurl=unit['qurl'], added=date.today(), updated=dt
)
else:
# check for unit representation data and add if the field is empty
f = found[0]
# print(unit.keys())
# print(f.__dict__)
# exit()
for fld in flds:
if fld in unit.keys() and getattr(f, fld) is None:
setattr(f, fld, unit.get(fld))
f.save()
print("field " + fld + " updated for unit " + f.unit)
# check for unit class being set
if f.wdclass_id is None:
cls = Wdclasses.objects.filter(url__exact=unit['curl'])
if cls:
f.wdclass_id = cls[0].id
f.save()
print("added class '" + str(f.wdclass_id) + "'")
print("found unit '" + f.unit + "'")
wu = found[0]

# get and add wd unit class if available
cls = Wdclasses.objects.filter(url__exact=unit['curl'])
if cls:
wu.wdclass_id = cls[0].id
# save to wdunits table
wu.save()

# add any unit reps to the representations table
for ufld in uflds:
repsysid, strng = None, None
if unit[ufld]:
# find unit string in the strngs table
strng = Strngs.objects.get(string=unit[ufld])
if not strng:
# add new unit string
strng = Strngs(string=unit[ufld], status='current', autoadded='yes', updated=dt)

repsysid = repsysids[ufld]
rep = Representations.objects.get(repsystem__id=repsysid, strng_id=strng.id)
if rep:
# update representation entry with wdunit_id
rep.wdunit_id = wu.id
rep.onwd = 'yes'
rep.save()
else:
# add representation entry
urlep = 'no'
if ufld in ["qudt", "iev", "igb", "ncit", "uom"]:
urlep = 'yes'
newrep = Representations(wdunit_id=wu.id, repsystem_id=repsysid, strng_id=strng.id,
url_endpoint=urlep, status='current', onwd='yes', checked='no', updated=dt)
newrep.save()

print("added '" + wu.unit + "' (" + str(wu.id) + ")")
cnt += 1
if cnt > 0:
exit()


# get a list of quantities on wikidata
if choice == 'wdq':
Expand Down Expand Up @@ -737,7 +737,7 @@
else:
tmp = re.findall(r'alttext="\{(.+?)\}"', isq)
isq = (tmp[0].replace("\\displaystyle", '').replace("\\mathsf", '').replace(' ', '').
replace('{{', '').replace('}}', ''))
replace('{{', '').replace('}}', ''))
quant['isq'] = isq
else:
quant['isq'] = None
Expand Down
62 changes: 39 additions & 23 deletions dashboard/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,14 @@
local = timezone("America/New_York")


# move reps from wdunits table to the representations table as links
def movereps(sys):
repsysids = {"qudt": 10}
repsysids = {"qudt": 10, "iev": 21, "igb": 3, "ncit": 9, "ucum": 2, "unece": 6, "uom": 13, "wolf": 20, "wur": 23}
repsysid = repsysids[sys]
wtmp = Wdunits.objects.filter(qudt__isnull=False).order_by('qudt').values_list('id', 'qudt')
wtmp = None
fn__isnull = sys + "__isnull"
# filtering on a field whose name is held in a variable uses the ** construct below
wtmp = Wdunits.objects.filter(**{fn__isnull: False}).order_by(sys).values_list('id', sys)
wunts = {}
for w in wtmp:
wunts.update({w[0]: w[1]})
Expand All @@ -33,35 +37,47 @@ def movereps(sys):
strid = rs[0]['strng_id']
rep = Representations.objects.get(repsystem__id=repsysid, strng_id=strid)
rep.wdunit_id = wdid
rep.onwd = 'yes'
rep.save()
print("linked wdunit " + wunt)
# remove entry (clean) from the wdunits table
w = Wdunits.objects.get(id=wdid)
if sys == "qudt":
w.qudt = None
w.save()
print(str(count) + " linked wdunit " + wunt)
else:
print("no matches: " + str(wunt))
# check if the string is already in the strngs table but associated with another repsystem
strngs = Strngs.objects.filter(string=wunt)
dt = local.localize(datetime.now())
# add to strngs table
strng = Strngs(string=wunt, status='current', autoadded='yes', updated=dt)
strng.save()
if not strngs:
print("no matches: " + wunt)
# add to strngs table
strng = Strngs(string=wunt, status='current', autoadded='yes', updated=dt)
strng.save()
if strng.id:
print("added string: " + wunt)
else:
print(wunt + " not added as string!")
exit()
else:
strng = strngs[0]
# add to representations table
urlep = None
if sys == "qudt":
urlep = 'no'
if sys in ["qudt", "iev", "igb", "ncit", "uom"]:
urlep = 'yes'
newrep = Representations(wdunit_id=wdid, repsystem_id=repsysid, strng_id=strng.id,
url_endpoint=urlep, status='current', checked='no', updated=dt)
url_endpoint=urlep, status='current', onwd='yes', checked='no', updated=dt)
newrep.save()
print("added wdunit " + wunt)
# remove entry (clean) from the wdunits table
w = Wdunits.objects.get(id=wdid)
if sys == "qudt":
w.qudt = None
w.save()
if count > 4:
if newrep.id:
print(str(count) + " added wdunit: " + wunt)
else:
print(wunt + " not added as representation!")
exit()

# remove entry (clean) from the wdunits table
w = Wdunits.objects.get(id=wdid)
# updating a field whose name is held in a variable uses the setattr function
setattr(w, sys, None)
w.save()

if count > 299:
exit()
exit()


movereps("qudt")
movereps("wur")
1 change: 1 addition & 0 deletions units/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ class Representations(models.Model):
url_endpoint = models.CharField(max_length=3, blank=True, null=True)
status = models.CharField(max_length=7, blank=True, null=True)
checked = models.CharField(max_length=3)
onwd = models.CharField(max_length=3)
updated = models.DateTimeField()

class Meta:
Expand Down

0 comments on commit f7a48ae

Please sign in to comment.