Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to exchange reference datatype by list_item_id #63

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 78 additions & 14 deletions arches_controlled_lists/datatypes/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from arches.app.datatypes.base import BaseDataType
from arches.app.models.models import Node
from arches.app.models.graph import GraphValidationError
from arches.app.utils.i18n import rank_label

from arches_controlled_lists.models import ListItem

Expand All @@ -27,9 +28,9 @@ class Reference:


class ReferenceDataType(BaseDataType):
def to_python(self, value):
def to_python(self, value) -> list[Reference]:
if value is None:
return None
return []
if not value:
raise ValueError(_("Reference datatype value cannot be empty"))

Expand All @@ -54,6 +55,30 @@ def serialize(self, value):
]
return value

def to_representation(self, value):
    """Return a lightweight, display-oriented form of a reference value.

    Each parsed reference becomes a dict with two keys: ``list_item_id``
    (taken from the reference's first label) and ``display_value`` (the
    text of the label chosen by ``best_label``).

    Returns:
        ``None`` when ``value`` is ``None``; otherwise a list with one dict
        per reference parsed by ``to_python``.

    NOTE(review): every reference is assumed to carry at least one label;
    a label-less reference would fail on ``labels[0]`` -- confirm that
    parsing/validation guarantees this.
    """
    # Decide on None here rather than relying on to_python(), which may map
    # a missing value to an empty list.
    if value is None:
        return None
    return [
        {
            "list_item_id": reference.labels[0].list_item_id,
            "display_value": self.best_label(reference.labels).value,
        }
        for reference in self.to_python(value)
    ]

def best_label(self, labels: list[ReferenceLabel]):
    """Pick the label that ranks highest for the current language.

    Args:
        labels: candidate labels; each one is scored by ``rank_label`` from
            its ``valuetype_id`` and ``language_id``.

    Returns:
        The highest-ranked label (the earliest one wins a tie), or ``None``
        when ``labels`` is empty or ``None``.
    """
    if not labels:
        return None
    # The target language does not depend on the label, so fetch it once.
    target_lang = get_language()
    # max() yields the first maximal element, which is the same element a
    # stable descending sort would put first, without ordering the rest.
    return max(
        labels,
        key=lambda label: rank_label(
            kind=label.valuetype_id,
            source_lang=label.language_id,
            target_lang=target_lang,
        ),
    )

def validate(
self,
value,
Expand All @@ -67,6 +92,7 @@ def validate(
try:
parsed = self.to_python(value)
self.validate_pref_labels(parsed)
self.validate_list_item_consistency(parsed)
self.validate_multivalue(parsed, node, nodeid)
except Exception as e:
return [self.transform_exception(e)]
Expand All @@ -85,6 +111,15 @@ def validate_pref_labels(self, references: list[Reference] | None):
msg = _("A reference can have only one prefLabel per language")
raise ValueError(msg)

def validate_list_item_consistency(self, references: list[Reference] | None):
    """Ensure that all labels of each reference belong to one list item.

    Args:
        references: parsed references, or ``None`` when there is nothing
            to check.

    Raises:
        ValueError: if the labels of a reference do not share exactly one
            ``list_item_id``.
    """
    if references is None:
        return None
    for reference in references:
        list_item_ids = {label.list_item_id for label in reference.labels}
        if len(list_item_ids) != 1:
            # Translate the template first, then interpolate, so that the
            # message names the offending reference rather than showing a
            # literal "{reference}" placeholder.
            msg = _("Found multiple list items among labels: {reference}")
            raise ValueError(msg.format(reference=reference))

def validate_multivalue(self, parsed, node, nodeid):
if not parsed:
return
Expand Down Expand Up @@ -120,19 +155,48 @@ def transform_exception(e):
}

def transform_value_for_tile(self, value, **kwargs):
list_id = kwargs.get("controlledList")
value = self.serialize(value)
if (
isinstance(value, list)
and isinstance(value[0], dict)
and "value" in value[0]
):
value = value[0]["value"]
if isinstance(value, str):
found_item = self.lookup_listitem_from_label(value, list_id)
if value is None:
return None
if not isinstance(value, list):
value = [value]

# Pre-process to discard display values generated by to_representation().
pre_processed_values = []
for single_value in value:
if isinstance(single_value, dict) and (
list_item_id := single_value.get("list_item_id")
):
pre_processed_values.append(list_item_id)
else:
pre_processed_values.append(single_value)

final_tile_values = []
for single_value in pre_processed_values:
found_item: ListItem | None = None
match single_value:
case Reference():
found_item = ListItem.objects.filter(
pk=single_value.labels[0].list_item_id
).first()
case uuid.UUID():
found_item = ListItem.objects.filter(pk=list_item_id).first()
case str():
try:
list_item_id = uuid.UUID(single_value)
except ValueError:
list_id = kwargs.get("controlledList")
found_item = self.lookup_listitem_from_label(
single_value, list_id
)
else:
found_item = ListItem.objects.filter(pk=list_item_id).first()
case dict():
final_tile_values.append(single_value)

if found_item:
value = [found_item.build_tile_value()]
return value
final_tile_values.append(found_item.build_tile_value())

return final_tile_values

def lookup_listitem_from_label(self, value, list_id):
return (
Expand Down
51 changes: 45 additions & 6 deletions tests/reference_datatype_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,21 @@ def test_validate(self):
self.assertEqual(len(errors), 1, errors)
self.assertEqual(errors[0]["message"], message)

mock_list_item_id = uuid.uuid4()
data = {
"uri": "https://www.domain.com/label",
"labels": [
{
"id": "23b4efbd-2e46-4b3f-8d75-2f3b2bb96af2",
"value": "label",
"language_id": "en",
"list_item_id": str(uuid.uuid4()),
"list_item_id": str(mock_list_item_id),
"valuetype_id": "prefLabel",
},
{
"id": "e8676242-f0c7-4e3d-b031-fded4960cd86",
"language_id": "de",
"list_item_id": str(uuid.uuid4()),
"list_item_id": str(mock_list_item_id),
"valuetype_id": "prefLabel",
},
],
Expand All @@ -87,6 +88,12 @@ def test_validate(self):
data["labels"][1]["language_id"] = "de"
data["labels"][1]["list_item_id"] = str(uuid.uuid4())

# Mixed list_item_id values
errors = reference.validate(value=[data], node=mock_node)
self.assertEqual(len(errors), 1, errors)

data["labels"][1]["list_item_id"] = str(mock_list_item_id)

# Valid
errors = reference.validate(value=[data], node=mock_node)
self.assertEqual(errors, [])
Expand Down Expand Up @@ -145,19 +152,25 @@ def test_dataclass_roundtrip(self):
config = {"controlledList": list1_pk}
tile_val = reference.transform_value_for_tile("label1-pref", **config)
materialized = reference.to_python(tile_val)
# This transformation will visit the database.
tile_val_reparsed = reference.transform_value_for_tile(materialized, **config)
self.assertEqual(tile_val_reparsed, tile_val)
# This one will not.
serialized_reference = reference.serialize(materialized)
self.assertEqual(serialized_reference, tile_val)
# Also test None.
self.assertIsNone(reference.serialize(None))

def test_transform_value_for_tile(self):
reference = DataTypeFactory().get_instance("reference")
list1_pk = str(List.objects.get(name="list1").pk)
config = {"controlledList": list1_pk}

tile_value1 = reference.transform_value_for_tile("label1-pref", **config)
self.assertTrue(isinstance(tile_value1, list))
self.assertTrue("uri" in tile_value1[0])
self.assertTrue("labels" in tile_value1[0])
self.assertTrue("list_id" in tile_value1[0])
self.assertIsInstance(tile_value1, list)
self.assertIn("uri", tile_value1[0])
self.assertIn("labels", tile_value1[0])
self.assertIn("list_id", tile_value1[0])

self.assertIsNone(reference.transform_value_for_tile(None, **config))

Expand All @@ -177,6 +190,28 @@ def test_transform_value_for_tile(self):
tile_value2[0]["labels"][0]["list_item_id"], expected_list_item_pk
)

def test_to_representation(self):
    """to_representation() reduces a tile value to item id plus display label."""
    datatype = DataTypeFactory().get_instance("reference")
    pref_label = ListItemValue.objects.get(
        value="label1-pref", list_item__list__name="list1"
    )
    list_item = pref_label.list_item
    tile_value = datatype.transform_value_for_tile(
        "label1-pref", controlledList=str(list_item.list_id)
    )

    expected = [
        {
            "list_item_id": str(list_item.pk),
            "display_value": "label1-pref",
        }
    ]
    self.assertEqual(datatype.to_representation(tile_value), expected)

    # A missing value is expected to have no representation.
    self.assertIsNone(datatype.to_representation(None))

def test_get_display_value(self):
reference = DataTypeFactory().get_instance("reference")
mock_node = SimpleNamespace(nodeid="72048cb3-adbc-11e6-9ccf-14109fd34195")
Expand Down Expand Up @@ -230,3 +265,7 @@ def test_get_display_value(self):
}
)
self.assertEqual(reference.get_display_value(mock_tile2, mock_node), "")

def test_collects_multiple_values(self):
    """The reference datatype reports that it collects multiple values."""
    datatype = DataTypeFactory().get_instance("reference")
    collects_multiple = datatype.collects_multiple_values()
    # assertIs pins the exact boolean True, not merely a truthy result.
    self.assertIs(collects_multiple, True)
Loading