From ab25446ddc5ecd4710e4d6d2ab087b0aa86c5aa7 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Tue, 13 Aug 2024 10:16:41 -0400 Subject: [PATCH 01/25] Avoid trying to create list items that don't belong in hierarchy when concepts belong to many collections #23 --- ...002_etl_collections_to_controlled_lists.py | 151 ++++++++++-------- 1 file changed, 88 insertions(+), 63 deletions(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 8ba7021..eee5f2f 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -19,6 +19,7 @@ class Migration(migrations.Migration): ) returns text as $$ declare failed_collections text[]; + collection text; begin -- RDM Collections to Controlled Lists & List Items Migration -- -- To use, run: @@ -36,8 +37,10 @@ class Migration(migrations.Migration): -- a concept at the top of a collection does NOT have a parent list item and should have a depth of 0 -- a concept below the top concepts of the collection will have a parent list item and should have a depth of > 0 -- a prefLabel and any altLabels for a concept become list item values + -- a concept that participates in multiple collections will have different list item id's for each new list it belongs to + -- - -- in the RDM concepts are sorted alphabetically, but are explicitly ordered using a list item's sortorder... + -- in the RDM concepts are sorted alphabetically, but list items are explicitly ordered using sortorder... 
-- sort order is calculated at the list level and ordered alphabetically within each leaf of the hierarchy -- Check if collection_names are provided @@ -133,69 +136,91 @@ class Migration(migrations.Migration): -- The recursive CTE below is used to assign the conceptid of the list at the root to each concept to be migrated -- On each recursion, it checks if the child (aka conceptidto in relations table) is a parent for another concept - -- All the while, it keeps track of the depth of the child concept, to be used for sorting in the next CTE - with recursive collection_hierarchy as ( - select conceptidfrom as root_list, - conceptidto as child, - 0 as depth - from relations - where not exists ( - select 1 from relations r2 where r2.conceptidto = relations.conceptidfrom - ) and relationtype = 'member' - union all - select ch.root_list, - r.conceptidto, - ch.depth + 1 - from collection_hierarchy ch - join relations r on ch.child = r.conceptidfrom - where relationtype = 'member' - ), - -- Rank prefLabels by user provided language, - -- if no prefLabel in that language exists for a concept, fall back on next prefLabel ordered by languageid - ranked_prefLabels as ( - select ch.root_list, - ch.child, - ch.depth, - v.languageid, v.value, - ROW_NUMBER() OVER (PARTITION BY ch.child ORDER BY (v.languageid = preferred_sort_language) DESC, languages.id) AS language_rank, - r.conceptidfrom - from collection_hierarchy ch - left join values v on v.conceptid = ch.child - left join relations r on r.conceptidto = ch.child - left join languages on v.languageid = languages.code - where v.valuetype = 'prefLabel' and - r.relationtype = 'member' - ), - -- Once we've assigned our root_list, we want to sort the children (to depth n) alphabetically based on their ranked prefLabel - -- We also want to take INTO account the child's parent value, so the relations table is joined back to capture the parent. 
- alpha_sorted_list_item_hierarchy as ( - select child as id, - row_number() over (partition by root_list order by depth, LOWER(value)) - 1 as sortorder, - root_list as list_id, - case when conceptidfrom = root_list then null -- list items at top of hierarchy have no parent list item - else conceptidfrom - end as parent_id, - depth - from ranked_prefLabels rpl - where language_rank = 1 and - root_list in (select id from arches_references_list where name = ANY(collection_names)) - ) - insert into arches_references_listitem( - id, - uri, - sortorder, - guide, - list_id, - parent_id - ) - select id, - host || id as uri, - sortorder, - false as guide, - list_id, - parent_id - from alpha_sorted_list_item_hierarchy; + -- All the while, it keeps track of the depth of the child concept, to be used for sorting in the next CTE + -- The results are stored in a temporary table to avoid re-running non-filtered recursion (done on the whole relations table) + + create temporary table temp_collection_hierarchy as + with recursive collection_hierarchy as ( + select conceptidfrom as root_list, + conceptidto as child, + 0 as depth + from relations + where not exists ( + select 1 from relations r2 where r2.conceptidto = relations.conceptidfrom + ) and relationtype = 'member' + union all + select ch.root_list, + r.conceptidto, + ch.depth + 1 + from collection_hierarchy ch + join relations r on ch.child = r.conceptidfrom + where relationtype = 'member' + ) + select * from collection_hierarchy; + + foreach collection in array collection_names loop + with filtered_collection_hierarchy as ( + select * + from temp_collection_hierarchy + where root_list in (select id from arches_references_list where name = collection) + ), + -- Rank prefLabels by user provided language, + -- if no prefLabel in that language exists for a concept, fall back on next prefLabel ordered by languageid + ranked_prefLabels as ( + select ch.root_list, + ch.child, + ch.depth, + v.languageid, v.value, + ROW_NUMBER() 
OVER (PARTITION BY ch.child ORDER BY (v.languageid = preferred_sort_language) DESC, languages.id) AS language_rank, + r.conceptidfrom + from filtered_collection_hierarchy ch + left join values v on v.conceptid = ch.child + left join relations r on r.conceptidto = ch.child + left join languages on v.languageid = languages.code + where v.valuetype = 'prefLabel' and + r.relationtype = 'member' + ), + filtered_ranked_prefLabels as ( + select * + from ranked_prefLabels + where conceptidfrom in ( + select root_list from ranked_prefLabels + union + select child from ranked_prefLabels + ) + ), + -- Once we've assigned our root_list, we want to sort the children (to depth n) alphabetically based on their ranked prefLabel + -- We also want to take into account the child's parent value, so the relations table is joined back to capture the parent. + alpha_sorted_list_item_hierarchy as ( + select child as id, + row_number() over (partition by root_list order by depth, LOWER(value)) - 1 as sortorder, + root_list as list_id, + case when conceptidfrom = root_list then null -- list items at top of hierarchy have no parent list item + else conceptidfrom + end as parent_id, + depth + from filtered_ranked_prefLabels rpl + where language_rank = 1 and + root_list in (select id from arches_references_list where name = collection) + ) + insert into arches_references_listitem( + id, + uri, + sortorder, + guide, + list_id, + parent_id + ) + select id, + host || id as uri, + sortorder, + false as guide, + list_id, + parent_id + from alpha_sorted_list_item_hierarchy; + end loop; + drop table if exists temp_collection_hierarchy; -- Migrate concept values -> controlled list item values insert into arches_references_listitemvalue ( From a834ac0391a3505e4cad9e78f0e43893c64c3a99 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Tue, 13 Aug 2024 10:19:36 -0400 Subject: [PATCH 02/25] clarification --- .../migrations/0002_etl_collections_to_controlled_lists.py | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index eee5f2f..6e1360d 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -37,8 +37,7 @@ class Migration(migrations.Migration): -- a concept at the top of a collection does NOT have a parent list item and should have a depth of 0 -- a concept below the top concepts of the collection will have a parent list item and should have a depth of > 0 -- a prefLabel and any altLabels for a concept become list item values - -- a concept that participates in multiple collections will have different list item id's for each new list it belongs to - -- + -- a concept that participates in multiple collections will have distinct list items for each new list it belongs to -- in the RDM concepts are sorted alphabetically, but list items are explicitly ordered using sortorder... 
-- sort order is calculated at the list level and ordered alphabetically within each leaf of the hierarchy From 97c31c2df3381cb8be3390f517d739147de42252 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Tue, 13 Aug 2024 13:06:41 -0400 Subject: [PATCH 03/25] Reduce queries to existing RDM tables, use temp tbl to gather records being created #23 --- ...002_etl_collections_to_controlled_lists.py | 101 ++++++++++++------ 1 file changed, 71 insertions(+), 30 deletions(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 6e1360d..9a19c27 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -14,12 +14,12 @@ class Migration(migrations.Migration): create or replace function __arches_migrate_collections_to_clm( collection_names text[] default null, -- one or more collections to be migrated to controlled lists host text default 'http://localhost:8000/plugins/controlled-list-manager/item/', - overwrite boolean default FALSE, + overwrite boolean default FALSE, preferred_sort_language text default 'en' ) returns text as $$ declare failed_collections text[]; - collection text; + collection text; begin -- RDM Collections to Controlled Lists & List Items Migration -- -- To use, run: @@ -156,6 +156,19 @@ class Migration(migrations.Migration): where relationtype = 'member' ) select * from collection_hierarchy; + + create temporary table temp_list_items_and_values ( + list_item_id uuid, + sortorder bigint, + list_id uuid, + parent_id uuid, + depth int, + legacy_conceptid uuid, + listitemvalue_id uuid, + listitemvalue text, + listitemvalue_languageid text, + listitemvalue_valuetype text + ); foreach collection in array collection_names loop with filtered_collection_hierarchy as ( @@ -202,24 +215,57 @@ class Migration(migrations.Migration): where 
language_rank = 1 and root_list in (select id from arches_references_list where name = collection) ) - insert into arches_references_listitem( - id, - uri, + insert into temp_list_items_and_values ( + list_item_id, sortorder, - guide, list_id, - parent_id + parent_id, + depth, + legacy_conceptid, + listitemvalue_id, + listitemvalue, + listitemvalue_languageid, + listitemvalue_valuetype ) - select id, - host || id as uri, - sortorder, - false as guide, - list_id, - parent_id - from alpha_sorted_list_item_hierarchy; + select lih.id as list_item_id, + lih.sortorder, + lih.list_id, + lih.parent_id, + lih.depth, + lih.id as legacy_conceptid, + v.valueid as listitemvalue_id, + v.value, + v.languageid, + v.valuetype + from alpha_sorted_list_item_hierarchy lih + join values v on v.conceptid = lih.id + where valuetype = 'prefLabel' + or valuetype = 'altLabel' + or valuetype = 'scopeNote' + or valuetype = 'definition' + or valuetype = 'example' + or valuetype = 'historyNote' + or valuetype = 'editorialNote' + or valuetype = 'changeNote' + or valuetype = 'note' + or valuetype = 'description'; end loop; - - drop table if exists temp_collection_hierarchy; + + insert into arches_references_listitem ( + id, + uri, + sortorder, + guide, + list_id, + parent_id + ) + select distinct list_item_id, + host || legacy_conceptid as uri, + sortorder, + false as guide, + list_id, + parent_id + from temp_list_items_and_values; -- Migrate concept values -> controlled list item values insert into arches_references_listitemvalue ( @@ -229,20 +275,15 @@ class Migration(migrations.Migration): languageid, valuetype_id ) - select distinct (v.valueid) id, - value, - r.conceptidto as list_item_id, - languageid, - valuetype as valuetype_id - from relations r - full join values v on r.conceptidto = v.conceptid - where relationtype = 'member' and - (valuetype = 'prefLabel' or valuetype = 'altLabel') and - r.conceptidto in ( - select id from arches_references_listitem where list_id in ( - select id from 
arches_references_list where name = ANY(collection_names) - ) - ); + select listitemvalue_id, + listitemvalue, + list_item_id, + listitemvalue_languageid, + listitemvalue_valuetype + from temp_list_items_and_values; + + drop table if exists temp_collection_hierarchy; + drop table if exists temp_list_items_and_values; return format('Collection(s) %s migrated to controlled list(s)', array_to_string(collection_names, ', ')); end; From cc7a5698849e186c69061de0054cede5b9a7e657 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 14 Aug 2024 10:41:51 -0400 Subject: [PATCH 04/25] Improve filter on which tree is being built for concepts in multiple collections #23 --- ...002_etl_collections_to_controlled_lists.py | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 9a19c27..f43e2a9 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -141,7 +141,8 @@ class Migration(migrations.Migration): create temporary table temp_collection_hierarchy as with recursive collection_hierarchy as ( select conceptidfrom as root_list, - conceptidto as child, + conceptidto as child, + ARRAY[conceptidfrom] AS path, 0 as depth from relations where not exists ( @@ -150,6 +151,7 @@ class Migration(migrations.Migration): union all select ch.root_list, r.conceptidto, + ch.path || r.conceptidfrom, ch.depth + 1 from collection_hierarchy ch join relations r on ch.child = r.conceptidfrom @@ -184,22 +186,15 @@ class Migration(migrations.Migration): ch.depth, v.languageid, v.value, ROW_NUMBER() OVER (PARTITION BY ch.child ORDER BY (v.languageid = preferred_sort_language) DESC, languages.id) AS language_rank, - r.conceptidfrom + r.conceptidfrom, + ch.path from filtered_collection_hierarchy ch left join 
values v on v.conceptid = ch.child left join relations r on r.conceptidto = ch.child left join languages on v.languageid = languages.code - where v.valuetype = 'prefLabel' and - r.relationtype = 'member' - ), - filtered_ranked_prefLabels as ( - select * - from ranked_prefLabels - where conceptidfrom in ( - select root_list from ranked_prefLabels - union - select child from ranked_prefLabels - ) + where v.valuetype = 'prefLabel' + and r.relationtype = 'member' + and r.conceptidfrom in (select unnest(path) from filtered_collection_hierarchy) ), -- Once we've assigned our root_list, we want to sort the children (to depth n) alphabetically based on their ranked prefLabel -- We also want to take into account the child's parent value, so the relations table is joined back to capture the parent. @@ -211,7 +206,7 @@ class Migration(migrations.Migration): else conceptidfrom end as parent_id, depth - from filtered_ranked_prefLabels rpl + from ranked_prefLabels rpl where language_rank = 1 and root_list in (select id from arches_references_list where name = collection) ) From 552b9ca07d495cc7ec8b34fc5aebc4e14d8f02a3 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 14 Aug 2024 13:46:54 -0400 Subject: [PATCH 05/25] Mint new item and itemvalue ids for concepts that participate in multiple collections #23 --- ...002_etl_collections_to_controlled_lists.py | 57 ++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index f43e2a9..130beb1 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -20,6 +20,9 @@ class Migration(migrations.Migration): returns text as $$ declare failed_collections text[]; collection text; + rec record; + new_listitem_id uuid; + new_listitemvalue_id uuid; begin -- 
RDM Collections to Controlled Lists & List Items Migration -- -- To use, run: @@ -169,7 +172,8 @@ class Migration(migrations.Migration): listitemvalue_id uuid, listitemvalue text, listitemvalue_languageid text, - listitemvalue_valuetype text + listitemvalue_valuetype text, + rownumber int ); foreach collection in array collection_names loop @@ -245,6 +249,57 @@ class Migration(migrations.Migration): or valuetype = 'note' or valuetype = 'description'; end loop; + + with assign_row_num as ( + select list_item_id, + sortorder, + list_id, + parent_id, + depth, + ROW_NUMBER() OVER (PARTITION BY list_item_id ORDER BY depth ASC, sortorder ASC) as init_rownumber + from temp_list_items_and_values t + ) + update temp_list_items_and_values t + set rownumber = init_rownumber + from assign_row_num a + where t.list_item_id = a.list_item_id + and t.list_id = a.list_id; + + for rec in + select * + from temp_list_items_and_values + where list_item_id in ( + with list_item_parent_count as ( + select list_item_id, count (list_item_id) + from temp_list_items_and_values + where listitemvalue_valuetype = 'prefLabel' + group by list_item_id + union + select id as list_item_id, count(id) + from arches_references_listitem + group by id + ) + select list_item_id + from temp_list_items_and_values + where list_item_id in ( + select list_item_id + from list_item_parent_count + where count > 1 + ) + ) and listitemvalue_valuetype = 'prefLabel' + order by list_item_id, depth asc, sortorder asc + loop + if rec.rownumber > 1 + then + new_listitem_id := uuid_generate_v4(); + new_listitemvalue_id := uuid_generate_v4(); + update temp_list_items_and_values + set list_item_id = new_listitem_id, + listitemvalue_id = new_listitemvalue_id + where list_item_id = rec.list_item_id + and rownumber = rec.rownumber; + end if; + end loop; insert into arches_references_listitem ( id, From 1eecf83877eac37f1eff6a9e2d36320d68ed6cac Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 14 Aug 2024 14:03:23 
-0400 Subject: [PATCH 06/25] Add descriptions for new logic #23 --- .../migrations/0002_etl_collections_to_controlled_lists.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 130beb1..8f30007 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -140,6 +140,7 @@ class Migration(migrations.Migration): -- On each recursion, it checks if the child (aka conceptidto in relations table) is a parent for another concept -- All the while, it keeps track of the depth of the child concept, to be used for sorting in the next CTE -- The results are stored in a temporary table to avoid re-running non-filtered recursion (done on the whole relations table) + -- We keep track of the hierarchy path in order to account for concepts that participate in multiple collections create temporary table temp_collection_hierarchy as with recursive collection_hierarchy as ( @@ -162,6 +163,7 @@ class Migration(migrations.Migration): ) select * from collection_hierarchy; + -- This temp table is used to stage list items and values create temporary table temp_list_items_and_values ( list_item_id uuid, sortorder bigint, @@ -176,6 +178,8 @@ class Migration(migrations.Migration): rownumber int ); + -- Build the new hierarchies at the list level, mainly to account for concepts that participate in multiple collections + -- then stash results in temp table for preprocessing before inserting into CLM tables foreach collection in array collection_names loop with filtered_collection_hierarchy as ( select * @@ -250,6 +254,7 @@ class Migration(migrations.Migration): or valuetype = 'description'; end loop; + -- Assign row number to help identify concepts that participate in multiple collections with assign_row_num as ( select list_item_id, 
sortorder, @@ -265,6 +270,7 @@ class Migration(migrations.Migration): where t.list_item_id = a.list_item_id and t.list_id = a.list_id; + -- For concepts that participate in multiple collections, mint new listitem_id's and listitemvalue_id's for rec in select * from temp_list_items_and_values From 3e5203ba8dc527b5abffaa449cc4fa9648966184 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Thu, 15 Aug 2024 12:36:31 -0400 Subject: [PATCH 07/25] Check for listitems that already exist in CLM, but participate in collection being migrated #23 --- ...002_etl_collections_to_controlled_lists.py | 47 ++++++++----------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 8f30007..4585aa6 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -169,7 +169,6 @@ class Migration(migrations.Migration): sortorder bigint, list_id uuid, parent_id uuid, - depth int, legacy_conceptid uuid, listitemvalue_id uuid, listitemvalue text, @@ -223,7 +222,6 @@ class Migration(migrations.Migration): sortorder, list_id, parent_id, - depth, legacy_conceptid, listitemvalue_id, listitemvalue, @@ -234,7 +232,6 @@ class Migration(migrations.Migration): lih.sortorder, lih.list_id, lih.parent_id, - lih.depth, lih.id as legacy_conceptid, v.valueid as listitemvalue_id, v.value, @@ -260,9 +257,23 @@ class Migration(migrations.Migration): sortorder, list_id, parent_id, - depth, - ROW_NUMBER() OVER (PARTITION BY list_item_id ORDER BY depth ASC, sortorder ASC) as init_rownumber - from temp_list_items_and_values t + existing_item, + ROW_NUMBER() OVER (PARTITION BY list_item_id ORDER BY existing_item DESC, sortorder ASC) as init_rownumber + from ( + select list_item_id, + sortorder, + list_id, + parent_id, + FALSE as existing_item + 
from temp_list_items_and_values + union all + select id as list_item_id, + sortorder, + list_id, + parent_id, + TRUE as existing_item + from arches_references_listitem + ) as t ) update temp_list_items_and_values t set rownumber = init_rownumber @@ -273,27 +284,9 @@ class Migration(migrations.Migration): -- For concepts that participate in multiple collections, mint new listitem_id's and listitemvalue_id's for rec in select * - from temp_list_items_and_values - where list_item_id in ( - with list_item_parent_count as ( - select list_item_id, count (list_item_id) - from temp_list_items_and_values - where listitemvalue_valuetype = 'prefLabel' - group by list_item_id - union - select id as list_item_id, count(id) - from arches_references_listitem - group by id - ) - select list_item_id - from temp_list_items_and_values - where list_item_id in ( - select list_item_id - from list_item_parent_count - where count > 1 - ) - ) and listitemvalue_valuetype = 'prefLabel' - order by list_item_id, depth asc, sortorder asc + from testing_list_items_and_values + where rownumber > 1 + and listitemvalue_valuetype = 'prefLabel' loop if rec.rownumber > 1 then From 3ed8d950e58a3a98a38ce1a1344cd8859c0c5293 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Thu, 15 Aug 2024 12:36:56 -0400 Subject: [PATCH 08/25] Add note about apostrophes in collection names --- .../migrations/0002_etl_collections_to_controlled_lists.py | 1 + 1 file changed, 1 insertion(+) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 4585aa6..c3800d8 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -33,6 +33,7 @@ class Migration(migrations.Migration): -- 'en' -- ); -- where the input array values are concept prefLabels or identifiers and the optional language is used for sorting + 
-- for collections that contain an apostrophe, use two single quotes, e.g. 'John''s list' -- Conceptually: -- a collection becomes a list From 1cd0ae46996b40471473c0e1fc6fc47200395db6 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Thu, 15 Aug 2024 13:22:35 -0400 Subject: [PATCH 09/25] nit #23 --- .../migrations/0002_etl_collections_to_controlled_lists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index c3800d8..72f781d 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -285,7 +285,7 @@ class Migration(migrations.Migration): -- For concepts that participate in multiple collections, mint new listitem_id's and listitemvalue_id's for rec in select * - from testing_list_items_and_values + from temp_list_items_and_values where rownumber > 1 and listitemvalue_valuetype = 'prefLabel' loop From d9486d751724bc1350dc0a28920ed1cf7fd51819 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Thu, 15 Aug 2024 13:55:36 -0400 Subject: [PATCH 10/25] Simplify logic for minting new ids for items and values #23 --- ...002_etl_collections_to_controlled_lists.py | 25 +++++-------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 72f781d..a4e6d12 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -21,8 +21,6 @@ class Migration(migrations.Migration): declare failed_collections text[]; collection text; rec record; - new_listitem_id uuid; - new_listitemvalue_id uuid; begin -- RDM Collections to Controlled Lists & 
List Items Migration -- -- To use, run: @@ -253,6 +251,7 @@ class Migration(migrations.Migration): end loop; -- Assign row number to help identify concepts that participate in multiple collections + -- or exist already as listitems and therefore need new listitem_id's and listitemvalue_id's with assign_row_num as ( select list_item_id, sortorder, @@ -283,23 +282,11 @@ class Migration(migrations.Migration): and t.list_id = a.list_id; -- For concepts that participate in multiple collections, mint new listitem_id's and listitemvalue_id's - for rec in - select * - from temp_list_items_and_values - where rownumber > 1 - and listitemvalue_valuetype = 'prefLabel' - loop - if rec.rownumber > 1 - then - new_listitem_id := uuid_generate_v4(); - new_listitemvalue_id := uuid_generate_v4(); - update temp_list_items_and_values - set list_item_id = new_listitem_id, - listitemvalue_id = new_listitemvalue_id - where list_item_id = rec.list_item_id - and rownumber = rec.rownumber; - end if; - end loop; + update temp_list_items_and_values + set list_item_id = uuid_generate_v4(), + listitemvalue_id = uuid_generate_v4() + where rownumber > 1 + and listitemvalue_valuetype = 'prefLabel'; insert into arches_references_listitem ( id, From 3e2821c370bf03bee01157bfd139b98f21195353 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Thu, 29 Aug 2024 15:38:56 -0400 Subject: [PATCH 11/25] Add note for how to handle apostrophes in collection names on python cmd side #23 --- arches_references/management/commands/controlled_lists.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arches_references/management/commands/controlled_lists.py b/arches_references/management/commands/controlled_lists.py index e04530f..c4c8fa7 100644 --- a/arches_references/management/commands/controlled_lists.py +++ b/arches_references/management/commands/controlled_lists.py @@ -82,6 +82,8 @@ def migrate_collections_to_controlled_lists( -ho 'http://localhost:8000/plugins/controlled-list-manager/item/' -psl 'fr' 
-ow + + for collections that contain an apostrophe, use wrap the concept in double quotes, e.g. "John''s list" """ collections_in_db = list( From 5726d1b96e60efa131786030faa10bfa2749eeee Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 11 Sep 2024 10:09:04 -0400 Subject: [PATCH 12/25] nit #23 --- arches_references/management/commands/controlled_lists.py | 3 ++- .../migrations/0002_etl_collections_to_controlled_lists.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arches_references/management/commands/controlled_lists.py b/arches_references/management/commands/controlled_lists.py index c4c8fa7..f62d749 100644 --- a/arches_references/management/commands/controlled_lists.py +++ b/arches_references/management/commands/controlled_lists.py @@ -83,7 +83,8 @@ def migrate_collections_to_controlled_lists( -psl 'fr' -ow - for collections that contain an apostrophe, use wrap the concept in double quotes, e.g. "John''s list" + for collections that contain an apostrophe, wrap the concept in double quotes, e.g. 
"John''s list" + """ collections_in_db = list( diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index a4e6d12..9bfe481 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -46,7 +46,7 @@ class Migration(migrations.Migration): -- Check if collection_names are provided if collection_names is null or array_length(collection_names, 1) = 0 then - return 'No collection names or identifiers provided.'; + raise exception 'No collection names or identifiers provided.'; end if; -- Check if input collection names or identifiers exist in the database From f8490d552ab61bf1fc43f1e79cfd39d5f64be94e Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 11 Sep 2024 14:03:25 -0400 Subject: [PATCH 13/25] Avoid id clashes when migrating collections with the same concepts that have multiple labels #23 --- ...002_etl_collections_to_controlled_lists.py | 39 ++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 9bfe481..c3368e7 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -20,7 +20,7 @@ class Migration(migrations.Migration): returns text as $$ declare failed_collections text[]; collection text; - rec record; + listitems_to_update_with_multiple_values uuid[]; begin -- RDM Collections to Controlled Lists & List Items Migration -- -- To use, run: @@ -282,11 +282,39 @@ class Migration(migrations.Migration): and t.list_id = a.list_id; -- For concepts that participate in multiple collections, mint new listitem_id's and listitemvalue_id's + -- However, if a concept 
needs a new listitem_id, and has multiple values associated with it, ensure that + -- the new listitem_id is the same for all listitemvalues + listitems_to_update_with_multiple_values := array( + select list_item_id + from temp_list_items_and_values + where rownumber > 1 + group by list_item_id + having count(*) > 1 + ); + + with new_list_item_ids as ( + select legacy_list_item_id, + uuid_generate_v4() as new_list_item_id + from unnest(listitems_to_update_with_multiple_values) as t(legacy_list_item_id) + ) + update temp_list_items_and_values t + set list_item_id = new_list_item_id + from new_list_item_ids n + where t.list_item_id = n.legacy_list_item_id + and rownumber > 1; + + -- Update list_item_ids for items that don't have multiple values update temp_list_items_and_values - set list_item_id = uuid_generate_v4(), - listitemvalue_id = uuid_generate_v4() + set list_item_id = uuid_generate_v4() where rownumber > 1 - and listitemvalue_valuetype = 'prefLabel'; + and listitemvalue_valuetype = 'prefLabel' + and legacy_conceptid != any(listitems_to_update_with_multiple_values) + and list_item_id = legacy_conceptid; + + -- Update listitemvalue_ids + update temp_list_items_and_values + set listitemvalue_id = uuid_generate_v4() + where rownumber > 1; insert into arches_references_listitem ( id, @@ -296,7 +324,8 @@ class Migration(migrations.Migration): list_id, parent_id ) - select distinct list_item_id, + select distinct on (list_item_id, list_id) + list_item_id, host || legacy_conceptid as uri, sortorder, false as guide, From 0c64d03045bd712c17208d1514717f50d2dc63d0 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 11 Sep 2024 15:34:33 -0400 Subject: [PATCH 14/25] Avoid hardcoding prefLabel #23 --- .../migrations/0002_etl_collections_to_controlled_lists.py | 1 - 1 file changed, 1 deletion(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 
c3368e7..4de85cc 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -307,7 +307,6 @@ class Migration(migrations.Migration): update temp_list_items_and_values set list_item_id = uuid_generate_v4() where rownumber > 1 - and listitemvalue_valuetype = 'prefLabel' and legacy_conceptid != any(listitems_to_update_with_multiple_values) and list_item_id = legacy_conceptid; From 950106c7c4737a37c232be5cffc2f6fc08342589 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 11 Sep 2024 15:37:54 -0400 Subject: [PATCH 15/25] Move tests to use django native test fixtures #23 --- tests/cli_tests.py | 21 +- tests/data/concept_label_test_collection.xml | 28 -- tests/data/polyhierarchical_collections.json | 443 +++++++++++++++++++ 3 files changed, 449 insertions(+), 43 deletions(-) delete mode 100644 tests/data/concept_label_test_collection.xml create mode 100644 tests/data/polyhierarchical_collections.json diff --git a/tests/cli_tests.py b/tests/cli_tests.py index e87b7c0..7868f7c 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -66,19 +66,10 @@ class RDMToControlledListsETLTests(TestCase): @classmethod def setUpTestData(cls): - skos = SKOSReader() - rdf = skos.read_file( - os.path.join(PROJECT_TEST_ROOT, "data", "concept_label_test_collection.xml") - ) - ret = skos.save_concepts_from_skos(rdf) - - client = Client() - client.login(username="admin", password="admin") - response = client.get( - reverse( - "make_collection", - kwargs={"conceptid": "7c90899a-dbe9-4574-9175-e69481a80b3c"}, - ) + management.call_command( + "loaddata", + "tests/data/polyhierarchical_collections.json", + format="json", ) def test_migrate_collections_to_controlled_lists(self): @@ -86,14 +77,14 @@ def test_migrate_collections_to_controlled_lists(self): management.call_command( "controlled_lists", operation="migrate_collections_to_controlled_lists", - 
collections_to_migrate=["Concept Label Import Test"], + collections_to_migrate=["Polyhierarchical Collection Test"], host="http://localhost:8000/plugins/controlled-list-manager/item/", preferred_sort_language="en", overwrite=False, stdout=output, ) - imported_list = List.objects.get(name="Concept Label Import Test") + imported_list = List.objects.get(name="Polyhierarchical Collection Test") imported_items = imported_list.list_items.all() self.assertEqual(len(imported_items), 3) diff --git a/tests/data/concept_label_test_collection.xml b/tests/data/concept_label_test_collection.xml deleted file mode 100644 index a8d838f..0000000 --- a/tests/data/concept_label_test_collection.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - {"id": "7949d7b5-6e57-469a-8f38-87aac08e1788", "value": "Test Concept 2"} - - - - - {"id": "fad6f17d-f7c8-4fa1-b358-e8626571599e", "value": "Test Concept 3"} - - - - - - - {"id": "9fa56006-6828-480f-8395-ad5c5a84726b", "value": "Test Concept 1"} - - - {"id": "f5e1a756-c658-4a3c-bc3a-e9293242e8f7", "value": "Concept Label Import Test"} - - diff --git a/tests/data/polyhierarchical_collections.json b/tests/data/polyhierarchical_collections.json new file mode 100644 index 0000000..adbdedf --- /dev/null +++ b/tests/data/polyhierarchical_collections.json @@ -0,0 +1,443 @@ +[ + { + "model": "models.concept", + "pk": "00000000-0000-0000-0000-000000000001", + "fields": { + "nodetype": "ConceptScheme", + "legacyoid": "ARCHES" + } + }, + { + "model": "models.concept", + "pk": "00000000-0000-0000-0000-000000000004", + "fields": { + "nodetype": "Concept", + "legacyoid": "ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES CONCEPT" + } + }, + { + "model": "models.concept", + "pk": "00000000-0000-0000-0000-000000000005", + "fields": { + "nodetype": "Collection", + "legacyoid": "ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES COLLECTION" + } + }, + { + "model": "models.concept", + "pk": "00000000-0000-0000-0000-000000000006", + "fields": { + "nodetype": 
"ConceptScheme", + "legacyoid": "CANDIDATES" + } + }, + { + "model": "models.concept", + "pk": "00000000-0000-0000-0000-000000000007", + "fields": { + "nodetype": "Concept", + "legacyoid": "DEFAULT RESOURCE TO RESOURCE RELATIONSHIP TYPE" + } + }, + { + "model": "models.concept", + "pk": "0fc72a30-2ead-4afd-9e7a-a5067d2cc5c8", + "fields": { + "nodetype": "Concept", + "legacyoid": "http://localhost:8000/0fc72a30-2ead-4afd-9e7a-a5067d2cc5c8" + } + }, + { + "model": "models.concept", + "pk": "6490ac87-ac80-41d7-a135-1119b4cd912d", + "fields": { + "nodetype": "Concept", + "legacyoid": "http://localhost:8000/6490ac87-ac80-41d7-a135-1119b4cd912d" + } + }, + { + "model": "models.concept", + "pk": "7c90899a-dbe9-4574-9175-e69481a80b3c", + "fields": { + "nodetype": "ConceptScheme", + "legacyoid": "http://localhost:8000/7c90899a-dbe9-4574-9175-e69481a80b3c" + } + }, + { + "model": "models.concept", + "pk": "843c76cb-7e2a-47dd-b588-a0d0fcb00de7", + "fields": { + "nodetype": "Collection", + "legacyoid": "843c76cb-7e2a-47dd-b588-a0d0fcb00de7" + } + }, + { + "model": "models.concept", + "pk": "89ff530a-f350-44f0-ac88-bdd8904eb57e", + "fields": { + "nodetype": "Concept", + "legacyoid": "http://localhost:8000/89ff530a-f350-44f0-ac88-bdd8904eb57e" + } + }, + { + "model": "models.concept", + "pk": "abeaef4a-f57c-40d3-bdb1-9d16bf0811b5", + "fields": { + "nodetype": "Collection", + "legacyoid": "Polyhiearchy Collection 2" + } + }, + { + "model": "models.value", + "pk": "1219d940-701f-4e88-9105-b9d6d3ca1022", + "fields": { + "concept": "abeaef4a-f57c-40d3-bdb1-9d16bf0811b5", + "valuetype": "prefLabel", + "value": "Polyhiearchy Collection 2", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "34458821-a31b-4727-8199-d805efb344ad", + "fields": { + "concept": "0fc72a30-2ead-4afd-9e7a-a5067d2cc5c8", + "valuetype": "identifier", + "value": "http://www.archesproject.org/0fc72a30-2ead-4afd-9e7a-a5067d2cc5c8", + "language": "en" + } + }, + { + "model": "models.value", + "pk": 
"7949d7b5-6e57-469a-8f38-87aac08e1788", + "fields": { + "concept": "0fc72a30-2ead-4afd-9e7a-a5067d2cc5c8", + "valuetype": "prefLabel", + "value": "Test Concept 2", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "91f70cc9-f9f2-48ab-8393-9ae6c26e6c8e", + "fields": { + "concept": "89ff530a-f350-44f0-ac88-bdd8904eb57e", + "valuetype": "prefLabel", + "value": "French Test Concept 1", + "language": "fr" + } + }, + { + "model": "models.value", + "pk": "97ab3f70-d0e5-49ed-b896-3574251f5723", + "fields": { + "concept": "843c76cb-7e2a-47dd-b588-a0d0fcb00de7", + "valuetype": "prefLabel", + "value": "Polyhierarchical Collection Test", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "9fa56006-6828-480f-8395-ad5c5a84726b", + "fields": { + "concept": "89ff530a-f350-44f0-ac88-bdd8904eb57e", + "valuetype": "prefLabel", + "value": "Test Concept 1", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "ac41d9be-79db-4256-b368-2f4559cfbe55", + "fields": { + "concept": "00000000-0000-0000-0000-000000000007", + "valuetype": "prefLabel", + "value": "is related to", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "b1f230c5-c9c3-4f86-a23f-8c56b2094c86", + "fields": { + "concept": "6490ac87-ac80-41d7-a135-1119b4cd912d", + "valuetype": "identifier", + "value": "http://www.archesproject.org/6490ac87-ac80-41d7-a135-1119b4cd912d", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "b967985b-585c-4f70-8b55-20c251250ba7", + "fields": { + "concept": "0fc72a30-2ead-4afd-9e7a-a5067d2cc5c8", + "valuetype": "prefLabel", + "value": "French Test Concept 2", + "language": "fr" + } + }, + { + "model": "models.value", + "pk": "bc905a83-223b-419e-850c-4e9c0c1b817a", + "fields": { + "concept": "89ff530a-f350-44f0-ac88-bdd8904eb57e", + "valuetype": "identifier", + "value": "http://www.archesproject.org/89ff530a-f350-44f0-ac88-bdd8904eb57e", + "language": "en" + } + }, + { + "model": "models.value", + "pk": 
"c12e7e6c-e417-11e6-b14b-0738913905b4", + "fields": { + "concept": "00000000-0000-0000-0000-000000000004", + "valuetype": "prefLabel", + "value": "Resource To Resource Relationship Types", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "c477760d-ec4e-4911-81d1-706a2ffb6a22", + "fields": { + "concept": "843c76cb-7e2a-47dd-b588-a0d0fcb00de7", + "valuetype": "identifier", + "value": "http://www.archesproject.org/7c90899a-dbe9-4574-9175-e69481a80b3c", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "d8c60bf4-e786-11e6-905a-b756ec83dad5", + "fields": { + "concept": "00000000-0000-0000-0000-000000000001", + "valuetype": "prefLabel", + "value": "Arches", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "d8c622f6-e786-11e6-905a-475a5eee86f5", + "fields": { + "concept": "00000000-0000-0000-0000-000000000005", + "valuetype": "prefLabel", + "value": "Resource To Resource Relationship Types", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "ed0c85a3-d679-42e2-b465-b5f79e53122b", + "fields": { + "concept": "7c90899a-dbe9-4574-9175-e69481a80b3c", + "valuetype": "identifier", + "value": "http://www.archesproject.org/7c90899a-dbe9-4574-9175-e69481a80b3c", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "f5e1a756-c658-4a3c-bc3a-e9293242e8f7", + "fields": { + "concept": "7c90899a-dbe9-4574-9175-e69481a80b3c", + "valuetype": "prefLabel", + "value": "Polyhierarchical Collection Test", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "fad6f17d-f7c8-4fa1-b358-e8626571599e", + "fields": { + "concept": "6490ac87-ac80-41d7-a135-1119b4cd912d", + "valuetype": "prefLabel", + "value": "Test Concept 3", + "language": "en" + } + }, + { + "model": "models.value", + "pk": "fdbbc8e9-9996-4ae2-b244-29bfcf4f8951", + "fields": { + "concept": "6490ac87-ac80-41d7-a135-1119b4cd912d", + "valuetype": "prefLabel", + "value": "French Test Concept 3", + "language": "fr" + } + }, + { + "model": 
"models.value", + "pk": "fee39428-e83f-11e6-b49d-9b976819ac02", + "fields": { + "concept": "00000000-0000-0000-0000-000000000006", + "valuetype": "prefLabel", + "value": "Candidates", + "language": "en" + } + }, + { + "model": "models.relation", + "pk": "1704bf8f-1ec0-43f0-a4d0-c444baf067fb", + "fields": { + "conceptfrom": "843c76cb-7e2a-47dd-b588-a0d0fcb00de7", + "conceptto": "0fc72a30-2ead-4afd-9e7a-a5067d2cc5c8", + "relationtype": "member" + } + }, + { + "model": "models.relation", + "pk": "25da169c-e2a2-47f2-8469-216240853fee", + "fields": { + "conceptfrom": "7c90899a-dbe9-4574-9175-e69481a80b3c", + "conceptto": "6490ac87-ac80-41d7-a135-1119b4cd912d", + "relationtype": "hasTopConcept" + } + }, + { + "model": "models.relation", + "pk": "5391022a-735d-4dab-b695-0ec107d2812e", + "fields": { + "conceptfrom": "7c90899a-dbe9-4574-9175-e69481a80b3c", + "conceptto": "89ff530a-f350-44f0-ac88-bdd8904eb57e", + "relationtype": "hasTopConcept" + } + }, + { + "model": "models.relation", + "pk": "54d5685a-daf3-45af-b619-9bf61cb4d017", + "fields": { + "conceptfrom": "7c90899a-dbe9-4574-9175-e69481a80b3c", + "conceptto": "0fc72a30-2ead-4afd-9e7a-a5067d2cc5c8", + "relationtype": "hasTopConcept" + } + }, + { + "model": "models.relation", + "pk": "62c053fe-706f-11ef-b739-5f5ed88be48e", + "fields": { + "conceptfrom": "00000000-0000-0000-0000-000000000001", + "conceptto": "00000000-0000-0000-0000-000000000004", + "relationtype": "hasTopConcept" + } + }, + { + "model": "models.relation", + "pk": "62c061d2-706f-11ef-b739-9b431c19ea92", + "fields": { + "conceptfrom": "00000000-0000-0000-0000-000000000005", + "conceptto": "00000000-0000-0000-0000-000000000007", + "relationtype": "member" + } + }, + { + "model": "models.relation", + "pk": "62f2909e-706f-11ef-b739-07f87b82b9bb", + "fields": { + "conceptfrom": "00000000-0000-0000-0000-000000000004", + "conceptto": "00000000-0000-0000-0000-000000000007", + "relationtype": "narrower" + } + }, + { + "model": "models.relation", + "pk": 
"7096e255-3c88-4e1d-ab02-3d917afb9e16", + "fields": { + "conceptfrom": "843c76cb-7e2a-47dd-b588-a0d0fcb00de7", + "conceptto": "89ff530a-f350-44f0-ac88-bdd8904eb57e", + "relationtype": "member" + } + }, + { + "model": "models.relation", + "pk": "81d4e52f-1750-4a30-83c7-e92cbfb8e1c5", + "fields": { + "conceptfrom": "abeaef4a-f57c-40d3-bdb1-9d16bf0811b5", + "conceptto": "0fc72a30-2ead-4afd-9e7a-a5067d2cc5c8", + "relationtype": "member" + } + }, + { + "model": "models.relation", + "pk": "8e813235-2aaa-4d36-bc9a-33d39e2b41c3", + "fields": { + "conceptfrom": "abeaef4a-f57c-40d3-bdb1-9d16bf0811b5", + "conceptto": "89ff530a-f350-44f0-ac88-bdd8904eb57e", + "relationtype": "member" + } + }, + { + "model": "models.relation", + "pk": "b066c2d2-d12a-4689-8a70-400d80c1e39b", + "fields": { + "conceptfrom": "843c76cb-7e2a-47dd-b588-a0d0fcb00de7", + "conceptto": "6490ac87-ac80-41d7-a135-1119b4cd912d", + "relationtype": "member" + } + }, + { + "model": "models.relation", + "pk": "d149161a-74b2-4887-a2b5-e10957c970c3", + "fields": { + "conceptfrom": "abeaef4a-f57c-40d3-bdb1-9d16bf0811b5", + "conceptto": "6490ac87-ac80-41d7-a135-1119b4cd912d", + "relationtype": "member" + } + }, + { + "model": "models.language", + "pk": 1, + "fields": { + "code": "en", + "name": "English", + "default_direction": "ltr", + "scope": "system", + "isdefault": true + } + }, + { + "model": "models.language", + "pk": 2, + "fields": { + "code": "de", + "name": "German", + "default_direction": "ltr", + "scope": "system", + "isdefault": false + } + }, + { + "model": "models.language", + "pk": 3, + "fields": { + "code": "en-gb", + "name": "British English", + "default_direction": "ltr", + "scope": "system", + "isdefault": false + } + }, + { + "model": "models.language", + "pk": 4, + "fields": { + "code": "es", + "name": "Spanish", + "default_direction": "ltr", + "scope": "system", + "isdefault": false + } + }, + { + "model": "models.language", + "pk": 5, + "fields": { + "code": "fr", + "name": "French", + 
"default_direction": "ltr", + "scope": "system", + "isdefault": false + } + } +] \ No newline at end of file From 34bb96159f6085a8bf053186096cd7caa49ff0e6 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 11 Sep 2024 16:11:52 -0400 Subject: [PATCH 16/25] typo nit #23 --- tests/data/polyhierarchical_collections.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/polyhierarchical_collections.json b/tests/data/polyhierarchical_collections.json index adbdedf..464e766 100644 --- a/tests/data/polyhierarchical_collections.json +++ b/tests/data/polyhierarchical_collections.json @@ -84,7 +84,7 @@ "pk": "abeaef4a-f57c-40d3-bdb1-9d16bf0811b5", "fields": { "nodetype": "Collection", - "legacyoid": "Polyhiearchy Collection 2" + "legacyoid": "Polyhierarchy Collection 2" } }, { @@ -93,7 +93,7 @@ "fields": { "concept": "abeaef4a-f57c-40d3-bdb1-9d16bf0811b5", "valuetype": "prefLabel", - "value": "Polyhiearchy Collection 2", + "value": "Polyhierarchy Collection 2", "language": "en" } }, From b50924bd65d2b7106f46df24b72f4929955115d6 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 11 Sep 2024 22:10:21 -0400 Subject: [PATCH 17/25] nit #23 --- .../0002_etl_collections_to_controlled_lists.py | 1 + tests/{ => fixtures}/data/controlled_lists.xlsx | Bin 2 files changed, 1 insertion(+) rename tests/{ => fixtures}/data/controlled_lists.xlsx (100%) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 4de85cc..8419e4f 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -240,6 +240,7 @@ class Migration(migrations.Migration): join values v on v.conceptid = lih.id where valuetype = 'prefLabel' or valuetype = 'altLabel' + or valuetype = 'hiddenLabel' or valuetype = 'scopeNote' or valuetype = 'definition' or valuetype 
= 'example' diff --git a/tests/data/controlled_lists.xlsx b/tests/fixtures/data/controlled_lists.xlsx similarity index 100% rename from tests/data/controlled_lists.xlsx rename to tests/fixtures/data/controlled_lists.xlsx From eaabd91ec9217a5aac620a5d13b9035bc2820227 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 11 Sep 2024 22:11:00 -0400 Subject: [PATCH 18/25] Rearange test fixtures #23 --- tests/cli_tests.py | 14 ++++---------- .../data/polyhierarchical_collections.json | 0 tests/test_settings.py | 2 +- 3 files changed, 5 insertions(+), 11 deletions(-) rename tests/{ => fixtures}/data/polyhierarchical_collections.json (100%) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 7868f7c..2e13196 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -45,7 +45,9 @@ def test_export_controlled_list(self): class ListImportPackageTests(TestCase): def test_import_controlled_list(self): - input_file = os.path.join(PROJECT_TEST_ROOT, "data", "controlled_lists.xlsx") + input_file = os.path.join( + PROJECT_TEST_ROOT, "fixtures", "data", "controlled_lists.xlsx" + ) output = io.StringIO() # packages command does not yet fully avoid print() with captured_stdout(): @@ -62,15 +64,7 @@ def test_import_controlled_list(self): class RDMToControlledListsETLTests(TestCase): - - @classmethod - def setUpTestData(cls): - - management.call_command( - "loaddata", - "tests/data/polyhierarchical_collections.json", - format="json", - ) + fixtures = ["polyhierarchical_collections"] def test_migrate_collections_to_controlled_lists(self): output = io.StringIO() diff --git a/tests/data/polyhierarchical_collections.json b/tests/fixtures/data/polyhierarchical_collections.json similarity index 100% rename from tests/data/polyhierarchical_collections.json rename to tests/fixtures/data/polyhierarchical_collections.json diff --git a/tests/test_settings.py b/tests/test_settings.py index 06310ed..4537316 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -23,7 
+23,7 @@ PACKAGE_NAME = "arches_references" PROJECT_TEST_ROOT = os.path.dirname(__file__) -MEDIA_ROOT = os.path.join(PROJECT_TEST_ROOT, "fixtures", "data") +FIXTURE_DIRS = [os.path.join(PROJECT_TEST_ROOT, "fixtures", "data")] BUSINESS_DATA_FILES = ( # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". From 7939a9f7b9b5c25155a87247384cd177a7656ded Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Wed, 11 Sep 2024 22:15:03 -0400 Subject: [PATCH 19/25] Add more robust tests for RDM to CLM migration #23 --- tests/cli_tests.py | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 2e13196..280e08c 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -7,7 +7,7 @@ from django.test.client import Client from django.test.utils import captured_stdout -from arches_references.models import List +from arches_references.models import List, ListItem, ListItemValue from arches.app.utils.skos import SKOSReader from .test_settings import PROJECT_TEST_ROOT @@ -71,7 +71,10 @@ def test_migrate_collections_to_controlled_lists(self): management.call_command( "controlled_lists", operation="migrate_collections_to_controlled_lists", - collections_to_migrate=["Polyhierarchical Collection Test"], + collections_to_migrate=[ + "Polyhierarchical Collection Test", + "Polyhierarchy Collection 2", + ], host="http://localhost:8000/plugins/controlled-list-manager/item/", preferred_sort_language="en", overwrite=False, @@ -82,6 +85,45 @@ def test_migrate_collections_to_controlled_lists(self): imported_items = imported_list.list_items.all() self.assertEqual(len(imported_items), 3) + imported_item_values = ListItemValue.objects.filter( + list_item__in=imported_items + ) + self.assertQuerySetEqual( + imported_item_values.values_list("value", flat=True).order_by("value"), + [ + "French Test Concept 1", + "French Test Concept 2", + "French Test Concept 3", + 
"Test Concept 1", + "Test Concept 2", + "Test Concept 3", + ], + ) + + imported_list_2 = List.objects.get(name="Polyhierarchy Collection 2") + imported_items_2 = imported_list_2.list_items.all() + imported_item_values_2 = ListItemValue.objects.filter( + list_item__in=imported_items_2 + ) + + # Check that new uuids were generated for polyhiearchical concepts + self.assertNotEqual( + imported_item_values.filter(value="Test Concept 1"), + imported_item_values_2.filter(value="Test Concept 1"), + ) + + # Check that items with multiple prefLabels in different languages have same listitemid + self.assertEqual( + imported_item_values.get(value="Test Concept 1").list_item_id, + imported_item_values.get(value="French Test Concept 1").list_item_id, + ) + + # But that items with prefLabls in different languages have different listitemvalue ids + self.assertNotEqual( + imported_item_values.get(value="Test Concept 1").pk, + imported_item_values.get(value="French Test Concept 1").pk, + ) + def test_no_matching_collection_error(self): expected_output = "Failed to find the following collections in the database: Collection That Doesn't Exist" output = io.StringIO() From 862903e2e2e289bbbfcd8a52d109cd44c6b275ff Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Thu, 12 Sep 2024 13:16:03 -0400 Subject: [PATCH 20/25] typo nits #23 --- tests/cli_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 280e08c..33e45db 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -106,7 +106,7 @@ def test_migrate_collections_to_controlled_lists(self): list_item__in=imported_items_2 ) - # Check that new uuids were generated for polyhiearchical concepts + # Check that new uuids were generated for polyhierarchical concepts self.assertNotEqual( imported_item_values.filter(value="Test Concept 1"), imported_item_values_2.filter(value="Test Concept 1"), @@ -118,7 +118,7 @@ def 
test_migrate_collections_to_controlled_lists(self): imported_item_values.get(value="French Test Concept 1").list_item_id, ) - # But that items with prefLabls in different languages have different listitemvalue ids + # But that items with prefLabels in different languages have different listitemvalue ids self.assertNotEqual( imported_item_values.get(value="Test Concept 1").pk, imported_item_values.get(value="French Test Concept 1").pk, From fae5c51d75a7263464ea43b80ed627cf8c529fbc Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Thu, 12 Sep 2024 16:22:57 -0400 Subject: [PATCH 21/25] Harden against nonexistent psl options #23 --- .../management/commands/controlled_lists.py | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/arches_references/management/commands/controlled_lists.py b/arches_references/management/commands/controlled_lists.py index f62d749..db09106 100644 --- a/arches_references/management/commands/controlled_lists.py +++ b/arches_references/management/commands/controlled_lists.py @@ -1,5 +1,5 @@ -from arches.app.models.models import Value -from django.core.management.base import BaseCommand +from arches.app.models.models import Value, Language +from django.core.management.base import BaseCommand, CommandError class Command(BaseCommand): @@ -58,12 +58,22 @@ def add_arguments(self, parser): def handle(self, *args, **options): if options["operation"] == "migrate_collections_to_controlled_lists": - self.migrate_collections_to_controlled_lists( - collections_to_migrate=options["collections_to_migrate"], - host=options["host"], - overwrite=options["overwrite"], - preferred_sort_language=options["preferred_sort_language"], - ) + psl = options["preferred_sort_language"] + try: + Language.objects.get(code=psl) + except Language.DoesNotExist: + raise CommandError( + "The preferred sort language, {0}, does not exist in the database.".format( + psl + ) + ) + else: + self.migrate_collections_to_controlled_lists( + 
collections_to_migrate=options["collections_to_migrate"], + host=options["host"], + overwrite=options["overwrite"], + preferred_sort_language=psl, + ) def migrate_collections_to_controlled_lists( self, From 9fcdd84899a96853284f02dff0f21baf324d447b Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Thu, 12 Sep 2024 16:23:24 -0400 Subject: [PATCH 22/25] Capture all possible value types #23 --- .../0002_etl_collections_to_controlled_lists.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py index 8419e4f..776e7cf 100644 --- a/arches_references/migrations/0002_etl_collections_to_controlled_lists.py +++ b/arches_references/migrations/0002_etl_collections_to_controlled_lists.py @@ -238,17 +238,9 @@ class Migration(migrations.Migration): v.valuetype from alpha_sorted_list_item_hierarchy lih join values v on v.conceptid = lih.id - where valuetype = 'prefLabel' - or valuetype = 'altLabel' - or valuetype = 'hiddenLabel' - or valuetype = 'scopeNote' - or valuetype = 'definition' - or valuetype = 'example' - or valuetype = 'historyNote' - or valuetype = 'editorialNote' - or valuetype = 'changeNote' - or valuetype = 'note' - or valuetype = 'description'; + where valuetype in ( + select valuetype from d_value_types where category in ('note', 'label') + ); end loop; -- Assign row number to help identify concepts that participate in multiple collections From b77e2ceeb1c6f217e9199edad233903a1746c031 Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Thu, 12 Sep 2024 16:41:35 -0400 Subject: [PATCH 23/25] Makes fixtures more accessible --- arches_references/settings.py | 3 +++ tests/test_settings.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arches_references/settings.py b/arches_references/settings.py index bd945b4..fadbf68 100644 --- a/arches_references/settings.py +++ 
b/arches_references/settings.py @@ -422,6 +422,9 @@ # TODO: remove when finalizing release SILENCED_SYSTEM_CHECKS += ["arches.E002"] +# Location for test data fixtures +FIXTURE_DIRS = [os.path.join(APP_ROOT, "..", "tests", "fixtures", "data")] + try: from .package_settings import * except ImportError: diff --git a/tests/test_settings.py b/tests/test_settings.py index 4537316..06310ed 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -23,7 +23,7 @@ PACKAGE_NAME = "arches_references" PROJECT_TEST_ROOT = os.path.dirname(__file__) -FIXTURE_DIRS = [os.path.join(PROJECT_TEST_ROOT, "fixtures", "data")] +MEDIA_ROOT = os.path.join(PROJECT_TEST_ROOT, "fixtures", "data") BUSINESS_DATA_FILES = ( # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". From 6c90e604a5893aeb45cfa83fceac290947c9832b Mon Sep 17 00:00:00 2001 From: Johnathan Clementi Date: Thu, 12 Sep 2024 16:42:21 -0400 Subject: [PATCH 24/25] Add test to ensure psl cmd error is functional #23 --- tests/cli_tests.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 33e45db..b50ef6c 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -6,6 +6,7 @@ from django.test import TestCase from django.test.client import Client from django.test.utils import captured_stdout +from django.core.management.base import CommandError from arches_references.models import List, ListItem, ListItemValue from arches.app.utils.skos import SKOSReader @@ -137,3 +138,20 @@ def test_no_matching_collection_error(self): stderr=output, ) self.assertIn(expected_output, output.getvalue().strip()) + + def test_no_matching_language_error(self): + expected_output = ( + "The preferred sort language, nonexistent, does not exist in the database." 
+ ) + output = io.StringIO() + with self.assertRaises(CommandError) as e: + management.call_command( + "controlled_lists", + operation="migrate_collections_to_controlled_lists", + collections_to_migrate=["Polyhierarchical Collection Test"], + host="http://localhost:8000/plugins/controlled-list-manager/item/", + preferred_sort_language="nonexistent", + overwrite=False, + stderr=output, + ) + self.assertEqual(expected_output, str(e.exception)) From f57a31bb0518f223a4267d120f7d461758e49eb3 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 12 Sep 2024 17:38:27 -0400 Subject: [PATCH 25/25] Error nicely if list with same name as collection already exists --- .../management/commands/controlled_lists.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arches_references/management/commands/controlled_lists.py b/arches_references/management/commands/controlled_lists.py index db09106..17f9ec3 100644 --- a/arches_references/management/commands/controlled_lists.py +++ b/arches_references/management/commands/controlled_lists.py @@ -1,4 +1,5 @@ from arches.app.models.models import Value, Language +from arches_references.models import List from django.core.management.base import BaseCommand, CommandError @@ -67,13 +68,20 @@ def handle(self, *args, **options): psl ) ) - else: - self.migrate_collections_to_controlled_lists( - collections_to_migrate=options["collections_to_migrate"], - host=options["host"], - overwrite=options["overwrite"], - preferred_sort_language=psl, - ) + + if not options["overwrite"]: + for collection_name in options["collections_to_migrate"]: + if List.objects.filter(name=collection_name).exists(): + raise CommandError( + f"The collection '{collection_name}' already exists." 
+ ) + + self.migrate_collections_to_controlled_lists( + collections_to_migrate=options["collections_to_migrate"], + host=options["host"], + overwrite=options["overwrite"], + preferred_sort_language=psl, + ) def migrate_collections_to_controlled_lists( self,