diff --git a/src/identifiers/forms.py b/src/identifiers/forms.py
index d1196bfa0b..5401f5413a 100644
--- a/src/identifiers/forms.py
+++ b/src/identifiers/forms.py
@@ -16,6 +16,7 @@ class Meta:
         )
 
     def clean(self):
+        super().clean()
         cleaned_data = self.cleaned_data
         id_type = self.cleaned_data.get('id_type')
         identifier = self.cleaned_data.get('identifier')
diff --git a/src/identifiers/migrations/0009_deduplicate_identifiers_20220527.py b/src/identifiers/migrations/0009_deduplicate_identifiers_20220527.py
new file mode 100644
index 0000000000..f2fc758caa
--- /dev/null
+++ b/src/identifiers/migrations/0009_deduplicate_identifiers_20220527.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.29 on 2022-03-15 20:06
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+def deduplicate_identifiers(apps, schema_editor):
+    """Delete duplicate identifiers, keeping the lowest-pk row of each group.
+
+    Rows are grouped per journal, per id_type ('doi', 'uri', 'pubid') and
+    per identifier string. Each group with more than one row is reported on
+    stdout and every row except the one with the lowest pk is deleted.
+    """
+    Identifier = apps.get_model("identifiers", "Identifier")
+    Journal = apps.get_model("journal", "Journal")
+    for journal in Journal.objects.all():
+        identifiers_in_journal = Identifier.objects.filter(article__journal=journal)
+        for id_type in ['doi', 'uri', 'pubid']:
+            identifiers_of_type = identifiers_in_journal.filter(id_type=id_type)
+            identifier_strings = set(
+                identifiers_of_type.values_list('identifier', flat=True)
+            )
+            for identifier_string in identifier_strings:
+                # Fetch the group once with an explicit ordering: slicing an
+                # unordered queryset in two separate queries does not
+                # guarantee that the kept row and the duplicates are disjoint.
+                rows = list(
+                    identifiers_of_type.filter(identifier=identifier_string)
+                    .order_by('pk')
+                    .values_list('pk', 'identifier')
+                )
+                if len(rows) < 2:
+                    continue
+                to_keep_pk = rows[0][0]
+                duplicates = rows[1:]
+                print('\n\n\n')
+                print('To keep:')
+                print(id_type, to_keep_pk, identifier_string)
+                print('Duplicates:')
+                for duplicate_pk, duplicate_identifier in duplicates:
+                    print(id_type, duplicate_pk, duplicate_identifier)
+                print('\n\n\n')
+                duplicate_pks = [pk for pk, _ in duplicates]
+                Identifier.objects.filter(pk__in=duplicate_pks).delete()
+
+
+class Migration(migrations.Migration):
+    atomic = False
+    dependencies = [
+        ('identifiers', '0008_batch_doi_registration_continued_20220524'),
+    ]
+
+    operations = [
+        migrations.RunPython(deduplicate_identifiers, reverse_code=migrations.RunPython.noop),
+    ]