Skip to content

Commit

Permalink
#2848 Migration work in progress
Browse files Browse the repository at this point in the history
  • Loading branch information
joemull committed May 30, 2022
1 parent 550d184 commit aa5bca0
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/identifiers/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class Meta:
)

def clean(self):
super().clean()
cleaned_data = self.cleaned_data
id_type = self.cleaned_data.get('id_type')
identifier = self.cleaned_data.get('identifier')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.29 on 2022-03-15 20:06
from __future__ import unicode_literals

from django.db import migrations, models
import django.db.models.deletion


def deduplicate_identifiers(apps, schema_editor):
    """Delete duplicate identifiers within each journal, keeping one per value.

    For every journal, and for each of the 'doi', 'uri' and 'pubid'
    identifier types, the identifiers attached to that journal's articles
    are grouped by their ``identifier`` string. Within each group the row
    with the lowest id is kept and every other row is deleted.

    Args:
        apps: App registry handed in by ``migrations.RunPython``; used to
            look up the ``Identifier`` and ``Journal`` models.
        schema_editor: Handed in by ``migrations.RunPython``; unused here.
    """
    Identifier = apps.get_model("identifiers", "Identifier")
    Journal = apps.get_model("journal", "Journal")
    for journal in Journal.objects.all():
        identifiers_in_journal = Identifier.objects.filter(article__journal=journal)
        for id_type in ['doi', 'uri', 'pubid']:
            identifiers_of_type = identifiers_in_journal.filter(id_type=id_type)
            for doi_string in set(identifiers_of_type.values_list('identifier', flat=True)):
                # Fetch the matching ids exactly once, with an explicit
                # ordering, and materialise them. Slicing two separate
                # unordered querysets for "first" and "rest" gives no
                # guarantee that both see the same row order, so the row
                # chosen to keep could also end up in the delete set.
                matching_pks = list(
                    identifiers_of_type.filter(identifier=doi_string)
                    .order_by('id')
                    .values_list('id', flat=True)
                )
                if not matching_pks:
                    # Nothing left for this value (e.g. removed since the
                    # distinct values were read).
                    continue
                to_keep_pk = matching_pks[0]
                duplicate_pks = matching_pks[1:]
                print(id_type, to_keep_pk, doi_string)
                if not duplicate_pks:
                    continue
                # A plain list of ids (not a sliced queryset) is passed to
                # pk__in so the same fixed set is both reported and deleted.
                duplicate_identifiers = list(Identifier.objects.filter(pk__in=duplicate_pks))
                if duplicate_identifiers:
                    print('\n\n\n')
                    print('To keep:')
                    print(id_type, to_keep_pk, doi_string)
                    print('Duplicates:')
                    for dup in duplicate_identifiers:
                        print(id_type, dup.pk, dup.identifier)
                    print('\n\n\n')
                Identifier.objects.filter(pk__in=duplicate_pks).delete()


class Migration(migrations.Migration):
    """Data migration that runs ``deduplicate_identifiers``.

    The reverse operation is a no-op.
    """

    dependencies = [
        ('identifiers', '0008_batch_doi_registration_continued_20220524'),
    ]

    # Opt out of wrapping the whole migration in a single transaction.
    # NOTE(review): rows deleted before a failure would not be rolled
    # back -- confirm this is intended.
    atomic = False

    operations = [
        migrations.RunPython(
            deduplicate_identifiers,
            reverse_code=migrations.RunPython.noop,
        ),
    ]

0 comments on commit aa5bca0

Please sign in to comment.