Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/clean duplicated documents 352 #77

Merged
merged 10 commits into from
Feb 8, 2024
2 changes: 1 addition & 1 deletion ansible/roles/deploy_backend/defaults/main/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
active_deadline_seconds: 1800
command: "dist/scripts/importChainedDocumentsFromSder.js --count 500 --threshold 1500"
- name: "export-j-4"
schedule: "30 17 * * *"
schedule: "25 17 * * *"
successful_jobs_history_limit: 7
failed_jobs_history_limit: 7
backoff_limit: 2
Expand Down
12 changes: 0 additions & 12 deletions packages/courDeCassation/src/scripts/cleanOrphansTreatments.ts

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ import { parametersHandler } from '../lib/parametersHandler';
(async () => {
const { environment, settings } = await parametersHandler.getParameters();
const backend = buildBackend(environment, settings);

await backend.runScript(
() => backend.scripts.cleanDocuments.run(),
backend.scripts.cleanDocuments.option,
);

const sderExporter = buildSderExporter(environment, settings);
backend.runScript(
() => sderExporter.exportAllRejectedDocuments(environment),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ import { parametersHandler } from '../lib/parametersHandler';
(async () => {
const { environment, settings } = await parametersHandler.getParameters();
const backend = buildBackend(environment, settings);

await backend.runScript(
() => backend.scripts.cleanDocuments.run(),
backend.scripts.cleanDocuments.option,
);

const sderExporter = buildSderExporter(environment, settings);

backend.runScript(() => sderExporter.exportAllTreatedDocuments(environment), {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ import { parametersHandler } from '../lib/parametersHandler';
const { environment, settings } = await parametersHandler.getParameters();
const { days } = parseArgv();
const backend = buildBackend(environment, settings);

await backend.runScript(
() => backend.scripts.cleanDocuments.run(),
backend.scripts.cleanDocuments.option,
);

const sderExporter = buildSderExporter(environment, settings);
backend.runScript(
() => sderExporter.exportTreatedDocumentsSince(days, environment),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ import { parametersHandler } from '../lib/parametersHandler';
(async () => {
const { environment, settings } = await parametersHandler.getParameters();
const backend = buildBackend(environment, settings);

await backend.runScript(
() => backend.scripts.cleanDocuments.run(),
backend.scripts.cleanDocuments.option,
);

const sderExporter = buildSderExporter(environment, settings);

backend.runScript(
Expand Down
5 changes: 0 additions & 5 deletions packages/generic/backend/src/app/buildBackend.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import {
resetUntreatedDocumentsForTest,
revertOneMigration,
runNewMigrations,
cleanOrphansTreatments,
} from './scripts';

export { buildBackend };
Expand Down Expand Up @@ -111,10 +110,6 @@ function buildBackend(environment: environmentType, settings: settingsType) {
run: runNewMigrations,
option: { shouldLoadDb: true, shouldExit: true },
},
cleanOrphansTreatments: {
run: cleanOrphansTreatments,
option: { shouldLoadDb: true, shouldExit: true },
},
},
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,16 @@ import { cleanDuplicatedDocuments } from './cleanDuplicatedDocuments';
describe('cleanDuplicatedDocuments', () => {
it('should clean the DuplicatedDocuments', async () => {
const firstDocument = documentModule.generator.generate();
const secondDocument = documentModule.generator.generate();
const secondDocument = documentModule.generator.generate({
creationDate: 1274657452000,
});
const secondDocumentWithHigherStatus = documentModule.generator.generate({
...secondDocument,
creationDate: 1674657452000,
_id: idModule.lib.buildId(),
status: 'done',
});

const documentRepository = buildDocumentRepository();
await documentRepository.insertMany([
firstDocument,
Expand All @@ -22,9 +26,15 @@ describe('cleanDuplicatedDocuments', () => {

const fetchedDocuments = await documentRepository.findAll();

const fetchedIds = fetchedDocuments.map((u) => u._id).sort();
const fetchedIds = fetchedDocuments
.map((u) => {
return u._id;
})
.sort();
const expectedIds = [firstDocument, secondDocumentWithHigherStatus]
.map((u) => u._id)
.map((u) => {
return u._id;
})
.sort();
expect(fetchedIds).toEqual(expectedIds);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { logger } from '../../../utils';
export { cleanDuplicatedDocuments };

/**
* Delete all doubled documents (same source, same documentNumber, same text)
* Delete all duplicated documents (same source, same documentNumber and same text), keeping the most recent one based on creationDate
*/
async function cleanDuplicatedDocuments() {
logger.log({ operationName: 'cleanDuplicatedDocuments', msg: 'START' });
Expand All @@ -29,7 +29,15 @@ async function cleanDuplicatedDocuments() {
const currentDocument = sortedDocuments[index];
const nextDocument = sortedDocuments[index + 1];
if (areDocumentsIdentical(currentDocument, nextDocument)) {
documentsToDelete.push(nextDocument);
if (
currentDocument.creationDate &&
nextDocument.creationDate &&
currentDocument.creationDate > nextDocument.creationDate
) {
documentsToDelete.push(nextDocument);
} else {
documentsToDelete.push(currentDocument);
}
}
}

Expand All @@ -44,6 +52,7 @@ async function cleanDuplicatedDocuments() {
logger.log({ operationName: 'cleanDuplicatedDocuments', msg: 'DONE' });
}

// Comparison helper used by cleanDuplicatedDocuments to detect duplicates
function areDocumentsIdentical(
document1: documentType,
document2: documentType,
Expand Down
2 changes: 0 additions & 2 deletions packages/generic/backend/src/app/scripts/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import { resetUntreatedDocumentsForTest } from './resetUntreatedDocumentsForTest
import { purgeDb } from './purgeDb';
import { renewCache } from './renewCache';
import { setIndexesOnAllCollections } from './setIndexesOnAllCollections';
import { cleanOrphansTreatments } from './cleanDocuments/cleanOrphansTreatments';

export {
cleanDocuments,
Expand All @@ -43,5 +42,4 @@ export {
revertOneMigration,
runNewMigrations,
setIndexesOnAllCollections,
cleanOrphansTreatments,
};
Loading