Skip to content

Commit

Permalink
Merge pull request #358 from wcmc-its/relationship_strategy#341
Browse files (browse the repository at this point in the history)
Gender strategy and misc improvements
  • Loading branch information
paulalbert1 authored Jun 12, 2019
2 parents 418bdce + 24c72ea commit dd4c22b
Show file tree
Hide file tree
Showing 24 changed files with 476,103 additions and 215 deletions.
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -258,17 +258,17 @@
<dependency>
<groupId>edu.cornell.weill.reciter</groupId>
<artifactId>reciter-identity-model</artifactId>
<version>2.0.4</version>
<version>2.0.5</version>
</dependency>
<dependency>
<groupId>edu.cornell.weill.reciter</groupId>
<artifactId>reciter-article-model</artifactId>
<version>2.0.9</version>
<version>2.0.10</version>
</dependency>
<dependency>
<groupId>edu.cornell.weill.reciter</groupId>
<artifactId>reciter-dynamodb-model</artifactId>
<version>2.0.2</version>
<version>2.0.3</version>
</dependency>
<dependency>
<groupId>com.github.bohnman</groupId>
Expand Down
17 changes: 17 additions & 0 deletions src/main/java/reciter/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,20 @@
import com.google.common.collect.Iterables;

import lombok.extern.slf4j.Slf4j;
import reciter.database.dyanmodb.files.GenderFileImport;
import reciter.database.dyanmodb.files.IdentityFileImport;
import reciter.database.dyanmodb.files.InstitutionAfidFileImport;
import reciter.database.dyanmodb.files.MeshTermFileImport;
import reciter.database.dyanmodb.files.ScienceMetrixDepartmentCategoryFileImport;
import reciter.database.dyanmodb.files.ScienceMetrixFileImport;
import reciter.database.dynamodb.model.Gender;
import reciter.database.dynamodb.model.InstitutionAfid;
import reciter.database.dynamodb.model.MeshTerm;
import reciter.database.dynamodb.model.ScienceMetrix;
import reciter.database.dynamodb.model.ScienceMetrixDepartmentCategory;
import reciter.engine.EngineParameters;
import reciter.security.APIKey;
import reciter.service.GenderService;
import reciter.service.ScienceMetrixDepartmentCategoryService;
import reciter.service.ScienceMetrixService;
import reciter.service.dynamo.DynamoDbInstitutionAfidService;
Expand Down Expand Up @@ -98,6 +101,9 @@ public class Application {
@Autowired
private DynamoDbInstitutionAfidService dynamoDbInstitutionAfidService;

@Autowired
private GenderService genderService;

@Value("${use.scopus.articles}")
private boolean useScopusArticles;

Expand Down Expand Up @@ -232,6 +238,9 @@ public void loadDynamoDbTablesAfterStartUp() {
IdentityFileImport identityFileImport = ApplicationContextHolder.getContext().getBean(IdentityFileImport.class);
identityFileImport.importIdentity();

GenderFileImport genderFileImport = ApplicationContextHolder.getContext().getBean(GenderFileImport.class);
genderFileImport.importGender();

if(useScopusArticles) {
InstitutionAfidFileImport institutionAfidFileImport = ApplicationContextHolder.getContext().getBean(InstitutionAfidFileImport.class);
institutionAfidFileImport.importInstitutionAfids();
Expand Down Expand Up @@ -263,6 +272,12 @@ public void populateStaticEngineParameters() {
}
EngineParameters.setMeshCountMap(meshCountMap);
}
log.info("Loading GenderProbability to Engine Parameters");
List<Gender> genders = genderService.findAll();
if(genders != null && !genders.isEmpty()) {
EngineParameters.setGenders(genders);
}

if(useScopusArticles) {
log.info("Loading ScopusInstitutionalAfids to Engine Parameters");
List<InstitutionAfid> instAfids = dynamoDbInstitutionAfidService.findAll();
Expand All @@ -271,6 +286,8 @@ public void populateStaticEngineParameters() {
EngineParameters.setAfiliationNameToAfidMap(institutionAfids);
}
}

log.info("ReCiter is up and ready to use. Please make sure its other components such as Pubmed-Retrieval-Tool is also setup if you wish to do retrieval.");
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
import reciter.algorithm.evidence.targetauthor.department.strategy.DepartmentStringMatchStrategy;
import reciter.algorithm.evidence.targetauthor.email.EmailStrategyContext;
import reciter.algorithm.evidence.targetauthor.email.strategy.EmailStringMatchStrategy;
import reciter.algorithm.evidence.targetauthor.gender.GenderStrategyContext;
import reciter.algorithm.evidence.targetauthor.gender.strategy.GenderStrategy;
import reciter.algorithm.evidence.targetauthor.grant.GrantStrategyContext;
import reciter.algorithm.evidence.targetauthor.grant.strategy.GrantStrategy;
import reciter.algorithm.evidence.targetauthor.journalcategory.JournalCategoryStrategyContext;
Expand Down Expand Up @@ -150,9 +152,14 @@ public class ReCiterArticleScorer extends AbstractArticleScorer {
private StrategyContext personTypeStrategyContext;

/**
* Accpeted Rejected .
* Accepted Rejected .
*/
private StrategyContext acceptedRejectedStrategyContext;

/**
* Gender Strategy
*/
private GenderStrategyContext genderStrategyContext;

/**
* Remove clusters based on cluster information.
Expand Down Expand Up @@ -186,6 +193,7 @@ public ReCiterArticleScorer(Map<Long, ReCiterCluster> clusters, Identity identit
this.journalCategoryStrategyContext = new JournalCategoryStrategyContext(new JournalCategoryStrategy());
this.knownRelationshipsStrategyContext = new KnownRelationshipStrategyContext(new KnownRelationshipStrategy());
this.affiliationStrategyContext = new AffiliationStrategyContext(new CommonAffiliationStrategy());
this.genderStrategyContext = new GenderStrategyContext(new GenderStrategy());

// Using the following strategy contexts in sequence to reassign individual articles
// to selected clusters.
Expand Down Expand Up @@ -238,6 +246,10 @@ public ReCiterArticleScorer(Map<Long, ReCiterCluster> clusters, Identity identit
if(strategyParameters.isAverageClustering()) {
this.strategyContexts.add(this.averageClusteringStrategyContext);
}

if(strategyParameters.isGender()) {
this.strategyContexts.add(this.genderStrategyContext);
}
}


Expand Down Expand Up @@ -293,6 +305,10 @@ public void runArticleScorer(Map<Long, ReCiterCluster> clusters, Identity identi
((ReCiterArticleStrategyContext) acceptedRejectedStrategyContext).executeStrategy(reCiterArticles);
}

if(strategyParameters.isGender()) {
((TargetAuthorStrategyContext) genderStrategyContext).executeStrategy(reCiterArticles, identity);
}

if (strategyParameters.isAverageClustering()) {
((ClusterStrategyContext) averageClusteringStrategyContext).executeStrategy(entry.getValue());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ public double executeStrategy(ReCiterCluster reCiterCluster) {
if(ReCiterArticleScorer.strategyParameters.isUseGoldStandardEvidence()) {
double totalArticleScoreWithoutClustering = ((reCiterArticle.getAuthorNameEvidence() != null)?(reCiterArticle.getAuthorNameEvidence().getNameScoreTotal()):0) +
((reCiterArticle.getEmailEvidence() != null)?reCiterArticle.getEmailEvidence().getEmailMatchScore():0) +
((reCiterArticle.getGenderEvidence() != null && reCiterArticle.getGenderEvidence().getGenderScoreIdentityArticleDiscrepancy() != null)?reCiterArticle.getGenderEvidence().getGenderScoreIdentityArticleDiscrepancy():0) +
reCiterArticle.getGrantEvidenceTotalScore() +
reCiterArticle.getRelationshipEvidencesTotalScore() +
((reCiterArticle.getRelationshipEvidence() != null)?reCiterArticle.getRelationshipEvidence().getRelationshipEvidenceTotalScore():0) +
((reCiterArticle.getEducationYearEvidence() != null)?reCiterArticle.getEducationYearEvidence().getDiscrepancyDegreeYearBachelorScore():0) +
((reCiterArticle.getEducationYearEvidence() != null)?reCiterArticle.getEducationYearEvidence().getDiscrepancyDegreeYearDoctoralScore():0) +
reCiterArticle.getOrganizationalEvidencesTotalScore() +
Expand All @@ -53,8 +54,9 @@ public double executeStrategy(ReCiterCluster reCiterCluster) {
} else {
double totalArticleScoreWithoutClustering = ((reCiterArticle.getAuthorNameEvidence() != null)?(reCiterArticle.getAuthorNameEvidence().getNameScoreTotal()):0) +
((reCiterArticle.getEmailEvidence() != null)?reCiterArticle.getEmailEvidence().getEmailMatchScore():0) +
((reCiterArticle.getGenderEvidence() != null && reCiterArticle.getGenderEvidence().getGenderScoreIdentityArticleDiscrepancy() != null)?reCiterArticle.getGenderEvidence().getGenderScoreIdentityArticleDiscrepancy():0) +
reCiterArticle.getGrantEvidenceTotalScore() +
reCiterArticle.getRelationshipEvidencesTotalScore() +
((reCiterArticle.getRelationshipEvidence() != null)?reCiterArticle.getRelationshipEvidence().getRelationshipEvidenceTotalScore():0) +
((reCiterArticle.getEducationYearEvidence() != null)?reCiterArticle.getEducationYearEvidence().getDiscrepancyDegreeYearBachelorScore():0) +
((reCiterArticle.getEducationYearEvidence() != null)?reCiterArticle.getEducationYearEvidence().getDiscrepancyDegreeYearDoctoralScore():0) +
reCiterArticle.getOrganizationalEvidencesTotalScore() +
Expand Down
Loading

0 comments on commit dd4c22b

Please sign in to comment.