Skip to content

Commit

Permalink
added gender strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
sarbajitdutta committed Jun 12, 2019
1 parent de26901 commit 285b217
Show file tree
Hide file tree
Showing 14 changed files with 604 additions and 103 deletions.
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -258,17 +258,17 @@
<dependency>
<groupId>edu.cornell.weill.reciter</groupId>
<artifactId>reciter-identity-model</artifactId>
<version>2.0.4</version>
<version>2.0.5</version>
</dependency>
<dependency>
<groupId>edu.cornell.weill.reciter</groupId>
<artifactId>reciter-article-model</artifactId>
<version>2.0.10-SNAPSHOT</version>
<version>2.0.10</version>
</dependency>
<dependency>
<groupId>edu.cornell.weill.reciter</groupId>
<artifactId>reciter-dynamodb-model</artifactId>
<version>2.0.3-SNAPSHOT</version>
<version>2.0.3</version>
</dependency>
<dependency>
<groupId>com.github.bohnman</groupId>
Expand Down
21 changes: 17 additions & 4 deletions src/main/java/reciter/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,14 @@
import reciter.database.dyanmodb.files.MeshTermFileImport;
import reciter.database.dyanmodb.files.ScienceMetrixDepartmentCategoryFileImport;
import reciter.database.dyanmodb.files.ScienceMetrixFileImport;
import reciter.database.dynamodb.model.Gender;
import reciter.database.dynamodb.model.InstitutionAfid;
import reciter.database.dynamodb.model.MeshTerm;
import reciter.database.dynamodb.model.ScienceMetrix;
import reciter.database.dynamodb.model.ScienceMetrixDepartmentCategory;
import reciter.engine.EngineParameters;
import reciter.security.APIKey;
import reciter.service.GenderService;
import reciter.service.ScienceMetrixDepartmentCategoryService;
import reciter.service.ScienceMetrixService;
import reciter.service.dynamo.DynamoDbInstitutionAfidService;
Expand Down Expand Up @@ -99,6 +101,9 @@ public class Application {
@Autowired
private DynamoDbInstitutionAfidService dynamoDbInstitutionAfidService;

@Autowired
private GenderService genderService;

@Value("${use.scopus.articles}")
private boolean useScopusArticles;

Expand Down Expand Up @@ -222,16 +227,16 @@ public void checkScopusPubmedService() {
public void loadDynamoDbTablesAfterStartUp() {
if(isFileImport) {
ScienceMetrixDepartmentCategoryFileImport scienceMetrixDepartmentCategoryFileImport = ApplicationContextHolder.getContext().getBean(ScienceMetrixDepartmentCategoryFileImport.class);
//scienceMetrixDepartmentCategoryFileImport.importScienceMetrixDepartmentCategory();
scienceMetrixDepartmentCategoryFileImport.importScienceMetrixDepartmentCategory();

ScienceMetrixFileImport scienceMetrixFileImport = ApplicationContextHolder.getContext().getBean(ScienceMetrixFileImport.class);
//scienceMetrixFileImport.importScienceMetrix();
scienceMetrixFileImport.importScienceMetrix();

MeshTermFileImport meshTermFileImport = ApplicationContextHolder.getContext().getBean(MeshTermFileImport.class);
//meshTermFileImport.importMeshTerms();
meshTermFileImport.importMeshTerms();

IdentityFileImport identityFileImport = ApplicationContextHolder.getContext().getBean(IdentityFileImport.class);
//identityFileImport.importIdentity();
identityFileImport.importIdentity();

GenderFileImport genderFileImport = ApplicationContextHolder.getContext().getBean(GenderFileImport.class);
genderFileImport.importGender();
Expand Down Expand Up @@ -267,6 +272,12 @@ public void populateStaticEngineParameters() {
}
EngineParameters.setMeshCountMap(meshCountMap);
}
log.info("Loading GenderProbability to Engine Parameters");
List<Gender> genders = genderService.findAll();
if(genders != null && !genders.isEmpty()) {
EngineParameters.setGenders(genders);
}

if(useScopusArticles) {
log.info("Loading ScopusInstitutionalAfids to Engine Parameters");
List<InstitutionAfid> instAfids = dynamoDbInstitutionAfidService.findAll();
Expand All @@ -275,6 +286,8 @@ public void populateStaticEngineParameters() {
EngineParameters.setAfiliationNameToAfidMap(institutionAfids);
}
}

log.info("ReCiter is up and ready to use. Please make sure its other components such as Pubmed-Retrieval-Tool is also setup if you wish to do retrieval.");
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
import reciter.algorithm.evidence.targetauthor.department.strategy.DepartmentStringMatchStrategy;
import reciter.algorithm.evidence.targetauthor.email.EmailStrategyContext;
import reciter.algorithm.evidence.targetauthor.email.strategy.EmailStringMatchStrategy;
import reciter.algorithm.evidence.targetauthor.gender.GenderStrategyContext;
import reciter.algorithm.evidence.targetauthor.gender.strategy.GenderStrategy;
import reciter.algorithm.evidence.targetauthor.grant.GrantStrategyContext;
import reciter.algorithm.evidence.targetauthor.grant.strategy.GrantStrategy;
import reciter.algorithm.evidence.targetauthor.journalcategory.JournalCategoryStrategyContext;
Expand Down Expand Up @@ -150,9 +152,14 @@ public class ReCiterArticleScorer extends AbstractArticleScorer {
private StrategyContext personTypeStrategyContext;

/**
* Accpeted Rejected .
* Accepted Rejected .
*/
private StrategyContext acceptedRejectedStrategyContext;

/**
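* Gender strategy context; when the gender strategy is enabled it is executed against the article list and the target identity.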
*/
private GenderStrategyContext genderStrategyContext;

/**
* Remove clusters based on cluster information.
Expand Down Expand Up @@ -186,6 +193,7 @@ public ReCiterArticleScorer(Map<Long, ReCiterCluster> clusters, Identity identit
this.journalCategoryStrategyContext = new JournalCategoryStrategyContext(new JournalCategoryStrategy());
this.knownRelationshipsStrategyContext = new KnownRelationshipStrategyContext(new KnownRelationshipStrategy());
this.affiliationStrategyContext = new AffiliationStrategyContext(new CommonAffiliationStrategy());
this.genderStrategyContext = new GenderStrategyContext(new GenderStrategy());

// Using the following strategy contexts in sequence to reassign individual articles
// to selected clusters.
Expand Down Expand Up @@ -238,6 +246,10 @@ public ReCiterArticleScorer(Map<Long, ReCiterCluster> clusters, Identity identit
if(strategyParameters.isAverageClustering()) {
this.strategyContexts.add(this.averageClusteringStrategyContext);
}

if(strategyParameters.isGender()) {
this.strategyContexts.add(this.genderStrategyContext);
}
}


Expand Down Expand Up @@ -293,6 +305,10 @@ public void runArticleScorer(Map<Long, ReCiterCluster> clusters, Identity identi
((ReCiterArticleStrategyContext) acceptedRejectedStrategyContext).executeStrategy(reCiterArticles);
}

if(strategyParameters.isGender()) {
((TargetAuthorStrategyContext) genderStrategyContext).executeStrategy(reCiterArticles, identity);
}

if (strategyParameters.isAverageClustering()) {
((ClusterStrategyContext) averageClusteringStrategyContext).executeStrategy(entry.getValue());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ public double executeStrategy(ReCiterCluster reCiterCluster) {
if(ReCiterArticleScorer.strategyParameters.isUseGoldStandardEvidence()) {
double totalArticleScoreWithoutClustering = ((reCiterArticle.getAuthorNameEvidence() != null)?(reCiterArticle.getAuthorNameEvidence().getNameScoreTotal()):0) +
((reCiterArticle.getEmailEvidence() != null)?reCiterArticle.getEmailEvidence().getEmailMatchScore():0) +
((reCiterArticle.getGenderEvidence() != null && reCiterArticle.getGenderEvidence().getGenderScoreIdentityArticleDiscrepancy() != null)?reCiterArticle.getGenderEvidence().getGenderScoreIdentityArticleDiscrepancy():0) +
reCiterArticle.getGrantEvidenceTotalScore() +
((reCiterArticle.getRelationshipEvidence() != null)?reCiterArticle.getRelationshipEvidence().getRelationshipEvidenceTotalScore():0) +
//reCiterArticle.getRelationshipEvidencesTotalScore() +
((reCiterArticle.getEducationYearEvidence() != null)?reCiterArticle.getEducationYearEvidence().getDiscrepancyDegreeYearBachelorScore():0) +
((reCiterArticle.getEducationYearEvidence() != null)?reCiterArticle.getEducationYearEvidence().getDiscrepancyDegreeYearDoctoralScore():0) +
reCiterArticle.getOrganizationalEvidencesTotalScore() +
Expand All @@ -54,9 +54,9 @@ public double executeStrategy(ReCiterCluster reCiterCluster) {
} else {
double totalArticleScoreWithoutClustering = ((reCiterArticle.getAuthorNameEvidence() != null)?(reCiterArticle.getAuthorNameEvidence().getNameScoreTotal()):0) +
((reCiterArticle.getEmailEvidence() != null)?reCiterArticle.getEmailEvidence().getEmailMatchScore():0) +
((reCiterArticle.getGenderEvidence() != null && reCiterArticle.getGenderEvidence().getGenderScoreIdentityArticleDiscrepancy() != null)?reCiterArticle.getGenderEvidence().getGenderScoreIdentityArticleDiscrepancy():0) +
reCiterArticle.getGrantEvidenceTotalScore() +
((reCiterArticle.getRelationshipEvidence() != null)?reCiterArticle.getRelationshipEvidence().getRelationshipEvidenceTotalScore():0) +
//reCiterArticle.getRelationshipEvidencesTotalScore() +
((reCiterArticle.getEducationYearEvidence() != null)?reCiterArticle.getEducationYearEvidence().getDiscrepancyDegreeYearBachelorScore():0) +
((reCiterArticle.getEducationYearEvidence() != null)?reCiterArticle.getEducationYearEvidence().getDiscrepancyDegreeYearDoctoralScore():0) +
reCiterArticle.getOrganizationalEvidencesTotalScore() +
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package reciter.algorithm.evidence.targetauthor.gender;

import reciter.algorithm.evidence.targetauthor.AbstractTargetAuthorStrategyContext;
import reciter.algorithm.evidence.targetauthor.TargetAuthorStrategy;

/**
 * Strategy context for the gender evidence strategy.
 *
 * <p>Adds no behaviour of its own: the constructor simply forwards the supplied
 * strategy to {@link AbstractTargetAuthorStrategyContext}.
 */
public class GenderStrategyContext extends AbstractTargetAuthorStrategyContext {

    /**
     * Creates the context around the given strategy.
     *
     * @param strategy the target-author strategy passed on to the superclass constructor
     */
    public GenderStrategyContext(TargetAuthorStrategy strategy) {
        super(strategy);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package reciter.algorithm.evidence.targetauthor.gender.strategy;

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.List;


import lombok.extern.slf4j.Slf4j;
import reciter.algorithm.cluster.article.scorer.ReCiterArticleScorer;
import reciter.algorithm.evidence.targetauthor.AbstractTargetAuthorStrategy;
import reciter.database.dynamodb.model.Gender;
import reciter.engine.Feature;
import reciter.engine.analysis.evidence.GenderEvidence;
import reciter.model.article.ReCiterArticle;
import reciter.model.identity.Identity;
import reciter.utils.GenderProbability;

/**
 * Target-author strategy that attaches a {@link GenderEvidence} to each article.
 *
 * <p>For every article the gender probability obtained from
 * {@link GenderProbability#getGenderArticleProbability(ReCiterArticle)} is compared with the
 * gender probability stored on the identity. Nothing is done when the identity has no gender.
 */
@Slf4j
public class GenderStrategy extends AbstractTargetAuthorStrategy {

    /** Number of decimal places kept for every value stored in the evidence. */
    private static final int SCORE_SCALE = 2;

    /**
     * Single-article variant; this strategy only works on article lists.
     *
     * @param reCiterArticle ignored
     * @param identity ignored
     * @return always {@code 0}
     */
    @Override
    public double executeStrategy(ReCiterArticle reCiterArticle, Identity identity) {
        return 0;
    }

    /**
     * Builds and sets a {@link GenderEvidence} on every article in the list.
     *
     * <p>The discrepancy score is
     * {@code (1 - |identityProbability - articleProbability|) * rangeScore + minScore}, with
     * {@code rangeScore} and {@code minScore} read from
     * {@code ReCiterArticleScorer.strategyParameters}. It is only set when a gender could be
     * derived for the article; the identity probability is always set.
     *
     * @param reCiterArticles articles to annotate
     * @param identity target identity whose gender is compared against each article
     * @return always {@code 0}; the result is carried by the evidence set on each article
     */
    @Override
    public double executeStrategy(List<ReCiterArticle> reCiterArticles, Identity identity) {
        Gender identityGender = identity.getGender();
        if (identityGender == null) {
            // Without an identity gender there is nothing to compare against.
            return 0;
        }

        for (ReCiterArticle reCiterArticle : reCiterArticles) {
            Gender genderArticle = GenderProbability.getGenderArticleProbability(reCiterArticle);
            GenderEvidence genderEvidence = new GenderEvidence();

            if (genderArticle != null) {
                double discrepancyScore =
                        (1 - Math.abs(identityGender.getProbability() - genderArticle.getProbability()))
                                * ReCiterArticleScorer.strategyParameters.getGenderStrategyRangeScore()
                                + ReCiterArticleScorer.strategyParameters.getGenderStrategyMinScore();
                genderEvidence.setGenderScoreArticle(roundHalfDown(genderArticle.getProbability()));
                genderEvidence.setGenderScoreIdentityArticleDiscrepancy(roundHalfDown(discrepancyScore));
            }
            // identityGender is known to be non-null here, so this is always recorded.
            genderEvidence.setGenderScoreIdentity(roundHalfDown(identityGender.getProbability()));

            reCiterArticle.setGenderEvidence(genderEvidence);
            // Parameterized form renders the same text as the former concatenation.
            log.info("Pmid: {} {}", reCiterArticle.getArticleId(), genderEvidence);
        }

        return 0;
    }

    /**
     * Intentionally empty: this strategy does not populate any feature values.
     */
    @Override
    public void populateFeature(ReCiterArticle reCiterArticle, Identity identity, Feature feature) {
        // no-op
    }

    /** Rounds a value to {@link #SCORE_SCALE} decimal places using {@link RoundingMode#HALF_DOWN}. */
    private static double roundHalfDown(double value) {
        return BigDecimal.valueOf(value).setScale(SCORE_SCALE, RoundingMode.HALF_DOWN).doubleValue();
    }

}
6 changes: 5 additions & 1 deletion src/main/java/reciter/controller/ReCiterController.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
import reciter.service.dynamo.DynamoDbMeshTermService;
import reciter.service.dynamo.IDynamoDbGoldStandardService;
import reciter.utils.AuthorNameSanitizationUtils;
import reciter.utils.GenderProbability;
import reciter.utils.InstitutionSanitizationUtil;
import reciter.xml.retriever.engine.ReCiterRetrievalEngine;

Expand Down Expand Up @@ -807,7 +808,10 @@ private EngineParameters initializeEngineParameters(String uid, Double totalStan
//Sanitize Identity Organizational Units(Division and Department)
InstitutionSanitizationUtil institutionalSanitizationUtil = new InstitutionSanitizationUtil(strategyParameters);
institutionalSanitizationUtil.populateSanitizedIdentityInstitutions(identity);


//Find gender probability
GenderProbability.getGenderIdentityProbability(identity);

// calculate precision and recall
EngineParameters parameters = new EngineParameters();
parameters.setIdentity(identity);
Expand Down
108 changes: 18 additions & 90 deletions src/main/java/reciter/engine/EngineParameters.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*******************************************************************************/
package reciter.engine;

import reciter.database.dynamodb.model.Gender;
import reciter.database.dynamodb.model.ScienceMetrix;
import reciter.database.dynamodb.model.ScienceMetrixDepartmentCategory;
import reciter.model.article.ReCiterArticle;
Expand All @@ -28,106 +29,33 @@
import java.util.List;
import java.util.Map;

public class EngineParameters {
import lombok.Data;
import lombok.Getter;
import lombok.Setter;

@Data
public class EngineParameters {

@Getter
@Setter
private static Map<String, Long> meshCountMap;
@Getter
@Setter
private static Map<String, List<String>> afiliationNameToAfidMap;
@Getter
@Setter
private static List<ScienceMetrix> scienceMetrixJournals;
@Getter
@Setter
private static List<ScienceMetrixDepartmentCategory> scienceMetrixDepartmentCategories;
@Getter
@Setter
private static List<Gender> genders;
private Identity identity;
private List<PubMedArticle> pubMedArticles;
private List<ScopusArticle> scopusArticles;
private List<ReCiterArticle> reciterArticles;
private List<Long> knownPmids;
private List<Long> rejectedPmids;
private double totalStandardzizedArticleScore;

public static Map<String, Long> getMeshCountMap() {
return meshCountMap;
}

public static void setMeshCountMap(Map<String, Long> meshCountMap) {
EngineParameters.meshCountMap = meshCountMap;
}

public static Map<String, List<String>> getAfiliationNameToAfidMap() {
return afiliationNameToAfidMap;
}

public static void setAfiliationNameToAfidMap(Map<String, List<String>> afiliationNameToAfidMap) {
EngineParameters.afiliationNameToAfidMap = afiliationNameToAfidMap;
}

public static List<ScienceMetrix> getScienceMetrixJournals() {
return scienceMetrixJournals;
}

public static void setScienceMetrixJournals(List<ScienceMetrix> scienceMetrixJournals) {
EngineParameters.scienceMetrixJournals = scienceMetrixJournals;
}

public static List<ScienceMetrixDepartmentCategory> getScienceMetrixDepartmentCategories() {
return scienceMetrixDepartmentCategories;
}

public static void setScienceMetrixDepartmentCategories(
List<ScienceMetrixDepartmentCategory> scienceMetrixDepartmentCategories) {
EngineParameters.scienceMetrixDepartmentCategories = scienceMetrixDepartmentCategories;
}

public Identity getIdentity() {
return identity;
}

public void setIdentity(Identity identity) {
this.identity = identity;
}

public List<PubMedArticle> getPubMedArticles() {
return pubMedArticles;
}

public void setPubMedArticles(List<PubMedArticle> pubMedArticles) {
this.pubMedArticles = pubMedArticles;
}

public List<ScopusArticle> getScopusArticles() {
return scopusArticles;
}

public void setScopusArticles(List<ScopusArticle> scopusArticles) {
this.scopusArticles = scopusArticles;
}

public List<ReCiterArticle> getReciterArticles() {
return reciterArticles;
}

public void setReciterArticles(List<ReCiterArticle> reciterArticles) {
this.reciterArticles = reciterArticles;
}

public List<Long> getKnownPmids() {
return knownPmids;
}

public void setKnownPmids(List<Long> knownPmids) {
this.knownPmids = knownPmids;
}

public List<Long> getRejectedPmids() {
return rejectedPmids;
}

public void setRejectedPmids(List<Long> rejectedPmids) {
this.rejectedPmids = rejectedPmids;
}

public double getTotalStandardzizedArticleScore() {
return totalStandardzizedArticleScore;
}

public void setTotalStandardzizedArticleScore(double totalStandardzizedArticleScore) {
this.totalStandardzizedArticleScore = totalStandardzizedArticleScore;
}
}
Loading

1 comment on commit 285b217

@jl987-Jie
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really awesome!

Please sign in to comment.