Skip to content

Commit

Permalink
Merge pull request #39 from aavaas/master
Browse files Browse the repository at this point in the history
#35 fix DateMatch with NeighborhoodRange greater than 0.91 failing
  • Loading branch information
manishobhatia authored Nov 12, 2020
2 parents 9a877f9 + 0b35c27 commit 8c026c1
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 7 deletions.
19 changes: 12 additions & 7 deletions src/main/java/com/intuit/fuzzymatcher/component/TokenRepo.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ private class Repo {
TreeSet<Object> tokenBinaryTree;

// Base amount passed to TokenRange as its pctOf argument when the element type is AGE.
private final Double AGE_PCT_OF = 10D;
// Base amount passed to TokenRange as its pctOf argument when the element type is DATE.
// 15777e7 = 1.5777e11, which is roughly 5 years if read as milliseconds (presumably so, since
// it is combined with Date.getTime() values -- TODO confirm against getLower/getHigher).
private final Double DATE_PCT_OF = 15777e7D; // 5 years of range


Repo(MatchType matchType) {
Expand Down Expand Up @@ -77,10 +78,15 @@ Set<Element> get(Token token) {
return tokenElementSet.get(token.getValue());
case NEAREST_NEIGHBORS:
TokenRange tokenRange;
if (token.getElement().getElementClassification().getElementType().equals(ElementType.AGE)) {
tokenRange = new TokenRange(token, token.getElement().getNeighborhoodRange(), AGE_PCT_OF);
} else {
tokenRange = new TokenRange(token, token.getElement().getNeighborhoodRange());
switch (token.getElement().getElementClassification().getElementType()){
case AGE:
tokenRange = new TokenRange(token, token.getElement().getNeighborhoodRange(), AGE_PCT_OF);
break;
case DATE:
tokenRange = new TokenRange(token, token.getElement().getNeighborhoodRange(), DATE_PCT_OF);
break;
default:
tokenRange = new TokenRange(token, token.getElement().getNeighborhoodRange());
}
return tokenBinaryTree.subSet(tokenRange.lower, true, tokenRange.higher, true)
.stream()
Expand All @@ -95,7 +101,6 @@ private class TokenRange {

private final Object lower;
private final Object higher;
private static final double DATE_SCALE_FACTOR = 1.1;

TokenRange(Token token, double pct, Double pctOf) {
Object value = token.getValue();
Expand All @@ -112,8 +117,8 @@ private class TokenRange {
this.lower = getLower((Float) value, pct, pctOf).floatValue();
this.higher = getHigher((Float) value, pct, pctOf).floatValue();
} else if (value instanceof Date) {
this.lower = new Date(getLower(((Date) value).getTime(), pct * DATE_SCALE_FACTOR, pctOf).longValue());
this.higher = new Date(getHigher(((Date) value).getTime(), pct * DATE_SCALE_FACTOR, pctOf).longValue());
this.lower = new Date(getLower(((Date) value).getTime(), pct, pctOf).longValue());
this.higher = new Date(getHigher(((Date) value).getTime(), pct, pctOf).longValue());
} else {
throw new MatchException("Data Type not supported");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,14 @@ public void itShouldApplyMatchWithDate() {
Assert.assertEquals(2, result.size());
}

/**
 * Runs date matching with a high neighborhood range (0.99) over three dates and
 * expects exactly two documents to be present in the match result.
 */
@Test
public void itShouldApplyMatchWithDateForHighNeighborhoodRange() {
    List<Object> inputDates = Arrays.asList(
            getDate("01/01/2020"),
            getDate("01/02/2020"),
            getDate("02/01/2019"));

    // 0.99 neighborhood is about 18 days
    List<Document> documents = getTestDocuments(inputDates, DATE, 0.99);

    Map<Document, List<Match<Document>>> matchesByDocument = matchService.applyMatch(documents);

    Assert.assertEquals(2, matchesByDocument.size());
}

@Test
public void itShouldApplyMatchWithAge() {
List<Object> numbers = Arrays.asList(1, 2, 9, 10, 11, 45, 49, 50, 52, 55, 90, 95, 100, 107, 115);
Expand Down

0 comments on commit 8c026c1

Please sign in to comment.