From 6e2272503557046f4a89737998a953546c43205c Mon Sep 17 00:00:00 2001 From: nirmalc Date: Thu, 20 Jul 2017 16:10:38 -0500 Subject: [PATCH] Added max/min Scores to Percentage Heuristic Adds two params to Percentage heuristic; min_score and max_score. minScore and maxScore to Percentage allows interesting aggregations like exclusive terms --- .../heuristics/PercentageScore.java | 58 ++++++++++++++++--- .../SignificanceHeuristicTests.java | 25 +++++++- 2 files changed, 72 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java index 9220adf87d2f4..04623dd07218a 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/heuristics/PercentageScore.java @@ -22,6 +22,7 @@ import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -33,15 +34,28 @@ public class PercentageScore extends SignificanceHeuristic { public static final String NAME = "percentage"; - public PercentageScore() { + private static final ParseField MAX_SCORE = new ParseField("max_score"); + private static final ParseField MIN_SCORE = new ParseField("min_score"); + private final double maxScore; + private final double minScore; + + public PercentageScore(double minScore,double maxScore) { + this.minScore = minScore; + this.maxScore = maxScore; } - public PercentageScore(StreamInput in) { - // Nothing to read. 
+ public PercentageScore() { + this(0,1); + } + public PercentageScore(StreamInput in) throws IOException{ + minScore = in.readDouble(); // must mirror writeTo, which writes doubles + maxScore = in.readDouble(); } @Override public void writeTo(StreamOutput out) throws IOException { + out.writeDouble(minScore); + out.writeDouble(maxScore); } @Override @@ -51,17 +65,39 @@ public String getWriteableName() { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(NAME).endObject(); + builder.startObject(NAME) + .field(MIN_SCORE.getPreferredName(), minScore) + .field(MAX_SCORE.getPreferredName(), maxScore) + .endObject(); return builder; } + public double getMinScore() { + return minScore; + } + + public double getMaxScore() { + return maxScore; + } + public static SignificanceHeuristic parse(XContentParser parser) throws IOException, QueryShardException { - // move to the closing bracket - if (!parser.nextToken().equals(XContentParser.Token.END_OBJECT)) { - throw new ElasticsearchParseException("failed to parse [percentage] significance heuristic. expected an empty object, but got [{}] instead", parser.currentToken()); + double minScore = 0; + double maxScore = 1; + XContentParser.Token token = parser.nextToken(); + while (!token.equals(XContentParser.Token.END_OBJECT)) { + if (MIN_SCORE.match(parser.currentName())) { + parser.nextToken(); + minScore = parser.doubleValue(); + } else if (MAX_SCORE.match(parser.currentName())) { + parser.nextToken(); + maxScore = parser.doubleValue(); + } else { + throw new ElasticsearchParseException("failed to parse percent heuristic. 
unknown field [{}]", parser.currentName()); + } + token = parser.nextToken(); } - return new PercentageScore(); + return new PercentageScore(minScore,maxScore); } /** @@ -75,7 +111,11 @@ public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long // avoid a divide by zero issue return 0; } - return (double) subsetFreq / (double) supersetFreq; + double score = (double) subsetFreq / (double) supersetFreq; + if ( score >= minScore && score <= maxScore) { + return score; + } + return 0; } @Override diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java index 796355ebfb190..2d630f74728fd 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java @@ -408,18 +408,39 @@ public void testAssertions() throws Exception { testBackgroundAssertions(new MutualInformation(true, true), new MutualInformation(true, false)); testBackgroundAssertions(new ChiSquare(true, true), new ChiSquare(true, false)); testBackgroundAssertions(new GND(true), new GND(false)); - testAssertions(new PercentageScore()); + testAssertions(new PercentageScore(0,1)); testAssertions(new JLHScore()); } public void testBasicScoreProperties() { basicScoreProperties(new JLHScore(), true); basicScoreProperties(new GND(true), true); - basicScoreProperties(new PercentageScore(), true); + basicScoreProperties(new PercentageScore(0, 1), true); basicScoreProperties(new MutualInformation(true, true), false); basicScoreProperties(new ChiSquare(true, true), false); } + public void testPercentScoreProperties() { + SignificanceHeuristic heuristic = new PercentageScore(0,0); + assertThat(heuristic.getScore(1, 1, 1, 3), equalTo(0.0)); + heuristic = new 
PercentageScore(1,1); + assertThat(heuristic.getScore(1, 1, 2, 3), equalTo(0.0)); + assertThat(heuristic.getScore(2, 2, 2, 10), equalTo(1.0)); + + } + + public void testPercentHeuristicParse() throws IOException { + SearchModule searchModule = new SearchModule(Settings.EMPTY, false, emptyList()); + ParseFieldRegistry significanceHeuristicParserRegistry = searchModule.getSignificanceHeuristicParserRegistry(); + PercentageScore percentageScore = (PercentageScore) parseFromString(significanceHeuristicParserRegistry,"\"percentage\" : { \"min_score\" : 0.0 , \"max_score\" : 1.0 }"); + assertThat(percentageScore.getMaxScore(), equalTo(1.0)); + assertThat(percentageScore.getMinScore(), equalTo(0.0)); + percentageScore = (PercentageScore) parseFromString(significanceHeuristicParserRegistry,"\"percentage\" : {}"); + assertThat(percentageScore.getMaxScore(), equalTo(1.0)); + assertThat(percentageScore.getMinScore(), equalTo(0.0)); + } + + public void basicScoreProperties(SignificanceHeuristic heuristic, boolean test0) { assertThat(heuristic.getScore(1, 1, 1, 3), greaterThan(0.0)); assertThat(heuristic.getScore(1, 1, 2, 3), lessThan(heuristic.getScore(1, 1, 1, 3)));