diff --git a/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc b/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc index 1d225c91e26d8..b8459ee6e618c 100644 --- a/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc @@ -332,7 +332,7 @@ Top hits response snippet with a nested hit, which resides in the first slot of "value": 1, "relation": "eq" }, - "max_score": 0.3616575, + "max_score": 0.16438977, "hits": [ { "_index": "sales", @@ -342,7 +342,7 @@ Top hits response snippet with a nested hit, which resides in the first slot of "field": "comments", <1> "offset": 0 <2> }, - "_score": 0.3616575, + "_score": 0.16438977, "_source": { "comment": "This car could have better brakes", <3> "username": "baddriver007" diff --git a/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc b/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc index 046f6441c07b9..e273cc52c5ae8 100644 --- a/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc +++ b/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc @@ -245,13 +245,13 @@ The output from the above is: "value": 1, "relation": "eq" }, - "max_score": 0.2876821, + "max_score": 0.13076457, "hits": [ { "_index": "my_index", "_type": "_doc", "_id": "1", - "_score": 0.2876821, + "_score": 0.13076457, "_source": { "text": "The fooBarBaz method" }, diff --git a/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc index a34f5c801939e..90dc16d9e054c 100644 --- a/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc +++ b/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc @@ -304,13 +304,13 @@ GET my_index/_search "value": 1, "relation": "eq" }, - "max_score": 0.5753642, + "max_score": 0.26152915, "hits": [ { "_index": "my_index", "_type": "_doc", "_id": "1", - 
"_score": 0.5753642, + "_score": 0.26152915, "_source": { "title": "Quick Foxes" } diff --git a/docs/reference/how-to/recipes/stemming.asciidoc b/docs/reference/how-to/recipes/stemming.asciidoc index 83f1379cd32a0..87588c799c17e 100644 --- a/docs/reference/how-to/recipes/stemming.asciidoc +++ b/docs/reference/how-to/recipes/stemming.asciidoc @@ -88,13 +88,13 @@ GET index/_search "value": 2, "relation": "eq" }, - "max_score": 0.18232156, + "max_score": 0.082873434, "hits": [ { "_index": "index", "_type": "_doc", "_id": "1", - "_score": 0.18232156, + "_score": 0.082873434, "_source": { "body": "Ski resort" } @@ -103,7 +103,7 @@ GET index/_search "_index": "index", "_type": "_doc", "_id": "2", - "_score": 0.18232156, + "_score": 0.082873434, "_source": { "body": "A pair of skis" } @@ -149,13 +149,13 @@ GET index/_search "value": 1, "relation": "eq" }, - "max_score": 0.8025915, + "max_score": 0.3648143, "hits": [ { "_index": "index", "_type": "_doc", "_id": "1", - "_score": 0.8025915, + "_score": 0.3648143, "_source": { "body": "Ski resort" } @@ -209,13 +209,13 @@ GET index/_search "value": 1, "relation": "eq" }, - "max_score": 0.8025915, + "max_score": 0.3648143, "hits": [ { "_index": "index", "_type": "_doc", "_id": "1", - "_score": 0.8025915, + "_score": 0.3648143, "_source": { "body": "Ski resort" } diff --git a/docs/reference/mapping/params/normalizer.asciidoc b/docs/reference/mapping/params/normalizer.asciidoc index bfd24381753f7..17b90df507e3f 100644 --- a/docs/reference/mapping/params/normalizer.asciidoc +++ b/docs/reference/mapping/params/normalizer.asciidoc @@ -93,13 +93,13 @@ both index and query time. "value": 2, "relation": "eq" }, - "max_score": 0.47000363, + "max_score": 0.21363801, "hits": [ { "_index": "index", "_type": "_doc", "_id": "1", - "_score": 0.47000363, + "_score": 0.21363801, "_source": { "foo": "BÀR" } @@ -108,7 +108,7 @@ both index and query time. 
"_index": "index", "_type": "_doc", "_id": "2", - "_score": 0.47000363, + "_score": 0.21363801, "_source": { "foo": "bar" } diff --git a/docs/reference/query-dsl/terms-set-query.asciidoc b/docs/reference/query-dsl/terms-set-query.asciidoc index fa879bb068d34..0b5b615c59f6e 100644 --- a/docs/reference/query-dsl/terms-set-query.asciidoc +++ b/docs/reference/query-dsl/terms-set-query.asciidoc @@ -76,13 +76,13 @@ Response: "value": 1, "relation": "eq" }, - "max_score": 0.87546873, + "max_score": 0.39794034, "hits": [ { "_index": "my-index", "_type": "_doc", "_id": "2", - "_score": 0.87546873, + "_score": 0.39794034, "_source": { "codes": ["def", "ghi"], "required_matches": 2 diff --git a/docs/reference/search/explain.asciidoc b/docs/reference/search/explain.asciidoc index 1d91839b064c6..e70031310fcaa 100644 --- a/docs/reference/search/explain.asciidoc +++ b/docs/reference/search/explain.asciidoc @@ -34,18 +34,13 @@ This will yield the following result: "_id":"0", "matched":true, "explanation":{ - "value":1.6943597, + "value":0.7701635, "description":"weight(message:elasticsearch in 0) [PerFieldSimilarity], result of:", "details":[ { - "value":1.6943597, + "value":0.7701635, "description":"score(freq=1.0), product of:", "details":[ - { - "value":2.2, - "description":"boost", - "details":[] - }, { "value":1.3862944, "description":"idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:", diff --git a/docs/reference/search/request-body.asciidoc b/docs/reference/search/request-body.asciidoc index 9970c4cc6223f..35d6149567afa 100644 --- a/docs/reference/search/request-body.asciidoc +++ b/docs/reference/search/request-body.asciidoc @@ -35,13 +35,13 @@ And here is a sample response: "value": 1, "relation": "eq" }, - "max_score": 1.3862944, + "max_score": 0.6301338, "hits" : [ { "_index" : "twitter", "_type" : "_doc", "_id" : "0", - "_score": 1.3862944, + "_score": 0.6301338, "_source" : { "user" : "kimchy", "message": "trying out Elasticsearch", diff --git 
a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/request/highlighting.asciidoc index e798fcf186906..80a5ea0a2542c 100644 --- a/docs/reference/search/request/highlighting.asciidoc +++ b/docs/reference/search/request/highlighting.asciidoc @@ -864,13 +864,13 @@ Response: "value": 1, "relation": "eq" }, - "max_score": 1.601195, + "max_score": 0.72781587, "hits": [ { "_index": "twitter", "_type": "_doc", "_id": "1", - "_score": 1.601195, + "_score": 0.72781587, "_source": { "user": "test", "message": "some message with the number 1", @@ -923,13 +923,13 @@ Response: "value": 1, "relation": "eq" }, - "max_score": 1.601195, + "max_score": 0.72781587, "hits": [ { "_index": "twitter", "_type": "_doc", "_id": "1", - "_score": 1.601195, + "_score": 0.72781587, "_source": { "user": "test", "message": "some message with the number 1", diff --git a/docs/reference/search/request/inner-hits.asciidoc b/docs/reference/search/request/inner-hits.asciidoc index a1eeeb8f06375..80af0b822b188 100644 --- a/docs/reference/search/request/inner-hits.asciidoc +++ b/docs/reference/search/request/inner-hits.asciidoc @@ -274,13 +274,13 @@ Response not included in text but tested for completeness sake. "value": 1, "relation": "eq" }, - "max_score": 1.0444684, + "max_score": 0.47475836, "hits": [ { "_index": "test", "_type": "_doc", "_id": "1", - "_score": 1.0444684, + "_score": 0.47475836, "_source": ..., "inner_hits": { "comments": { <1> @@ -289,7 +289,7 @@ Response not included in text but tested for completeness sake. "value": 1, "relation": "eq" }, - "max_score": 1.0444684, + "max_score": 0.47475836, "hits": [ { "_index": "test", @@ -299,7 +299,7 @@ Response not included in text but tested for completeness sake. 
"field": "comments", "offset": 1 }, - "_score": 1.0444684, + "_score": 0.47475836, "fields": { "comments.text.keyword": [ "words words words" @@ -395,13 +395,13 @@ Which would look like: "value": 1, "relation": "eq" }, - "max_score": 0.6931472, + "max_score": 0.3150669, "hits": [ { "_index": "test", "_type": "_doc", "_id": "1", - "_score": 0.6931472, + "_score": 0.3150669, "_source": ..., "inner_hits": { "comments.votes": { <1> @@ -410,7 +410,7 @@ Which would look like: "value": 1, "relation": "eq" }, - "max_score": 0.6931472, + "max_score": 0.3150669, "hits": [ { "_index": "test", @@ -424,7 +424,7 @@ Which would look like: "offset": 0 } }, - "_score": 0.6931472, + "_score": 0.3150669, "_source": { "value": 1, "voter": "kimchy" diff --git a/docs/reference/search/uri-request.asciidoc b/docs/reference/search/uri-request.asciidoc index 87e1da907fb7d..57ac76d5396ba 100644 --- a/docs/reference/search/uri-request.asciidoc +++ b/docs/reference/search/uri-request.asciidoc @@ -31,13 +31,13 @@ And here is a sample response: "value": 1, "relation": "eq" }, - "max_score": 1.3862944, + "max_score": 0.6301338, "hits" : [ { "_index" : "twitter", "_type" : "_doc", "_id" : "0", - "_score": 1.3862944, + "_score": 0.6301338, "_source" : { "user" : "kimchy", "date" : "2009-11-15T14:12:12", diff --git a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/IndexingIT.java b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/IndexingIT.java index c80218c50ebe9..7dec5aa7f5733 100644 --- a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/IndexingIT.java +++ b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/IndexingIT.java @@ -19,13 +19,13 @@ package org.elasticsearch.upgrades; import org.apache.http.util.EntityUtils; +import org.elasticsearch.Version; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; import org.elasticsearch.common.Booleans; import org.elasticsearch.common.Strings; import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.rest.action.document.RestBulkAction; -import org.elasticsearch.Version; -import org.elasticsearch.client.Request; -import org.elasticsearch.client.Response; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -145,7 +145,7 @@ public void testIndexing() throws IOException { } } - private void bulk(String index, String valueSuffix, int count) throws IOException { + private static void bulk(String index, String valueSuffix, int count) throws IOException { StringBuilder b = new StringBuilder(); for (int i = 0; i < count; i++) { b.append("{\"index\": {\"_index\": \"").append(index).append("\", \"_type\": \"_doc\"}}\n"); diff --git a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java new file mode 100644 index 0000000000000..d464059a002c3 --- /dev/null +++ b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SearchIT.java @@ -0,0 +1,185 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.upgrades; + +import org.apache.http.util.EntityUtils; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.DeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Test that checks search api behaviour while performing a rolling upgrade + */ +public class SearchIT extends AbstractRollingTestCase { + + private static final String[] VALUES = new String[]{ + //values are in the expected scoring order when searching for "quick brown fox dog", docs will be shuffled + //but the ids get assigned to them based on the order in this array + "The quick brown fox jumps over the lazy dog", + "The quick brown dog jumps over the lazy cat", + "The brown fox jumps over the lazy dog", + "The quick fox jumps over the lazy cat", + "The fox jumps over the lazy dog", + }; + + private static final List> DOCS = new ArrayList<>(VALUES.length); + + @BeforeClass + public static void setupDocs() { + for (int i = 0; i < VALUES.length; i++) { + DOCS.add(Tuple.tuple(String.valueOf(i), "{\"text\": \"" + VALUES[i] + "\"}")); + } + Collections.shuffle(DOCS, random()); + } + + /** + * Check that documents ordering stays the same during the upgrade and after when searching against an index made of multiple shards. 
+ * Tests the bw compatibility layer introduced after removing the k1+1 constant factor from the numerator of the bm25 scoring formula. + */ + public void testSingleIndexScoring() throws IOException { + if (CLUSTER_TYPE == ClusterType.OLD) { + index("single-old"); + search("single-old"); + } + if (CLUSTER_TYPE == ClusterType.MIXED) { + waitForGreen("single-old"); + if (indexExists("single-mixed") == false) { + index("single-mixed"); + } + search("single-old"); + search("single-mixed"); + } + if (CLUSTER_TYPE == ClusterType.UPGRADED) { + waitForGreen("single-old,single-mixed"); + index("single-upgraded"); + search("single-old"); + search("single-mixed"); + search("single-upgraded"); + } + } + + /** + * Check that documents ordering stays the same during the upgrade and after when searching against multiple indices, and newer + * indices are explicitly configured to use the LegacyBM25 similarity. + */ + public void testMultipleIndicesScoring() throws IOException { + if (CLUSTER_TYPE == ClusterType.OLD) { + createIndex("multi-old", Settings.builder().put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0).build()); + Tuple document = randomFrom(DOCS); + //index 1 of the 5 documents in an index created before the upgrade + index("multi-old", document); + } + if (CLUSTER_TYPE == ClusterType.MIXED) { + waitForGreen("multi-old"); + if (indexExists("multi-mixed") == false) { + createIndex("multi-mixed", Settings.builder().put("index.number_of_shards", randomBoolean() ? 
1 : 2) + .put("index.number_of_replicas", 0).build()); + //index 3 of the 5 documents in an index created during the upgrade + indexNewDocs("multi-mixed", 3); + } + } + if (CLUSTER_TYPE == ClusterType.UPGRADED) { + waitForGreen("multi-old,multi-mixed"); + createIndex("multi-upgraded", Settings.builder().put("index.similarity.default.type", "LegacyBM25").build()); + //index the last document in an index created after the upgrade, which has explicitly set LegacyBM25 + indexNewDocs("multi-upgraded", 1); + //finally test that the order is the same as if the documents were all indexed in the same index + search("multi-old,multi-mixed,multi-upgraded"); + } + } + + private static void indexNewDocs(String index, int numDocs) throws IOException { + for (int i = 0; i < numDocs; i++) { + Tuple document; + do { + document = randomFrom(DOCS); + } while (docExists("multi-old", document.v1()) || docExists("multi-mixed", document.v1())); + index(index, document); + } + } + + private static boolean docExists(String index, String id) throws IOException { + Request existsRequest = new Request("HEAD", "/" + index + "/_doc/" + id); + return client().performRequest(existsRequest).getStatusLine().getStatusCode() == 200; + } + + private static void waitForGreen(String index) throws IOException { + Request waitForGreen = new Request("GET", "/_cluster/health/" + index); + waitForGreen.addParameter("wait_for_nodes", "3"); + waitForGreen.addParameter("wait_for_status", "green"); + waitForGreen.addParameter("timeout", "70s"); + client().performRequest(waitForGreen); + } + + private static void search(String index) throws IOException { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.query(new MatchQueryBuilder("text", "quick brown fox dog")); + Request request = new Request("GET", "/" + index + "/_search"); + request.addParameter("search_type", "dfs_query_then_fetch"); + request.setJsonEntity(Strings.toString(sourceBuilder)); + SearchResponse searchResponse = 
search(request); + assertEquals(5, searchResponse.getHits().getTotalHits().value); + SearchHit[] hits = searchResponse.getHits().getHits(); + assertEquals(5, hits.length); + for (int i = 0; i < hits.length; i++) { + assertEquals(String.valueOf(i), hits[i].getId()); + } + } + + private static SearchResponse search(Request request) throws IOException { + Response response = client().performRequest(request); + String responseString = EntityUtils.toString(response.getEntity()); + return SearchResponse.fromXContent(JsonXContent.jsonXContent.createParser(NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, responseString)); + } + + private static void index(String index) throws IOException { + createIndex(index, Settings.builder().put("index.number_of_shards", randomIntBetween(2, 4)) + .put("index.number_of_replicas", 0).build()); + for (Tuple document : DOCS) { + index(index, document); + } + } + + private static void index(String index, Tuple doc) throws IOException { + String id = doc.v1(); + String body = doc.v2(); + Request request = new Request("PUT", "/" + index + "/_doc/" + id); + request.addParameter("refresh", "wait_for"); + request.setJsonEntity(body); + assertEquals(201, client().performRequest(request).getStatusLine().getStatusCode()); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java index f5a870441d43f..b5af32f5c2f17 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java @@ -55,7 +55,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SimilarityProvider that = (SimilarityProvider) o; - /** + /* * We check name only because the similarity is * re-created for each new instance and they don't implement equals. 
* This is not entirely correct though but we only use equality checks @@ -66,7 +66,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - /** + /* * We use name only because the similarity is * re-created for each new instance and they don't implement equals. * This is not entirely correct though but we only use equality checks diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java index 04970a38bd99d..6596f634808b0 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.similarities.AfterEffect; import org.apache.lucene.search.similarities.AfterEffectB; import org.apache.lucene.search.similarities.AfterEffectL; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BasicModel; import org.apache.lucene.search.similarities.BasicModelG; import org.apache.lucene.search.similarities.BasicModelIF; @@ -50,6 +51,7 @@ import org.apache.lucene.search.similarities.NormalizationH2; import org.apache.lucene.search.similarities.NormalizationH3; import org.apache.lucene.search.similarities.NormalizationZ; +import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.Version; import org.elasticsearch.common.logging.DeprecationLogger; @@ -269,13 +271,32 @@ static void assertSettingsIsSubsetOf(String type, Version version, Settings sett } } - public static LegacyBM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) { + public static Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) { assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS); 
float k1 = settings.getAsFloat("k1", 1.2f); float b = settings.getAsFloat("b", 0.75f); boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); + if (indexCreatedVersion.before(Version.V_7_0_0)) { + //use legacy bm25 for indices created in 6.x + LegacyBM25Similarity similarity = new LegacyBM25Similarity(k1, b); + similarity.setDiscountOverlaps(discountOverlaps); + return similarity; + } else { + BM25Similarity similarity = new BM25Similarity(k1, b); + similarity.setDiscountOverlaps(discountOverlaps); + return similarity; + } + } + + public static LegacyBM25Similarity createLegacyBM25Similarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("LegacyBM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS); + + float k1 = settings.getAsFloat("k1", 1.2f); + float b = settings.getAsFloat("b", 0.75f); + boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); + LegacyBM25Similarity similarity = new LegacyBM25Similarity(k1, b); similarity.setDiscountOverlaps(discountOverlaps); return similarity; diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java index 57cbc961aacc0..3457530ffe03b 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java @@ -75,7 +75,11 @@ public final class SimilarityService extends AbstractIndexComponent { } }); defaults.put("BM25", version -> { - final LegacyBM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version); + final Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version); + return () -> similarity; + }); + defaults.put("LegacyBM25", version -> { + final LegacyBM25Similarity similarity = SimilarityProviders.createLegacyBM25Similarity(Settings.EMPTY, version); return 
() -> similarity; }); defaults.put("boolean", version -> { @@ -98,6 +102,8 @@ public final class SimilarityService extends AbstractIndexComponent { }); builtIn.put("BM25", (settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version)); + builtIn.put("LegacyBM25", + (settings, version, scriptService) -> SimilarityProviders.createLegacyBM25Similarity(settings, version)); builtIn.put("boolean", (settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version)); builtIn.put("DFR", @@ -151,8 +157,9 @@ public SimilarityService(IndexSettings indexSettings, ScriptService scriptServic providers.put(entry.getKey(), entry.getValue().apply(indexSettings.getIndexVersionCreated())); } this.similarities = providers; - defaultSimilarity = (providers.get("default") != null) ? providers.get("default").get() - : providers.get(SimilarityService.DEFAULT_SIMILARITY).get(); + Supplier defaultSimilarity = providers.get("default"); + this.defaultSimilarity = (defaultSimilarity != null) ? 
+ defaultSimilarity.get() : providers.get(SimilarityService.DEFAULT_SIMILARITY).get(); if (providers.get("base") != null) { deprecationLogger.deprecated("The [base] similarity is ignored since query normalization and coords have been removed"); } @@ -164,7 +171,6 @@ public Similarity similarity(MapperService mapperService) { defaultSimilarity; } - public SimilarityProvider getSimilarity(String name) { Supplier sim = similarities.get(name); if (sim == null) { @@ -273,5 +279,4 @@ private static void fail(Version indexCreatedVersion, String message) { deprecationLogger.deprecated(message); } } - } diff --git a/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java b/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java index 13398d8791437..9915f7116a662 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/LegacySimilarityTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.VersionUtils; import java.io.IOException; @@ -43,9 +44,8 @@ protected boolean forbidPrivateIndexSettings() { } public void testResolveDefaultSimilaritiesOn6xIndex() { - final Settings indexSettings = Settings.builder() - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0) // otherwise classic is forbidden - .build(); + Version version = VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, VersionUtils.getPreviousVersion(Version.V_7_0_0)); + final Settings indexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); final SimilarityService similarityService = createIndex("foo", indexSettings).similarityService(); assertThat(similarityService.getSimilarity("classic").get(), instanceOf(ClassicSimilarity.class)); 
assertWarnings("The [classic] similarity is now deprecated in favour of BM25, which is generally " @@ -89,5 +89,4 @@ public void testResolveSimilaritiesFromMappingClassic() throws IOException { assertThat(similarity.getDiscountOverlaps(), equalTo(false)); } } - } diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java index 98c2abb3f8219..d6fd74d8cd6b0 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java @@ -21,14 +21,17 @@ import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.VersionUtils; import org.hamcrest.Matchers; import java.util.Collections; @@ -40,6 +43,14 @@ public void testDefaultSimilarity() { Settings settings = Settings.builder().build(); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); + assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class)); + } + + public void testDefaultSimilarity6xIndices() { + Version version = VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, 
VersionUtils.getPreviousVersion(Version.V_7_0_0)); + Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); + SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class)); } @@ -60,7 +71,15 @@ public void testOverrideDefaultSimilarity() { .build(); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); - assertTrue(service.getDefaultSimilarity() instanceof BooleanSimilarity); + assertThat(service.getDefaultSimilarity(), instanceOf(BooleanSimilarity.class)); + } + + public void testOverrideDefaultSimilarityWithLegacyBM25() { + Settings settings = Settings.builder().put("index.similarity.default.type", "LegacyBM25") + .build(); + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); + SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); + assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class)); } public void testSimilarityValidation() { @@ -133,5 +152,4 @@ public float score(float freq, long norm) { () -> SimilarityService.validateSimilarity(Version.V_7_0_0, increasingScoresWithNormSim)); assertThat(e.getMessage(), Matchers.containsString("Similarity scores should not increase when norm increases")); } - } diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java index fd5a77665fecd..6591e3a09da95 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java @@ -20,6 +20,7 @@ 
package org.elasticsearch.index.similarity; import org.apache.lucene.search.similarities.AfterEffectL; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BasicModelG; import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.DFISimilarity; @@ -50,6 +51,7 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.CoreMatchers.nullValue; public class SimilarityTests extends ESSingleNodeTestCase { @@ -60,16 +62,17 @@ protected Collection> getPlugins() { public void testResolveDefaultSimilarities() { SimilarityService similarityService = createIndex("foo").similarityService(); - assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class)); + assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); + assertThat(similarityService.getSimilarity("LegacyBM25").get(), instanceOf(LegacyBM25Similarity.class)); assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class)); - assertThat(similarityService.getSimilarity("default"), equalTo(null)); + assertThat(similarityService.getSimilarity("default"), nullValue()); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> similarityService.getSimilarity("classic")); assertEquals("The [classic] similarity may not be used anymore. 
Please use the [BM25] similarity or build a custom [scripted] " + "similarity instead.", e.getMessage()); } - public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException { + public void testResolveSimilaritiesFromMapping_classicIsForbidden() { Settings indexSettings = Settings.builder() .put("index.similarity.my_similarity.type", "classic") .put("index.similarity.my_similarity.discount_overlaps", false) @@ -94,13 +97,34 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException { .put("index.similarity.my_similarity.discount_overlaps", false) .build(); MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService(); + assertThat(mapperService.fullName("field1").similarity().get(), instanceOf(BM25Similarity.class)); + + BM25Similarity similarity = (BM25Similarity) mapperService.fullName("field1").similarity().get(); + assertThat(similarity.getK1(), equalTo(2.0f)); + assertThat(similarity.getB(), equalTo(0.5f)); + assertThat(similarity.getDiscountOverlaps(), equalTo(false)); + } + + public void testResolveSimilaritiesFromMapping_LegacyBM25() throws IOException { + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field1").field("type", "text").field("similarity", "my_similarity").endObject() + .endObject() + .endObject().endObject(); + + Settings indexSettings = Settings.builder() + .put("index.similarity.my_similarity.type", "LegacyBM25") + .put("index.similarity.my_similarity.k1", 2.0f) + .put("index.similarity.my_similarity.b", 0.5f) + .put("index.similarity.my_similarity.discount_overlaps", false) + .build(); + MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService(); assertThat(mapperService.fullName("field1").similarity().get(), instanceOf(LegacyBM25Similarity.class)); LegacyBM25Similarity similarity = (LegacyBM25Similarity) 
mapperService.fullName("field1").similarity().get(); assertThat(similarity.getK1(), equalTo(2.0f)); assertThat(similarity.getB(), equalTo(0.5f)); - // TODO: re-enable when we switch back to BM25Similarity - // assertThat(similarity.getDiscountOverlaps(), equalTo(false)); + assertThat(similarity.getDiscountOverlaps(), equalTo(false)); } public void testResolveSimilaritiesFromMapping_boolean() throws IOException { @@ -234,7 +258,7 @@ public void testResolveSimilaritiesFromMapping_Unknown() throws IOException { } } - public void testUnknownParameters() throws IOException { + public void testUnknownParameters() { Settings indexSettings = Settings.builder() .put("index.similarity.my_similarity.type", "BM25") .put("index.similarity.my_similarity.z", 2.0f) diff --git a/server/src/test/java/org/elasticsearch/search/basic/TransportTwoNodesSearchIT.java b/server/src/test/java/org/elasticsearch/search/basic/TransportTwoNodesSearchIT.java index 5d3b19697d788..704248c29ada8 100644 --- a/server/src/test/java/org/elasticsearch/search/basic/TransportTwoNodesSearchIT.java +++ b/server/src/test/java/org/elasticsearch/search/basic/TransportTwoNodesSearchIT.java @@ -148,15 +148,15 @@ public void testDfsQueryThenFetch() throws Exception { SearchHit hit = hits[i]; assertThat(hit.getExplanation(), notNullValue()); assertThat(hit.getExplanation().getDetails().length, equalTo(1)); - assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(3)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails().length, equalTo(2)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getDescription(), + assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getDescription(), startsWith("n,")); - 
assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getValue(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getValue(), equalTo(100L)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getDescription(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getDescription(), startsWith("N,")); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getValue(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getValue(), equalTo(100L)); assertThat("id[" + hit.getId() + "] -> " + hit.getExplanation().toString(), hit.getId(), equalTo(Integer.toString(100 - total - i - 1))); @@ -187,15 +187,15 @@ public void testDfsQueryThenFetchWithSort() throws Exception { SearchHit hit = hits[i]; assertThat(hit.getExplanation(), notNullValue()); assertThat(hit.getExplanation().getDetails().length, equalTo(1)); - assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(3)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails().length, equalTo(2)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getDescription(), + assertThat(hit.getExplanation().getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails().length, equalTo(2)); + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getDescription(), startsWith("n,")); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[0].getValue(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[0].getValue(), equalTo(100L)); - assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getDescription(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getDescription(), startsWith("N,")); 
- assertThat(hit.getExplanation().getDetails()[0].getDetails()[1].getDetails()[1].getValue(), + assertThat(hit.getExplanation().getDetails()[0].getDetails()[0].getDetails()[1].getValue(), equalTo(100L)); assertThat("id[" + hit.getId() + "]", hit.getId(), equalTo(Integer.toString(total + i))); } diff --git a/server/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java b/server/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java index 5feb341fd6943..240f9d4672f7f 100644 --- a/server/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java +++ b/server/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java @@ -326,7 +326,7 @@ public void testExplain() throws Exception { assertThat(searchResponse.getHits().getTotalHits().value, equalTo(1L)); Explanation explanation = searchResponse.getHits().getHits()[0].getExplanation(); assertThat(explanation.getValue(), equalTo(searchResponse.getHits().getHits()[0].getScore())); - assertThat(explanation.toString(), startsWith("0.36464313 = Score based on 2 child docs in range from 0 to 1")); + assertThat(explanation.toString(), startsWith(explanation.getValue() + " = Score based on 2 child docs in range from 0 to 1")); } public void testSimpleNestedSorting() throws Exception { diff --git a/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec index 2903292b1adff..d07f9c664dd51 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec @@ -898,10 +898,10 @@ SELECT SCORE(), * FROM library WHERE MATCH(name, 'dune') ORDER BY SCORE() DESC; SCORE() | author | name | page_count | release_date ---------------+---------------+-------------------+---------------+-------------------- -2.2886353 |Frank Herbert |Dune |604 |1965-06-01T00:00:00Z -1.8893257 |Frank Herbert |Dune Messiah |331 |1969-10-15T00:00:00Z -1.6086556 |Frank Herbert |Children of Dune |408 |1976-04-21T00:00:00Z 
-1.4005898 |Frank Herbert |God Emperor of Dune|454 |1981-05-28T00:00:00Z +1.0402887 |Frank Herbert |Dune |604 |1965-06-01T00:00:00Z +0.8587844 |Frank Herbert |Dune Messiah |331 |1969-10-15T00:00:00Z +0.731207 |Frank Herbert |Children of Dune |408 |1976-04-21T00:00:00Z +0.6366317 |Frank Herbert |God Emperor of Dune|454 |1981-05-28T00:00:00Z // end::orderByScore ; @@ -912,10 +912,10 @@ SELECT SCORE(), * FROM library WHERE MATCH(name, 'dune') ORDER BY page_count DES SCORE() | author | name | page_count | release_date ---------------+---------------+-------------------+---------------+-------------------- -2.2886353 |Frank Herbert |Dune |604 |1965-06-01T00:00:00Z -1.4005898 |Frank Herbert |God Emperor of Dune|454 |1981-05-28T00:00:00Z -1.6086556 |Frank Herbert |Children of Dune |408 |1976-04-21T00:00:00Z -1.8893257 |Frank Herbert |Dune Messiah |331 |1969-10-15T00:00:00Z +1.0402887 |Frank Herbert |Dune |604 |1965-06-01T00:00:00Z +0.6366317 |Frank Herbert |God Emperor of Dune|454 |1981-05-28T00:00:00Z +0.731207 |Frank Herbert |Children of Dune |408 |1976-04-21T00:00:00Z +0.8587844 |Frank Herbert |Dune Messiah |331 |1969-10-15T00:00:00Z // end::orderByScoreWithMatch ; @@ -926,10 +926,10 @@ SELECT SCORE() AS score, name, release_date FROM library WHERE QUERY('dune') ORD score | name | release_date ---------------+-------------------+-------------------- -1.4005898 |God Emperor of Dune|1981-05-28T00:00:00Z -1.6086556 |Children of Dune |1976-04-21T00:00:00Z -1.8893257 |Dune Messiah |1969-10-15T00:00:00Z -2.2886353 |Dune |1965-06-01T00:00:00Z +0.6366317 |God Emperor of Dune|1981-05-28T00:00:00Z +0.731207 |Children of Dune |1976-04-21T00:00:00Z +0.8587844 |Dune Messiah |1969-10-15T00:00:00Z +1.0402887 |Dune |1965-06-01T00:00:00Z // end::scoreWithMatch ; diff --git a/x-pack/plugin/sql/qa/src/main/resources/fulltext.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/fulltext.csv-spec index 07df14d99e36b..cfe8536b6b2e6 100644 --- 
a/x-pack/plugin/sql/qa/src/main/resources/fulltext.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/fulltext.csv-spec @@ -76,12 +76,12 @@ score SELECT emp_no, first_name, SCORE() FROM test_emp WHERE MATCH(first_name, 'Erez') ORDER BY SCORE(); emp_no:i | first_name:s | SCORE():f -10076 |Erez |4.1053944 +10076 |Erez |1.8660883 ; scoreAsSomething SELECT emp_no, first_name, SCORE() as s FROM test_emp WHERE MATCH(first_name, 'Erez') ORDER BY SCORE(); emp_no:i | first_name:s | s:f -10076 |Erez |4.1053944 +10076 |Erez |1.8660883 ;