From 7bb2da197dcabba8cbd0244cb5557b8ebb88e462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Fri, 15 Feb 2019 20:15:05 +0100 Subject: [PATCH] Remove `nGram` and `edgeNGram` token filter names (#38911) In #30209 we deprecated the camel case `nGram` filter name in favour of `ngram` and did the same for `edgeNGram` and `edge_ngram`. Using these names has been deprecated since 6.4 and is issuing deprecation warnings since then. I think we can remove these filters in 8.0. In a backport of this PR I would change what was a deprecation warning from 6.4 to an error starting with new indices created in 7.0. --- .../edgengram-tokenfilter.asciidoc | 4 +- .../tokenfilters/ngram-tokenfilter.asciidoc | 4 +- docs/reference/migration/migrate_8_0.asciidoc | 6 +- .../migration/migrate_8_0/mappings.asciidoc | 10 +++ .../analysis/common/CommonAnalysisPlugin.java | 16 ---- .../common/CommonAnalysisFactoryTests.java | 2 - .../common/CommonAnalysisPluginTests.java | 86 ------------------- .../common/HighlighterWithAnalyzersTests.java | 2 +- .../test/analysis-common/30_tokenizers.yml | 60 +++++-------- .../test/indices.analyze/10_analyze.yml | 2 +- 10 files changed, 43 insertions(+), 149 deletions(-) create mode 100644 docs/reference/migration/migrate_8_0/mappings.asciidoc diff --git a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc index be37d24f7dd7c..e460725523cf6 100644 --- a/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/edgengram-tokenfilter.asciidoc @@ -1,9 +1,9 @@ [[analysis-edgengram-tokenfilter]] === Edge NGram Token Filter -A token filter of type `edgeNGram`. +A token filter of type `edge_ngram`. 
-The following are settings that can be set for a `edgeNGram` token +The following are settings that can be set for a `edge_ngram` token filter type: [cols="<,<",options="header",] diff --git a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc index acc178a2741fa..53bda23d12bf9 100644 --- a/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/ngram-tokenfilter.asciidoc @@ -1,9 +1,9 @@ [[analysis-ngram-tokenfilter]] === NGram Token Filter -A token filter of type `nGram`. +A token filter of type `ngram`. -The following are settings that can be set for a `nGram` token filter +The following are settings that can be set for a `ngram` token filter type: [cols="<,<",options="header",] diff --git a/docs/reference/migration/migrate_8_0.asciidoc b/docs/reference/migration/migrate_8_0.asciidoc index 4477090dc16b6..ea1166e36d3b9 100644 --- a/docs/reference/migration/migrate_8_0.asciidoc +++ b/docs/reference/migration/migrate_8_0.asciidoc @@ -9,4 +9,8 @@ your application to {es} 8.0. See also <> and <>. -coming[8.0.0] \ No newline at end of file +coming[8.0.0] + +* <> + +include::migrate_8_0/mappings.asciidoc[] \ No newline at end of file diff --git a/docs/reference/migration/migrate_8_0/mappings.asciidoc b/docs/reference/migration/migrate_8_0/mappings.asciidoc new file mode 100644 index 0000000000000..ea36309447e4c --- /dev/null +++ b/docs/reference/migration/migrate_8_0/mappings.asciidoc @@ -0,0 +1,10 @@ +[float] +[[breaking_80_mappings_changes]] +=== Mapping changes + +[float] +==== The `nGram` and `edgeNGram` token filter names have been removed + +The `nGram` and `edgeNGram` token filter names that have been deprecated since +version 6.4 have been removed. Both token filters should be used by their +alternative names `ngram` and `edge_ngram` instead. 
\ No newline at end of file diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index ac439f4bae227..e05305a8ebd39 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -414,14 +414,6 @@ public List getPreConfiguredTokenFilters() { filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false, input -> new SnowballFilter(input, new DutchStemmer()))); filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, false, input -> new EdgeNGramTokenFilter(input, 1))); - filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, false, (reader, version) -> { - if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { - deprecationLogger.deprecatedAndMaybeLog("edgeNGram_deprecation", - "The [edgeNGram] token filter name is deprecated and will be removed in a future version. 
" - + "Please change the filter name to [edge_ngram] instead."); - } - return new EdgeNGramTokenFilter(reader, 1); - })); filters.add(PreConfiguredTokenFilter.singleton("elision", true, input -> new ElisionFilter(input, FrenchAnalyzer.DEFAULT_ARTICLES))); filters.add(PreConfiguredTokenFilter.singleton("french_stem", false, input -> new SnowballFilter(input, new FrenchStemmer()))); @@ -438,14 +430,6 @@ public List getPreConfiguredTokenFilters() { LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT, LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS))); filters.add(PreConfiguredTokenFilter.singleton("ngram", false, false, reader -> new NGramTokenFilter(reader, 1, 2, false))); - filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, false, (reader, version) -> { - if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) { - deprecationLogger.deprecatedAndMaybeLog("nGram_deprecation", - "The [nGram] token filter name is deprecated and will be removed in a future version. 
" - + "Please change the filter name to [ngram] instead."); - } - return new NGramTokenFilter(reader, 1, 2, false); - })); filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new)); filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new)); filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new)); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java index 99e882c622085..acb7f2213f641 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java @@ -185,7 +185,6 @@ protected Map> getPreConfiguredTokenFilters() { filters.put("delimited_payload", org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory.class); filters.put("dutch_stem", SnowballPorterFilterFactory.class); filters.put("edge_ngram", null); - filters.put("edgeNGram", null); filters.put("elision", null); filters.put("french_stem", SnowballPorterFilterFactory.class); filters.put("german_stem", null); @@ -197,7 +196,6 @@ protected Map> getPreConfiguredTokenFilters() { filters.put("length", null); filters.put("limit", LimitTokenCountFilterFactory.class); filters.put("ngram", null); - filters.put("nGram", null); filters.put("persian_normalization", null); filters.put("porter_stem", null); filters.put("reverse", ReverseStringFilterFactory.class); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java index c52c78ffe27e3..f128c07361c45 100644 --- 
a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisPluginTests.java @@ -20,8 +20,6 @@ package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; @@ -29,98 +27,14 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; -import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; import org.elasticsearch.test.VersionUtils; import java.io.IOException; -import java.io.StringReader; -import java.util.Map; public class CommonAnalysisPluginTests extends ESTestCase { - /** - * Check that the deprecated name "nGram" issues a deprecation warning for indices created since 6.3.0 - */ - public void testNGramDeprecationWarning() throws IOException { - Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) - .build(); - - IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); - try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { - Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; - TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); - Tokenizer tokenizer = new MockTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - assertNotNull(tokenFilterFactory.create(tokenizer)); - assertWarnings( 
- "The [nGram] token filter name is deprecated and will be removed in a future version. " - + "Please change the filter name to [ngram] instead."); - } - } - - /** - * Check that the deprecated name "nGram" does NOT issues a deprecation warning for indices created before 6.4.0 - */ - public void testNGramNoDeprecationWarningPre6_4() throws IOException { - Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, - VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_6_3_0)) - .build(); - - IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); - try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { - Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; - TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram"); - Tokenizer tokenizer = new MockTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - assertNotNull(tokenFilterFactory.create(tokenizer)); - } - } - - /** - * Check that the deprecated name "edgeNGram" issues a deprecation warning for indices created since 6.3.0 - */ - public void testEdgeNGramDeprecationWarning() throws IOException { - Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_4_0, Version.CURRENT)) - .build(); - - IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); - try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { - Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; - TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); - Tokenizer tokenizer = new MockTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - 
assertNotNull(tokenFilterFactory.create(tokenizer)); - assertWarnings( - "The [edgeNGram] token filter name is deprecated and will be removed in a future version. " - + "Please change the filter name to [edge_ngram] instead."); - } - } - - /** - * Check that the deprecated name "edgeNGram" does NOT issues a deprecation warning for indices created before 6.4.0 - */ - public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException { - Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) - .put(IndexMetaData.SETTING_VERSION_CREATED, - VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.V_6_3_0)) - .build(); - - IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); - try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) { - Map tokenFilters = createTestAnalysis(idxSettings, settings, commonAnalysisPlugin).tokenFilter; - TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram"); - Tokenizer tokenizer = new MockTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - assertNotNull(tokenFilterFactory.create(tokenizer)); - } - } - - /** * Check that the deprecated analyzer name "standard_html_strip" throws exception for indices created since 7.0.0 */ diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java index e96243efc4254..8f58a074cf102 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java @@ -81,7 +81,7 @@ public void testNgramHighlightingWithBrokenPositions() throws IOException { .put("analysis.tokenizer.autocomplete.max_gram", 20) 
.put("analysis.tokenizer.autocomplete.min_gram", 1) .put("analysis.tokenizer.autocomplete.token_chars", "letter,digit") - .put("analysis.tokenizer.autocomplete.type", "nGram") + .put("analysis.tokenizer.autocomplete.type", "ngram") .put("analysis.filter.wordDelimiter.type", "word_delimiter") .putList("analysis.filter.wordDelimiter.type_table", "& => ALPHANUM", "| => ALPHANUM", "! => ALPHANUM", diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml index 9a7c158fc4734..4fe5162e68743 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml @@ -23,38 +23,6 @@ - match: { detail.tokenizer.tokens.0.token: Foo Bar! } --- -"nGram": - - do: - indices.analyze: - body: - text: good - explain: true - tokenizer: - type: nGram - min_gram: 2 - max_gram: 2 - - length: { detail.tokenizer.tokens: 3 } - - match: { detail.tokenizer.name: _anonymous_tokenizer } - - match: { detail.tokenizer.tokens.0.token: go } - - match: { detail.tokenizer.tokens.1.token: oo } - - match: { detail.tokenizer.tokens.2.token: od } - ---- -"nGram_exception": - - skip: - version: " - 6.99.99" - reason: only starting from version 7.x this throws an error - - do: - catch: /The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to[:] \[1\] but was \[2\]\. 
This limit can be set by changing the \[index.max_ngram_diff\] index level setting\./ - indices.analyze: - body: - text: good - explain: true - tokenizer: - type: nGram - min_gram: 2 - max_gram: 4 ---- "simple_pattern": - do: indices.analyze: @@ -133,7 +101,7 @@ text: "foobar" explain: true tokenizer: - type: nGram + type: ngram min_gram: 3 max_gram: 3 - length: { detail.tokenizer.tokens: 4 } @@ -162,15 +130,31 @@ body: text: "foo" explain: true - tokenizer: nGram + tokenizer: ngram - length: { detail.tokenizer.tokens: 5 } - - match: { detail.tokenizer.name: nGram } + - match: { detail.tokenizer.name: ngram } - match: { detail.tokenizer.tokens.0.token: f } - match: { detail.tokenizer.tokens.1.token: fo } - match: { detail.tokenizer.tokens.2.token: o } - match: { detail.tokenizer.tokens.3.token: oo } - match: { detail.tokenizer.tokens.4.token: o } +--- +"ngram_exception": + - skip: + version: " - 6.99.99" + reason: only starting from version 7.x this throws an error + - do: + catch: /The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to[:] \[1\] but was \[2\]\. 
This limit can be set by changing the \[index.max_ngram_diff\] index level setting\./ + indices.analyze: + body: + text: good + explain: true + tokenizer: + type: ngram + min_gram: 2 + max_gram: 4 + --- "edge_ngram": - do: @@ -194,7 +178,7 @@ text: "foo" explain: true tokenizer: - type: edgeNGram + type: edge_ngram min_gram: 1 max_gram: 3 - length: { detail.tokenizer.tokens: 3 } @@ -219,9 +203,9 @@ body: text: "foo" explain: true - tokenizer: edgeNGram + tokenizer: edge_ngram - length: { detail.tokenizer.tokens: 2 } - - match: { detail.tokenizer.name: edgeNGram } + - match: { detail.tokenizer.name: edge_ngram } - match: { detail.tokenizer.tokens.0.token: f } - match: { detail.tokenizer.tokens.1.token: fo } diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml index ec00b6d41f1c5..56bbed7044e14 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml @@ -76,7 +76,7 @@ analysis: tokenizer: trigram: - type: nGram + type: ngram min_gram: 3 max_gram: 3 filter: