Skip to content

Commit

Permalink
handle bwc for the standard filter
Browse files Browse the repository at this point in the history
  • Loading branch information
jimczi committed Sep 5, 2018
1 parent 36ae5e2 commit 67efaae
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@

package org.elasticsearch.indices.analysis;

import org.apache.logging.log4j.LogManager;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.NamedRegistry;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
Expand Down Expand Up @@ -67,6 +71,8 @@ public final class AnalysisModule {

private static final IndexSettings NA_INDEX_SETTINGS;

private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(LogManager.getLogger(AnalysisModule.class));

private final HunspellService hunspellService;
private final AnalysisRegistry analysisRegistry;

Expand Down Expand Up @@ -114,6 +120,29 @@ private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(Li
hunspellService) {
NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter");
tokenFilters.register("stop", StopTokenFilterFactory::new);
// Add "standard" for old indices (bwc). The Lucene StandardFilter was a no-op,
// so pre-7.0 indices get a pass-through factory plus a deprecation warning;
// indices created on or after 7.0.0-alpha1 are rejected outright.
tokenFilters.register("standard", new AnalysisProvider<TokenFilterFactory>() {
    @Override
    public TokenFilterFactory get(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
        if (indexSettings.getIndexVersionCreated().before(Version.V_7_0_0_alpha1)) {
            // NOTE(review, jpountz): remember to merge this deprecation warning to 6.x too.
            DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_deprecation",
                "The [standard] token filter name is deprecated and will be removed in a future version.");
        } else {
            throw new IllegalArgumentException("The [standard] token filter has been removed.");
        }
        // Pass-through factory: StandardFilter did nothing, so just return the stream.
        return new AbstractTokenFilterFactory(indexSettings, name, settings) {
            @Override
            public TokenStream create(TokenStream tokenStream) {
                return tokenStream;
            }
        };
    }

    @Override
    public boolean requiresAnalysisSettings() {
        return false;
    }
});
tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
tokenFilters.register("hunspell", requiresAnalysisSettings((indexSettings, env, name, settings) -> new HunspellTokenFilterFactory
(indexSettings, name, settings, hunspellService)));
Expand Down Expand Up @@ -150,6 +179,17 @@ static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List

// Add filters available in lucene-core
preConfiguredTokenFilters.register("lowercase", PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new));
// BWC: keep a pre-configured "standard" token filter for old indices only.
// It never altered the stream; 7.0+ indices must not reference it at all.
preConfiguredTokenFilters.register("standard",
    PreConfiguredTokenFilter.singletonWithVersion("standard", false, (reader, version) -> {
        if (version.onOrAfter(Version.V_7_0_0_alpha1)) {
            throw new IllegalArgumentException("The [standard] token filter has been removed.");
        }
        DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_deprecation",
            "The [standard] token filter is deprecated and will be removed in a future version.");
        return reader;
    }));
/* Note that "stop" is available in lucene-core but its pre-built
 * version uses a set of English stop words that are in
 * lucene-analyzers-common so "stop" is defined in the analysis-common
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.either;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
Expand Down Expand Up @@ -240,6 +241,35 @@ public void testUnderscoreInAnalyzerName() throws IOException {
}
}

/**
 * Backward-compatibility behavior of the "standard" token filter:
 * pre-7.0 indices may still use it (with a deprecation warning), while
 * indices created on 7.0.0-alpha1 or later must fail with a clear error.
 */
public void testStandardFilterBWC() throws IOException {
    // Any pre-7.0 index version exercises the deprecation path.
    Version version = VersionUtils.randomVersionBetween(random(), Version.V_6_0_0, Version.CURRENT.minimumCompatibilityVersion());
    // bwc deprecation
    {
        Settings oldIndexSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, version)
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .put("index.analysis.analyzer.my_standard.tokenizer", "standard")
            .put("index.analysis.analyzer.my_standard.filter", "standard")
            .build();
        IndexAnalyzers oldAnalyzers = getIndexAnalyzers(oldIndexSettings);
        // The filter is a no-op, so the token comes through unchanged — but it must warn.
        assertTokenStreamContents(oldAnalyzers.get("my_standard").tokenStream("", "test"), new String[]{"test"});
        assertWarnings("The [standard] token filter is deprecated and will be removed in a future version.");
    }
    // removal
    {
        final Settings newIndexSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_7_0_0_alpha1)
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .put("index.analysis.analyzer.my_standard.tokenizer", "standard")
            .put("index.analysis.analyzer.my_standard.filter", "standard")
            .build();
        IndexAnalyzers newAnalyzers = getIndexAnalyzers(newIndexSettings);
        IllegalArgumentException exc = expectThrows(IllegalArgumentException.class,
            () -> newAnalyzers.get("my_standard").tokenStream("", ""));
        assertThat(exc.getMessage(), equalTo("The [standard] token filter has been removed."));
    }
}

/**
* Tests that plugins can register pre-configured char filters that vary in behavior based on Elasticsearch version, Lucene version,
* and that do not vary based on version at all.
Expand Down Expand Up @@ -376,42 +406,41 @@ public void reset() throws IOException {
}
}
AnalysisRegistry registry = new AnalysisModule(TestEnvironment.newEnvironment(emptyNodeSettings),
singletonList(new AnalysisPlugin() {
@Override
public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
return Arrays.asList(
singletonList(new AnalysisPlugin() {
@Override
public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
return Arrays.asList(
PreConfiguredTokenizer.singleton("no_version", () -> new FixedTokenizer("no_version"),
noVersionSupportsMultiTerm ? () -> AppendTokenFilter.factoryForSuffix("no_version") : null),
noVersionSupportsMultiTerm ? () -> AppendTokenFilter.factoryForSuffix("no_version") : null),
PreConfiguredTokenizer.luceneVersion("lucene_version",
luceneVersion -> new FixedTokenizer(luceneVersion.toString()),
luceneVersionSupportsMultiTerm ?
luceneVersion -> AppendTokenFilter.factoryForSuffix(luceneVersion.toString()) : null),
luceneVersion -> new FixedTokenizer(luceneVersion.toString()),
luceneVersionSupportsMultiTerm ?
luceneVersion -> AppendTokenFilter.factoryForSuffix(luceneVersion.toString()) : null),
PreConfiguredTokenizer.elasticsearchVersion("elasticsearch_version",
esVersion -> new FixedTokenizer(esVersion.toString()),
elasticsearchVersionSupportsMultiTerm ?
esVersion -> AppendTokenFilter.factoryForSuffix(esVersion.toString()) : null)
);
}
})).getAnalysisRegistry();
esVersion -> new FixedTokenizer(esVersion.toString()),
elasticsearchVersionSupportsMultiTerm ?
esVersion -> AppendTokenFilter.factoryForSuffix(esVersion.toString()) : null)
);
}
})).getAnalysisRegistry();

Version version = VersionUtils.randomVersion(random());
IndexAnalyzers analyzers = getIndexAnalyzers(registry, Settings.builder()
.put("index.analysis.analyzer.no_version.tokenizer", "no_version")
.put("index.analysis.analyzer.lucene_version.tokenizer", "lucene_version")
.put("index.analysis.analyzer.elasticsearch_version.tokenizer", "elasticsearch_version")
.put(IndexMetaData.SETTING_VERSION_CREATED, version)
.build());
assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] {"no_version"});
assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[] {version.luceneVersion.toString()});
assertTokenStreamContents(analyzers.get("elasticsearch_version").tokenStream("", "test"), new String[] {version.toString()});

// These are current broken by https://github.com/elastic/elasticsearch/issues/24752
// assertEquals("test" + (noVersionSupportsMultiTerm ? "no_version" : ""),
// analyzers.get("no_version").normalize("", "test").utf8ToString());
// assertEquals("test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""),
// analyzers.get("lucene_version").normalize("", "test").utf8ToString());
// assertEquals("test" + (elasticsearchVersionSupportsMultiTerm ? version.toString() : ""),
// analyzers.get("elasticsearch_version").normalize("", "test").utf8ToString());
.put("index.analysis.analyzer.no_version.tokenizer", "no_version")
.put("index.analysis.analyzer.lucene_version.tokenizer", "lucene_version")
.put("index.analysis.analyzer.elasticsearch_version.tokenizer", "elasticsearch_version")
.put(IndexMetaData.SETTING_VERSION_CREATED, version)
.build());
assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[]{"no_version"});
assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[]{version.luceneVersion.toString()});
assertTokenStreamContents(analyzers.get("elasticsearch_version").tokenStream("", "test"), new String[]{version.toString()});

assertEquals("test" + (noVersionSupportsMultiTerm ? "no_version" : ""),
analyzers.get("no_version").normalize("", "test").utf8ToString());
assertEquals("test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""),
analyzers.get("lucene_version").normalize("", "test").utf8ToString());
assertEquals("test" + (elasticsearchVersionSupportsMultiTerm ? version.toString() : ""),
analyzers.get("elasticsearch_version").normalize("", "test").utf8ToString());
}

public void testRegisterHunspellDictionary() throws Exception {
Expand Down

0 comments on commit 67efaae

Please sign in to comment.