
Commit be82717

Merge branch 'main' into string-limit
Signed-off-by: Rishab Nahata <[email protected]>
imRishN authored Jun 29, 2023
2 parents 0605f3f + b33979a commit be82717
Showing 45 changed files with 1,059 additions and 149 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -80,6 +80,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased 2.x]
### Added
- [SearchPipeline] Add new search pipeline processor type, SearchPhaseResultsProcessor, that can modify the result of one search phase before starting the next phase. ([#7283](https://github.com/opensearch-project/OpenSearch/pull/7283))
- Add task cancellation monitoring service ([#7642](https://github.com/opensearch-project/OpenSearch/pull/7642))
- Add TokenManager Interface ([#7452](https://github.com/opensearch-project/OpenSearch/pull/7452))
- Add Remote store as a segment replication source ([#7653](https://github.com/opensearch-project/OpenSearch/pull/7653))
@@ -139,6 +140,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Allow insecure string settings to warn-log usage and advise migration to a newer secure variant ([#5496](https://github.com/opensearch-project/OpenSearch/pull/5496))
- Add self-organizing hash table to improve the performance of bucket aggregations ([#7652](https://github.com/opensearch-project/OpenSearch/pull/7652))
- Check UTF16 string size before converting to String to avoid OOME ([#7963](https://github.com/opensearch-project/OpenSearch/pull/7963))
- Move ZSTD compression codecs out of the sandbox ([#7908](https://github.com/opensearch-project/OpenSearch/pull/7908))


### Deprecated

@@ -151,6 +154,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Enforce 512 byte document ID limit in bulk updates ([#8039](https://github.com/opensearch-project/OpenSearch/pull/8039))
- Fix unnecessary wrapping with MultiCollector when a request contains only a GlobalAggregation ([#8125](https://github.com/opensearch-project/OpenSearch/pull/8125))
- Fix mapping char_filter when mapping a hashtag ([#7591](https://github.com/opensearch-project/OpenSearch/pull/7591))
- Fix NPE in multiterms aggregations involving empty buckets ([#7318](https://github.com/opensearch-project/OpenSearch/pull/7318))

### Security

@@ -0,0 +1,189 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec;

import org.opensearch.action.admin.indices.flush.FlushResponse;
import org.opensearch.action.admin.indices.refresh.RefreshResponse;
import org.opensearch.action.admin.indices.segments.IndicesSegmentsRequest;
import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.opensearch.action.support.ActiveShardCount;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.engine.Segment;
import org.opensearch.index.reindex.BulkByScrollResponse;
import org.opensearch.index.reindex.ReindexAction;
import org.opensearch.index.reindex.ReindexRequestBuilder;
import org.opensearch.index.reindex.ReindexTestCase;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static java.util.stream.Collectors.toList;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_METADATA;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_READ;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_WRITE;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY_ALLOW_DELETE;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures;

public class MultiCodecReindexIT extends ReindexTestCase {

public void testReindexingMultipleCodecs() throws InterruptedException, ExecutionException {
internalCluster().ensureAtLeastNumDataNodes(1);
Map<String, String> codecMap = Map.of(
"best_compression",
"BEST_COMPRESSION",
"zstd_no_dict",
"ZSTD_NO_DICT",
"zstd",
"ZSTD",
"default",
"BEST_SPEED"
);

for (Map.Entry<String, String> codec : codecMap.entrySet()) {
assertReindexingWithMultipleCodecs(codec.getKey(), codec.getValue(), codecMap);
}

}

private void assertReindexingWithMultipleCodecs(String destCodec, String destCodecMode, Map<String, String> codecMap)
throws ExecutionException, InterruptedException {

final String index = "test-index" + destCodec;
final String destIndex = "dest-index" + destCodec;

// creating source index
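// a 1b max_merged_segment effectively prevents segment merging, so segments written with each codec remain visible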
createIndex(
index,
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", "default")
.put("index.merge.policy.max_merged_segment", "1b")
.build()
);
ensureGreen(index);

final int nbDocs = randomIntBetween(2, 5);

// indexing with all 4 codecs
for (Map.Entry<String, String> codec : codecMap.entrySet()) {
useCodec(index, codec.getKey());
ingestDocs(index, nbDocs);
}

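// verify that the source index now contains segments written with each of the four codecs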
assertTrue(
getSegments(index).stream()
.flatMap(s -> s.getAttributes().values().stream())
.collect(Collectors.toSet())
.containsAll(codecMap.values())
);

// creating destination index with destination codec
createIndex(
destIndex,
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", destCodec)
.build()
);

BulkByScrollResponse bulkResponse = new ReindexRequestBuilder(client(), ReindexAction.INSTANCE).source(index)
.destination(destIndex)
.refresh(true)
.waitForActiveShards(ActiveShardCount.ONE)
.get();

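// every source document should be reindexed exactly once, with no failures or conflicts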
assertEquals(codecMap.size() * nbDocs, bulkResponse.getCreated());
assertEquals(codecMap.size() * nbDocs, bulkResponse.getTotal());
assertEquals(0, bulkResponse.getDeleted());
assertEquals(0, bulkResponse.getNoops());
assertEquals(0, bulkResponse.getVersionConflicts());
assertEquals(1, bulkResponse.getBatches());
assertTrue(bulkResponse.getTook().getMillis() > 0);
assertEquals(0, bulkResponse.getBulkFailures().size());
assertEquals(0, bulkResponse.getSearchFailures().size());
assertTrue(getSegments(destIndex).stream().allMatch(segment -> segment.attributes.containsValue(destCodecMode)));
}

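// index.codec can only be changed while the index is closed, hence the close → update settings → open sequence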
private void useCodec(String index, String codec) throws ExecutionException, InterruptedException {
assertAcked(client().admin().indices().prepareClose(index));

assertAcked(
client().admin()
.indices()
.updateSettings(new UpdateSettingsRequest(index).settings(Settings.builder().put("index.codec", codec)))
.get()
);

assertAcked(client().admin().indices().prepareOpen(index));
}

private void flushAndRefreshIndex(String index) {

// flush and refresh should succeed regardless of which index block is enabled
for (String blockSetting : Arrays.asList(
SETTING_BLOCKS_READ,
SETTING_BLOCKS_WRITE,
SETTING_READ_ONLY,
SETTING_BLOCKS_METADATA,
SETTING_READ_ONLY_ALLOW_DELETE
)) {
try {
enableIndexBlock(index, blockSetting);
// flush
FlushResponse flushResponse = client().admin().indices().prepareFlush(index).setForce(true).execute().actionGet();
assertNoFailures(flushResponse);

// refresh
RefreshResponse refreshResponse = client().admin().indices().prepareRefresh(index).execute().actionGet();
assertNoFailures(refreshResponse);
} finally {
disableIndexBlock(index, blockSetting);
}
}
}

private void ingestDocs(String index, int nbDocs) throws InterruptedException {

indexRandom(
randomBoolean(),
false,
randomBoolean(),
IntStream.range(0, nbDocs)
.mapToObj(i -> client().prepareIndex(index).setId(UUID.randomUUID().toString()).setSource("num", i))
.collect(toList())
);
flushAndRefreshIndex(index);
}

private ArrayList<Segment> getSegments(String index) {

return new ArrayList<>(
client().admin()
.indices()
.segments(new IndicesSegmentsRequest(index))
.actionGet()
.getIndices()
.get(index)
.getShards()
.get(0)
.getShards()[0].getSegments()
);
}

}
@@ -39,7 +39,7 @@ teardown:
{
"script" : {
"lang" : "painless",
"source" : "ctx._source['size'] += 10; ctx._source['from'] -= 1; ctx._source['explain'] = !ctx._source['explain']; ctx._source['version'] = !ctx._source['version']; ctx._source['seq_no_primary_term'] = !ctx._source['seq_no_primary_term']; ctx._source['track_scores'] = !ctx._source['track_scores']; ctx._source['track_total_hits'] = 1; ctx._source['min_score'] -= 0.9; ctx._source['terminate_after'] += 2; ctx._source['profile'] = !ctx._source['profile'];"
"source" : "ctx._source['size'] += 10; ctx._source['from'] = ctx._source['from'] <= 0 ? ctx._source['from'] : ctx._source['from'] - 1 ; ctx._source['explain'] = !ctx._source['explain']; ctx._source['version'] = !ctx._source['version']; ctx._source['seq_no_primary_term'] = !ctx._source['seq_no_primary_term']; ctx._source['track_scores'] = !ctx._source['track_scores']; ctx._source['track_total_hits'] = 1; ctx._source['min_score'] -= 0.9; ctx._source['terminate_after'] += 2; ctx._source['profile'] = !ctx._source['profile'];"
}
}
]
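
For context (editorial note, not part of the diff): the only change above replaces the unconditional `from` decrement with a guard, so the rewritten search request can never end up with a negative `from` offset. An equivalent, illustrative Painless fragment of the new behavior:

// decrement 'from' only while it is still positive; the old script could push it below 0
if (ctx._source['from'] > 0) {
    ctx._source['from'] -= 1;
}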
@@ -712,3 +712,51 @@ setup:
- match: { aggregations.m_terms.buckets.0.key: ["a", 1] }
- match: { aggregations.m_terms.buckets.0.key_as_string: "a|1" }
- match: { aggregations.m_terms.buckets.0.doc_count: 4 }

---
"aggregate over multi-terms test":
- skip:
version: "- 2.9.99"
reason: "multi_terms aggregation was introduced in 2.1.0, NPE bug checked by this test case will manifest in any version < 3.0"

- do:
bulk:
index: test_1
refresh: true
body:
- '{"index": {}}'
- '{"str": "a", "ip": "127.0.0.1", "date": "2022-03-23"}'
- '{"index": {}}'
- '{"str": "a", "ip": "127.0.0.1", "date": "2022-03-25"}'
- '{"index": {}}'
- '{"str": "b", "ip": "127.0.0.1", "date": "2022-03-23"}'
- '{"index": {}}'
- '{"str": "b", "ip": "127.0.0.1", "date": "2022-03-25"}'

- do:
search:
index: test_1
size: 0
body:
aggs:
histo:
date_histogram:
field: date
calendar_interval: day
aggs:
m_terms:
multi_terms:
terms:
- field: str
- field: ip

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2022-03-23T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.m_terms.buckets.0.key: ["a", "127.0.0.1"] }
- match: { aggregations.histo.buckets.0.m_terms.buckets.1.key: ["b", "127.0.0.1"] }
- match: { aggregations.histo.buckets.1.key_as_string: "2022-03-24T00:00:00.000Z" }
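# the empty 2022-03-24 bucket is the case that previously triggered the NPE fixed in #7318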
- length: { aggregations.histo.buckets.1.m_terms.buckets: 0 }
- match: { aggregations.histo.buckets.2.key_as_string: "2022-03-25T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.m_terms.buckets.0.key: [ "a", "127.0.0.1" ] }
- match: { aggregations.histo.buckets.2.m_terms.buckets.1.key: [ "b", "127.0.0.1" ] }
28 changes: 0 additions & 28 deletions sandbox/plugins/custom-codecs/build.gradle

This file was deleted.

This file was deleted.

This file was deleted.
