
Commit be82717

Merge branch 'main' into string-limit
Signed-off-by: Rishab Nahata <[email protected]>
imRishN authored Jun 29, 2023
2 parents 0605f3f + b33979a commit be82717
Showing 45 changed files with 1,059 additions and 149 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -80,6 +80,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased 2.x]
### Added
- [SearchPipeline] Add new search pipeline processor type, SearchPhaseResultsProcessor, that can modify the result of one search phase before starting the next phase. ([#7283](https://github.com/opensearch-project/OpenSearch/pull/7283))
- Add task cancellation monitoring service ([#7642](https://github.com/opensearch-project/OpenSearch/pull/7642))
- Add TokenManager Interface ([#7452](https://github.com/opensearch-project/OpenSearch/pull/7452))
- Add Remote store as a segment replication source ([#7653](https://github.com/opensearch-project/OpenSearch/pull/7653))
@@ -139,6 +140,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Allow insecure string settings to warn-log usage and advise migration to a newer secure variant ([#5496](https://github.com/opensearch-project/OpenSearch/pull/5496))
- Add self-organizing hash table to improve the performance of bucket aggregations ([#7652](https://github.com/opensearch-project/OpenSearch/pull/7652))
- Check UTF16 string size before converting to String to avoid OOME ([#7963](https://github.com/opensearch-project/OpenSearch/pull/7963))
- Move ZSTD compression codecs out of the sandbox ([#7908](https://github.com/opensearch-project/OpenSearch/pull/7908))


### Deprecated

@@ -151,6 +154,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Enforce 512 byte document ID limit in bulk updates ([#8039](https://github.com/opensearch-project/OpenSearch/pull/8039))
- Fix unnecessary wrapping with MultiCollector when a request contains only a GlobalAggregation ([#8125](https://github.com/opensearch-project/OpenSearch/pull/8125))
- Fix mapping char_filter when mapping a hashtag ([#7591](https://github.com/opensearch-project/OpenSearch/pull/7591))
- Fix NPE in multiterms aggregations involving empty buckets ([#7318](https://github.com/opensearch-project/OpenSearch/pull/7318))

### Security

@@ -0,0 +1,189 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec;

import org.opensearch.action.admin.indices.flush.FlushResponse;
import org.opensearch.action.admin.indices.refresh.RefreshResponse;
import org.opensearch.action.admin.indices.segments.IndicesSegmentsRequest;
import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.opensearch.action.support.ActiveShardCount;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.engine.Segment;
import org.opensearch.index.reindex.BulkByScrollResponse;
import org.opensearch.index.reindex.ReindexAction;
import org.opensearch.index.reindex.ReindexRequestBuilder;
import org.opensearch.index.reindex.ReindexTestCase;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static java.util.stream.Collectors.toList;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_METADATA;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_READ;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_WRITE;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY_ALLOW_DELETE;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures;

public class MultiCodecReindexIT extends ReindexTestCase {

public void testReindexingMultipleCodecs() throws InterruptedException, ExecutionException {
internalCluster().ensureAtLeastNumDataNodes(1);
Map<String, String> codecMap = Map.of(
"best_compression",
"BEST_COMPRESSION",
"zstd_no_dict",
"ZSTD_NO_DICT",
"zstd",
"ZSTD",
"default",
"BEST_SPEED"
);

for (Map.Entry<String, String> codec : codecMap.entrySet()) {
assertReindexingWithMultipleCodecs(codec.getKey(), codec.getValue(), codecMap);
}

}

private void assertReindexingWithMultipleCodecs(String destCodec, String destCodecMode, Map<String, String> codecMap)
throws ExecutionException, InterruptedException {

final String index = "test-index" + destCodec;
final String destIndex = "dest-index" + destCodec;

// creating source index
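// a 1b max_merged_segment effectively prevents segment merging, so segments written with each codec remain visible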
createIndex(
index,
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", "default")
.put("index.merge.policy.max_merged_segment", "1b")
.build()
);
ensureGreen(index);

final int nbDocs = randomIntBetween(2, 5);

// indexing with all 4 codecs
for (Map.Entry<String, String> codec : codecMap.entrySet()) {
useCodec(index, codec.getKey());
ingestDocs(index, nbDocs);
}

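// verify that the source index now contains segments written with each of the four codecs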
assertTrue(
getSegments(index).stream()
.flatMap(s -> s.getAttributes().values().stream())
.collect(Collectors.toSet())
.containsAll(codecMap.values())
);

// creating destination index with destination codec
createIndex(
destIndex,
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", destCodec)
.build()
);

BulkByScrollResponse bulkResponse = new ReindexRequestBuilder(client(), ReindexAction.INSTANCE).source(index)
.destination(destIndex)
.refresh(true)
.waitForActiveShards(ActiveShardCount.ONE)
.get();

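// every source document should be reindexed exactly once, with no failures or conflicts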
assertEquals(codecMap.size() * nbDocs, bulkResponse.getCreated());
assertEquals(codecMap.size() * nbDocs, bulkResponse.getTotal());
assertEquals(0, bulkResponse.getDeleted());
assertEquals(0, bulkResponse.getNoops());
assertEquals(0, bulkResponse.getVersionConflicts());
assertEquals(1, bulkResponse.getBatches());
assertTrue(bulkResponse.getTook().getMillis() > 0);
assertEquals(0, bulkResponse.getBulkFailures().size());
assertEquals(0, bulkResponse.getSearchFailures().size());
assertTrue(getSegments(destIndex).stream().allMatch(segment -> segment.attributes.containsValue(destCodecMode)));
}

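// index.codec can only be changed while the index is closed, hence the close → update settings → open sequence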
private void useCodec(String index, String codec) throws ExecutionException, InterruptedException {
assertAcked(client().admin().indices().prepareClose(index));

assertAcked(
client().admin()
.indices()
.updateSettings(new UpdateSettingsRequest(index).settings(Settings.builder().put("index.codec", codec)))
.get()
);

assertAcked(client().admin().indices().prepareOpen(index));
}

private void flushAndRefreshIndex(String index) {

// flush and refresh should succeed regardless of which index block is enabled
for (String blockSetting : Arrays.asList(
SETTING_BLOCKS_READ,
SETTING_BLOCKS_WRITE,
SETTING_READ_ONLY,
SETTING_BLOCKS_METADATA,
SETTING_READ_ONLY_ALLOW_DELETE
)) {
try {
enableIndexBlock(index, blockSetting);
// flush
FlushResponse flushResponse = client().admin().indices().prepareFlush(index).setForce(true).execute().actionGet();
assertNoFailures(flushResponse);

// refresh
RefreshResponse refreshResponse = client().admin().indices().prepareRefresh(index).execute().actionGet();
assertNoFailures(refreshResponse);
} finally {
disableIndexBlock(index, blockSetting);
}
}
}

private void ingestDocs(String index, int nbDocs) throws InterruptedException {

indexRandom(
randomBoolean(),
false,
randomBoolean(),
IntStream.range(0, nbDocs)
.mapToObj(i -> client().prepareIndex(index).setId(UUID.randomUUID().toString()).setSource("num", i))
.collect(toList())
);
flushAndRefreshIndex(index);
}

private ArrayList<Segment> getSegments(String index) {

return new ArrayList<>(
client().admin()
.indices()
.segments(new IndicesSegmentsRequest(index))
.actionGet()
.getIndices()
.get(index)
.getShards()
.get(0)
.getShards()[0].getSegments()
);
}

}
@@ -39,7 +39,7 @@ teardown:
{
"script" : {
"lang" : "painless",
"source" : "ctx._source['size'] += 10; ctx._source['from'] -= 1; ctx._source['explain'] = !ctx._source['explain']; ctx._source['version'] = !ctx._source['version']; ctx._source['seq_no_primary_term'] = !ctx._source['seq_no_primary_term']; ctx._source['track_scores'] = !ctx._source['track_scores']; ctx._source['track_total_hits'] = 1; ctx._source['min_score'] -= 0.9; ctx._source['terminate_after'] += 2; ctx._source['profile'] = !ctx._source['profile'];"
"source" : "ctx._source['size'] += 10; ctx._source['from'] = ctx._source['from'] <= 0 ? ctx._source['from'] : ctx._source['from'] - 1 ; ctx._source['explain'] = !ctx._source['explain']; ctx._source['version'] = !ctx._source['version']; ctx._source['seq_no_primary_term'] = !ctx._source['seq_no_primary_term']; ctx._source['track_scores'] = !ctx._source['track_scores']; ctx._source['track_total_hits'] = 1; ctx._source['min_score'] -= 0.9; ctx._source['terminate_after'] += 2; ctx._source['profile'] = !ctx._source['profile'];"
}
}
]
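
For context (editorial note, not part of the diff): the only change above replaces the unconditional `from` decrement with a guard, so the rewritten search request can never end up with a negative `from` offset. An equivalent, illustrative Painless fragment of the new behavior:

// decrement 'from' only while it is still positive; the old script could push it below 0
if (ctx._source['from'] > 0) {
    ctx._source['from'] -= 1;
}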
@@ -712,3 +712,51 @@ setup:
- match: { aggregations.m_terms.buckets.0.key: ["a", 1] }
- match: { aggregations.m_terms.buckets.0.key_as_string: "a|1" }
- match: { aggregations.m_terms.buckets.0.doc_count: 4 }

---
"aggregate over multi-terms test":
- skip:
version: "- 2.9.99"
reason: "multi_terms aggregation was introduced in 2.1.0, NPE bug checked by this test case will manifest in any version < 3.0"

- do:
bulk:
index: test_1
refresh: true
body:
- '{"index": {}}'
- '{"str": "a", "ip": "127.0.0.1", "date": "2022-03-23"}'
- '{"index": {}}'
- '{"str": "a", "ip": "127.0.0.1", "date": "2022-03-25"}'
- '{"index": {}}'
- '{"str": "b", "ip": "127.0.0.1", "date": "2022-03-23"}'
- '{"index": {}}'
- '{"str": "b", "ip": "127.0.0.1", "date": "2022-03-25"}'

- do:
search:
index: test_1
size: 0
body:
aggs:
histo:
date_histogram:
field: date
calendar_interval: day
aggs:
m_terms:
multi_terms:
terms:
- field: str
- field: ip

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2022-03-23T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.m_terms.buckets.0.key: ["a", "127.0.0.1"] }
- match: { aggregations.histo.buckets.0.m_terms.buckets.1.key: ["b", "127.0.0.1"] }
- match: { aggregations.histo.buckets.1.key_as_string: "2022-03-24T00:00:00.000Z" }
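# the empty 2022-03-24 bucket is the case that previously triggered the NPE fixed in #7318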
- length: { aggregations.histo.buckets.1.m_terms.buckets: 0 }
- match: { aggregations.histo.buckets.2.key_as_string: "2022-03-25T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.m_terms.buckets.0.key: [ "a", "127.0.0.1" ] }
- match: { aggregations.histo.buckets.2.m_terms.buckets.1.key: [ "b", "127.0.0.1" ] }
28 changes: 0 additions & 28 deletions sandbox/plugins/custom-codecs/build.gradle

This file was deleted.

This file was deleted.

This file was deleted.
