-
Notifications
You must be signed in to change notification settings - Fork 25.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fetch meta fields in FetchFieldsPhase using ValueFetcher #106325
Changes from 5 commits
7f93d0a
1006059
535cada
c840da4
6c31e57
503439f
99f129d
ba0d9e7
9bb8dbc
2369703
9fa44ca
64dbaed
cff6343
d335005
45c5bb9
d336fd5
8889b95
180fce1
c0ef98c
a1f9ad9
89ddd1b
fed48ad
fe91fc0
1c3c141
a1e1166
6bf58bb
495f91f
66687ed
1fd6a9f
556a0f6
e700c95
4e27d44
231f876
2c2429c
6dc595e
7dcf8ce
b197018
1ec9ad4
77d6625
02e74de
cf72f41
7092e1c
60c4d9e
2a71b9a
aa7982c
a7c9f95
d3d6711
ec29233
3f5582e
98d5c6a
ab96ec1
d9b91b0
74b20bb
d6f59d7
bdb7567
5370b3e
d8d1d77
eadfc90
45c3af3
acf09ee
484e5d5
22cf0be
6e778a1
def5d35
b4a1ec6
5941d68
a1ca138
674b354
e0be2b7
38cd4c2
283cb78
dfc680a
4290eac
6687daa
48f35fb
ce16a81
ac592e9
7c33c62
06697a5
f87dbf4
c6d84fc
cf81e82
26b2463
4ace7a0
65bd1ec
f8ed186
db1a8cb
56ea97a
866ce95
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,8 +22,8 @@ setup: | |
--- | ||
fetch fields: | ||
- skip: | ||
version: ' - 8.5.99' | ||
reason: stored fields phase added in 8.6 | ||
version: ' - 8.13.99' | ||
reason: fetch fields and stored_fields using ValueFetcher | ||
|
||
- do: | ||
search: | ||
|
@@ -44,17 +44,21 @@ fetch fields: | |
- match: { profile.shards.0.fetch.debug.stored_fields: [_id, _routing, _source] } | ||
- length: { profile.shards.0.fetch.children: 2 } | ||
- match: { profile.shards.0.fetch.children.0.type: FetchFieldsPhase } | ||
- match: { profile.shards.0.fetch.children.1.type: StoredFieldsPhase } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader: 0 } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader: 0 } | ||
- match: { profile.shards.0.fetch.children.1.type: StoredFieldsPhase } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader: 0 } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader: 0 } | ||
|
||
--- | ||
fetch source: | ||
- skip: | ||
version: ' - 8.5.99' | ||
reason: stored fields phase added in 8.6 | ||
version: ' - 8.13.99' | ||
reason: fetch fields and stored_fields using ValueFetcher | ||
|
||
- do: | ||
search: | ||
|
@@ -71,20 +75,21 @@ fetch source: | |
- gt: { profile.shards.0.fetch.breakdown.load_stored_fields_count: 0 } | ||
- gt: { profile.shards.0.fetch.breakdown.load_stored_fields: 0 } | ||
- match: { profile.shards.0.fetch.debug.stored_fields: [_id, _routing, _source] } | ||
- length: { profile.shards.0.fetch.children: 2 } | ||
- match: { profile.shards.0.fetch.children.0.type: FetchSourcePhase } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader: 0 } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader: 0 } | ||
- match: { profile.shards.0.fetch.children.0.debug.fast_path: 1 } | ||
- match: { profile.shards.0.fetch.children.1.type: StoredFieldsPhase } | ||
- length: { profile.shards.0.fetch.children: 3 } | ||
- match: { profile.shards.0.fetch.children.0.type: FetchFieldsPhase } | ||
- match: { profile.shards.0.fetch.children.1.type: FetchSourcePhase } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader: 0 } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader: 0 } | ||
- match: { profile.shards.0.fetch.children.1.debug.fast_path: 1 } | ||
- match: { profile.shards.0.fetch.children.2.type: StoredFieldsPhase } | ||
|
||
--- | ||
fetch nested source: | ||
- skip: | ||
version: ' - 8.5.99' | ||
reason: stored fields phase added in 8.6 | ||
version: ' - 8.13.99' | ||
reason: fetch fields and stored_fields using ValueFetcher | ||
|
||
- do: | ||
indices.create: | ||
|
@@ -135,24 +140,25 @@ fetch nested source: | |
- gt: { profile.shards.0.fetch.breakdown.load_stored_fields_count: 0 } | ||
- gt: { profile.shards.0.fetch.breakdown.load_stored_fields: 0 } | ||
- match: { profile.shards.0.fetch.debug.stored_fields: [_id, _routing, _source] } | ||
- length: { profile.shards.0.fetch.children: 3 } | ||
- match: { profile.shards.0.fetch.children.0.type: FetchSourcePhase } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader: 0 } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.0.breakdown.next_reader: 0 } | ||
- match: { profile.shards.0.fetch.children.1.type: InnerHitsPhase } | ||
- length: { profile.shards.0.fetch.children: 4 } | ||
- match: { profile.shards.0.fetch.children.0.type: FetchFieldsPhase } | ||
- match: { profile.shards.0.fetch.children.1.type: FetchSourcePhase } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader: 0 } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.1.breakdown.next_reader: 0 } | ||
- match: { profile.shards.0.fetch.children.2.type: StoredFieldsPhase } | ||
- match: { profile.shards.0.fetch.children.2.type: InnerHitsPhase } | ||
- gt: { profile.shards.0.fetch.children.2.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.2.breakdown.next_reader: 0 } | ||
- gt: { profile.shards.0.fetch.children.2.breakdown.next_reader_count: 0 } | ||
- gt: { profile.shards.0.fetch.children.2.breakdown.next_reader: 0 } | ||
- match: { profile.shards.0.fetch.children.3.type: StoredFieldsPhase } | ||
|
||
--- | ||
disabling stored fields removes fetch sub phases: | ||
- skip: | ||
version: ' - 7.15.99' | ||
reason: fetch profiling implemented in 7.16.0 | ||
version: ' - 8.13.99' | ||
reason: fetch fields and stored_fields using ValueFetcher | ||
|
||
- do: | ||
search: | ||
|
@@ -163,7 +169,8 @@ disabling stored fields removes fetch sub phases: | |
|
||
- match: { hits.hits.0._index: test } | ||
- match: { profile.shards.0.fetch.debug.stored_fields: [] } | ||
- is_false: profile.shards.0.fetch.children | ||
- length: { profile.shards.0.fetch.children: 1 } | ||
- match: { profile.shards.0.fetch.children.0.type: FetchFieldsPhase } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that this is an oversight and should be reverted. The fact that this test needs to be adapted is a signal that we have done something wrong. When we ask for
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That was the purpose of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That would break another test in
|
||
|
||
--- | ||
dfs knn vector profiling: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,13 +51,13 @@ public List<Object> fetchValues(Source source, int doc, List<Object> includedVal | |
for (Object entry : nestedValues) { | ||
// add this one entry only to the stub and use this as source lookup | ||
stub.put(nestedFieldName, entry); | ||
Map<String, DocumentField> fetchResult = nestedFieldFetcher.fetch( | ||
FieldFetcher.DocAndMetaFields fetchResult = nestedFieldFetcher.fetch( | ||
Source.fromMap(filteredSource, source.sourceContentType()), | ||
doc | ||
); | ||
|
||
Map<String, Object> nestedEntry = new HashMap<>(); | ||
for (DocumentField field : fetchResult.values()) { | ||
for (DocumentField field : fetchResult.documentFields().values()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here I am using just the document fields because there is no one of the metadata fields used in the response at the top level that need to be used in a nested context. |
||
List<Object> fetchValues = field.getValues(); | ||
if (fetchValues.isEmpty() == false) { | ||
String keyInNestedMap = field.getName().substring(nestedFieldPath.length() + 1); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,29 +10,69 @@ | |
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.elasticsearch.common.document.DocumentField; | ||
import org.elasticsearch.search.SearchHit; | ||
import org.elasticsearch.index.mapper.IgnoredFieldMapper; | ||
import org.elasticsearch.index.mapper.LegacyTypeFieldMapper; | ||
import org.elasticsearch.index.mapper.RoutingFieldMapper; | ||
import org.elasticsearch.search.fetch.FetchContext; | ||
import org.elasticsearch.search.fetch.FetchSubPhase; | ||
import org.elasticsearch.search.fetch.FetchSubPhaseProcessor; | ||
import org.elasticsearch.search.fetch.StoredFieldsContext; | ||
import org.elasticsearch.search.fetch.StoredFieldsSpec; | ||
|
||
import java.io.IOException; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.stream.Stream; | ||
|
||
/** | ||
* A fetch sub-phase for high-level field retrieval. Given a list of fields, it | ||
* retrieves the field values through the relevant {@link org.elasticsearch.index.mapper.ValueFetcher} | ||
* and returns them as document fields. | ||
*/ | ||
public final class FetchFieldsPhase implements FetchSubPhase { | ||
|
||
private static final List<String> ROOT_LEVEL_METADATA_FIELD_NAMES = List.of( | ||
IgnoredFieldMapper.NAME, | ||
RoutingFieldMapper.NAME, | ||
LegacyTypeFieldMapper.NAME | ||
); | ||
|
||
private static final List<FieldAndFormat> ROOT_LEVEL_METADATA_FIELDS = ROOT_LEVEL_METADATA_FIELD_NAMES.stream() | ||
.map(field -> new FieldAndFormat(field, null)) | ||
.toList(); | ||
|
||
public static boolean isMetadataField(final String field) { | ||
return ROOT_LEVEL_METADATA_FIELD_NAMES.stream().anyMatch(f -> f.equals(field)); | ||
} | ||
|
||
private static <T> List<T> emptyListIfNull(final List<T> theList) { | ||
return theList == null ? Collections.emptyList() : theList; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that this is problematic, because you have a separate definition of metadata fields within fetch fields. Plugins (like |
||
|
||
@Override | ||
public FetchSubPhaseProcessor getProcessor(FetchContext fetchContext) { | ||
FetchFieldsContext fetchFieldsContext = fetchContext.fetchFieldsContext(); | ||
if (fetchFieldsContext == null) { | ||
return null; | ||
} | ||
final FetchFieldsContext fetchFieldsContext = fetchContext.fetchFieldsContext(); | ||
final StoredFieldsContext storedFieldsContext = fetchContext.storedFieldsContext(); | ||
|
||
boolean fetchStoredFields = storedFieldsContext != null && storedFieldsContext.fetchFields(); | ||
final List<FieldAndFormat> storedFields = storedFieldsContext == null | ||
? Collections.emptyList() | ||
: emptyListIfNull(storedFieldsContext.fieldNames()).stream() | ||
.map(storedField -> new FieldAndFormat(storedField, null)) | ||
.distinct() | ||
.toList(); | ||
final List<FieldAndFormat> storedFieldsIncludingDefaultMetadataFields = fetchStoredFields | ||
? Stream.concat(ROOT_LEVEL_METADATA_FIELDS.stream(), storedFields.stream()).distinct().toList() | ||
: Collections.emptyList(); | ||
|
||
FieldFetcher fieldFetcher = FieldFetcher.create(fetchContext.getSearchExecutionContext(), fetchFieldsContext.fields()); | ||
final List<FieldAndFormat> fetchFields = fetchFieldsContext == null | ||
? Collections.emptyList() | ||
: emptyListIfNull(fetchFieldsContext.fields()); | ||
final FieldFetcher fieldFetcher = FieldFetcher.create( | ||
fetchContext.getSearchExecutionContext(), | ||
Stream.concat(fetchFields.stream(), storedFieldsIncludingDefaultMetadataFields.stream()).toList() | ||
); | ||
|
||
return new FetchSubPhaseProcessor() { | ||
@Override | ||
|
@@ -47,11 +87,10 @@ public StoredFieldsSpec storedFieldsSpec() { | |
|
||
@Override | ||
public void process(HitContext hitContext) throws IOException { | ||
Map<String, DocumentField> documentFields = fieldFetcher.fetch(hitContext.source(), hitContext.docId()); | ||
SearchHit hit = hitContext.hit(); | ||
for (Map.Entry<String, DocumentField> entry : documentFields.entrySet()) { | ||
hit.setDocumentField(entry.getKey(), entry.getValue()); | ||
} | ||
final FieldFetcher.DocAndMetaFields fields = fieldFetcher.fetch(hitContext.source(), hitContext.docId()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In order to simplify things, I would propose that we make no changes to |
||
final Map<String, DocumentField> documentFields = fields.documentFields(); | ||
final Map<String, DocumentField> metadataFields = fields.metadataFields(); | ||
hitContext.hit().addDocumentFields(documentFields, metadataFields); | ||
} | ||
}; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is no longer needed