Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Security for _field_names field should not override field statistics #33261

Merged
merged 5 commits into from
Sep 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.elasticsearch.index.mapper.SourceFieldMapper;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
Expand Down Expand Up @@ -70,7 +71,11 @@ static class FieldSubsetDirectoryReader extends FilterDirectoryReader {
super(in, new FilterDirectoryReader.SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader reader) {
return new FieldSubsetReader(reader, filter);
try {
return new FieldSubsetReader(reader, filter);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
});
this.filter = filter;
Expand Down Expand Up @@ -109,11 +114,13 @@ public CacheHelper getReaderCacheHelper() {
private final FieldInfos fieldInfos;
/** An automaton that only accepts authorized fields. */
private final CharacterRunAutomaton filter;
/** {@link Terms} cache with filtered stats for the {@link FieldNamesFieldMapper} field. */
private final Terms fieldNamesFilterTerms;

/**
* Wrap a single segment, exposing a subset of its fields.
*/
FieldSubsetReader(LeafReader in, CharacterRunAutomaton filter) {
FieldSubsetReader(LeafReader in, CharacterRunAutomaton filter) throws IOException {
super(in);
ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
for (FieldInfo fi : in.getFieldInfos()) {
Expand All @@ -123,6 +130,8 @@ public CacheHelper getReaderCacheHelper() {
}
fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()]));
this.filter = filter;
final Terms fieldNameTerms = super.terms(FieldNamesFieldMapper.NAME);
this.fieldNamesFilterTerms = fieldNameTerms == null ? null : new FieldNamesTerms(fieldNameTerms);
}

/** returns true if this field is allowed. */
Expand Down Expand Up @@ -346,21 +355,14 @@ public Terms terms(String field) throws IOException {
}
}

private Terms wrapTerms(Terms terms, String field) {
private Terms wrapTerms(Terms terms, String field) throws IOException {
if (!hasField(field)) {
return null;
} else if (FieldNamesFieldMapper.NAME.equals(field)) {
// for the _field_names field, fields for the document
// are encoded as postings, where term is the field.
// so we hide terms for fields we filter out.
if (terms != null) {
// check for null, in case term dictionary is not a ghostbuster
// So just because it's in fieldinfos and "indexed=true" doesn't mean you can go grab a Terms for it.
// It just means at one point there was a document with that field indexed...
// The field infos are not updated/removed even if no docs refer to them
terms = new FieldNamesTerms(terms);
}
return terms;
return fieldNamesFilterTerms;
} else {
return terms;
}
Expand All @@ -371,37 +373,43 @@ private Terms wrapTerms(Terms terms, String field) {
* representing fields that should not be visible in this reader.
*/
class FieldNamesTerms extends FilterTerms {
// Term count observed through this wrapper's own iterator(); computed once in the constructor.
final long size;
// Sum of docFreq() over the terms returned by this wrapper's iterator(); computed once in the constructor.
final long sumDocFreq;

// NOTE(review): two constructor headers appear back to back here; this looks like the
// before/after pair of a diff. The body calls iterator(), which is declared to throw
// IOException, so only the "throws IOException" form is consistent with it -- confirm.
FieldNamesTerms(Terms in) {
FieldNamesTerms(Terms in) throws IOException {
super(in);
// The wrapped terms are expected to carry no term frequencies; the loop below only uses docFreq().
assert in.hasFreqs() == false;
// re-compute the stats for the field to take
// into account the filtered terms.
// iterator() is the override declared below, so the enum walked here is a FieldNamesTermsEnum
// (presumably the one that hides unauthorized terms -- its definition is not visible here).
final TermsEnum e = iterator();
long size = 0, sumDocFreq = 0;
while (e.next() != null) {
size ++;
sumDocFreq += e.docFreq();
}
this.size = size;
this.sumDocFreq = sumDocFreq;
}

// Wraps the underlying enum in a FieldNamesTermsEnum on every call.
@Override
public TermsEnum iterator() throws IOException {
return new FieldNamesTermsEnum(in.iterator());
}

// we don't support field statistics (since we filter out terms)
// but this isn't really a big deal: _field_names is not used for ranking.
// NOTE(review): the comment above sits next to accessors that return both -1 and the
// values cached by the constructor; the -1 lines look like the pre-change side of a diff -- confirm.

@Override
public int getDocCount() throws IOException {
return -1;
public long size() throws IOException {
// value cached by the constructor
return size;
}

@Override
public long getSumDocFreq() throws IOException {
return -1;
// value cached by the constructor
return sumDocFreq;
}

// No total term frequency is computed by the constructor; -1 is returned unconditionally.
@Override
public long getSumTotalTermFreq() throws IOException {
return -1;
}

@Override
public long size() throws IOException {
return -1;
public int getDocCount() throws IOException {
// it is costly to recompute this value so we assume that docCount == maxDoc.
return maxDoc();
}
}

Expand Down
Loading