From d92b0ae3ae674db7f1ecc427dfa46db4eaae9064 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 1 Sep 2021 14:33:56 -0400 Subject: [PATCH 1/5] Filter source on read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I found myself needing support for something like `filter_path` on `XContentParser`. It was simple enough to plug it in so I did. Then I realized that it might offer more memory efficient source filtering (#25168) so I put together a quick benchmark comparing the source filtering that we do in `_search`. Filtering using the parser is about 33% faster than how we filter now when you select a single field from a 300 byte document: ``` Benchmark (excludes) (includes) (source) Mode Cnt Score Error Units FetchSourcePhaseBenchmark.filterObjects message short avgt 5 2360.342 ± 4.715 ns/op FetchSourcePhaseBenchmark.filterXContentOnBuilder message short avgt 5 2010.278 ± 15.042 ns/op FetchSourcePhaseBenchmark.filterXContentOnParser message short avgt 5 1588.446 ± 18.593 ns/op ``` The top line is the way we filter now. The middle line is adding a filter to `XContentBuilder` - something we can do right now without any of my plumbing work. The bottom line is filtering on the parser, requiring all the new plumbing. This isn't particularly impresive. 33% *sounds* great! But 700 nanoseconds per document isn't going to cut into anyone's search times. If you fetch a thousand docuents that's .7 milliseconds of savings. But we mostly advise folks to use source filtering on fetch when the source is large and you only want a small part of it. So I tried when the source is about 4.3kb and you want a single field: ``` Benchmark (excludes) (includes) (source) Mode Cnt Score Error Units FetchSourcePhaseBenchmark.filterObjects message one_4k_field avgt 5 5957.128 ± 117.402 ns/op FetchSourcePhaseBenchmark.filterXContentOnBuilder message one_4k_field avgt 5 4999.073 ± 96.003 ns/op FetchSourcePhaseBenchmark.filterXContentonParser message one_4k_field avgt 5 3261.478 ± 48.879 ns/op ``` That's 45% faster. Put another way, 2.7 microseconds a document. Not bad! But have a look at how things come out when you want a single field from a 4 *megabyte* document: ``` Benchmark (excludes) (includes) (source) Mode Cnt Score Error Units FetchSourcePhaseBenchmark.filterObjects message one_4m_field avgt 5 8266343.036 ± 176197.077 ns/op FetchSourcePhaseBenchmark.filterXContentOnBuilder message one_4m_field avgt 5 6227560.013 ± 68306.318 ns/op FetchSourcePhaseBenchmark.filterXContentonParser message one_4m_field avgt 5 1617153.472 ± 80164.547 ns/op ``` These documents are very large. I've encountered documents like them in real life, but they've always been the outlier for me. But a 6.5 millisecond per document savings ain't anything to sneeze at. Take a look at what you get when I turn on gc metrics: ``` FetchSourcePhaseBenchmark.filterObjects message one_4m_field avgt 5 7036097.561 ± 84721.312 ns/op FetchSourcePhaseBenchmark.filterObjects:·gc.alloc.rate message one_4m_field avgt 5 2166.613 ± 25.975 MB/sec FetchSourcePhaseBenchmark.filterXContentOnBuilder message one_4m_field avgt 5 6104595.992 ± 55445.508 ns/op FetchSourcePhaseBenchmark.filterXContentOnBuilder:·gc.alloc.rate message one_4m_field avgt 5 2496.978 ± 22.650 MB/sec FetchSourcePhaseBenchmark.filterXContentonParser message one_4m_field avgt 5 1614980.846 ± 31716.956 ns/op FetchSourcePhaseBenchmark.filterXContentonParser:·gc.alloc.rate message one_4m_field avgt 5 1.755 ± 0.035 MB/sec ``` --- .../subphase/FetchSourcePhaseBenchmark.java | 138 ++++++++++++++++++ .../search/fetch/subphase/300b_example.json | 20 +++ .../common/xcontent/XContent.java | 12 ++ .../common/xcontent/cbor/CborXContent.java | 22 ++- .../xcontent/cbor/CborXContentParser.java | 12 ++ .../common/xcontent/json/JsonXContent.java | 22 ++- .../xcontent/json/JsonXContentParser.java | 25 +++- .../common/xcontent/smile/SmileXContent.java | 22 ++- .../xcontent/smile/SmileXContentParser.java | 12 ++ .../common/xcontent/yaml/YamlXContent.java | 19 +++ .../xcontent/yaml/YamlXContentParser.java | 13 ++ .../fetch/subphase/FetchSourcePhase.java | 33 +++-- 12 files changed, 332 insertions(+), 18 deletions(-) create mode 100644 benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java create mode 100644 benchmarks/src/main/resources/org/elasticsearch/benchmark/search/fetch/subphase/300b_example.json diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java new file mode 100644 index 000000000000..7c343f84bc47 --- /dev/null +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java @@ -0,0 +1,138 @@ +package org.elasticsearch.benchmark.search.fetch.subphase; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.Streams; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.xcontent.DeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.common.xcontent.support.filtering.FilterPath; +import org.elasticsearch.search.fetch.subphase.FetchSourceContext; +import org.elasticsearch.search.fetch.subphase.FetchSourcePhase; +import org.elasticsearch.search.lookup.SourceLookup; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.io.IOException; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +@Fork(1) +@Warmup(iterations = 5) +@Measurement(iterations = 5) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Benchmark) +public class FetchSourcePhaseBenchmark { + private BytesReference sourceBytes; + private FetchSourceContext fetchContext; + private Set includesSet; + private Set excludesSet; + private FilterPath[] includesFilters; + private FilterPath[] excludesFilters; + + @Param({ "tiny", "short", "one_4k_field", "one_4m_field" }) + private String source; + @Param({ "message" }) + private String includes; + @Param({ "" }) + private String excludes; + + @Setup + public void setup() throws IOException { + switch (source) { + case "tiny": + sourceBytes = new BytesArray("{\"message\": \"short\"}"); + break; + case "short": + sourceBytes = read300BytesExample(); + break; + case "one_4k_field": + sourceBytes = buildBigExample("huge".repeat(1024)); + break; + case "one_4m_field": + sourceBytes = buildBigExample("huge".repeat(1024 * 1024)); + break; + default: + throw new IllegalArgumentException("Unknown source [" + source + "]"); + } + fetchContext = new FetchSourceContext( + true, + Strings.splitStringByCommaToArray(includes), + Strings.splitStringByCommaToArray(excludes) + ); + includesSet = Set.of(fetchContext.includes()); + excludesSet = Set.of(fetchContext.excludes()); + includesFilters = FilterPath.compile(Set.of(fetchContext.includes())); + excludesFilters = FilterPath.compile(Set.of(fetchContext.excludes())); + } + + private BytesReference read300BytesExample() throws IOException { + return Streams.readFully(FetchSourcePhaseBenchmark.class.getResourceAsStream("300b_example.json")); + } + + private BytesReference buildBigExample(String extraText) throws IOException { + String bigger = read300BytesExample().utf8ToString(); + bigger = "{\"huge\": \"" + extraText + "\"," + bigger.substring(1); + return new BytesArray(bigger); + } + + @Benchmark + public BytesReference filterObjects() throws IOException { + SourceLookup lookup = new SourceLookup(); + lookup.setSource(sourceBytes); + Object value = lookup.filter(fetchContext); + return FetchSourcePhase.objectToBytes(value, XContentType.JSON, Math.min(1024, lookup.internalSourceRef().length())); + } + + @Benchmark + public BytesReference filterXContentOnReader() throws IOException { + BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, sourceBytes.length())); + XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), streamOutput); + try ( + XContentParser parser = XContentType.JSON.xContent() + .createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + sourceBytes.streamInput(), + includesFilters, + excludesFilters + ) + ) { + builder.copyCurrentStructure(parser); + return BytesReference.bytes(builder); + } + } + + @Benchmark + public BytesReference filterXContentOnBuilder() throws IOException { + BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, sourceBytes.length())); + XContentBuilder builder = new XContentBuilder( + XContentType.JSON.xContent(), + streamOutput, + includesSet, + excludesSet, + XContentType.JSON.toParsedMediaType() + ); + try ( + XContentParser parser = XContentType.JSON.xContent() + .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, sourceBytes.streamInput()) + ) { + builder.copyCurrentStructure(parser); + return BytesReference.bytes(builder); + } + } +} diff --git a/benchmarks/src/main/resources/org/elasticsearch/benchmark/search/fetch/subphase/300b_example.json b/benchmarks/src/main/resources/org/elasticsearch/benchmark/search/fetch/subphase/300b_example.json new file mode 100644 index 000000000000..8112244c213e --- /dev/null +++ b/benchmarks/src/main/resources/org/elasticsearch/benchmark/search/fetch/subphase/300b_example.json @@ -0,0 +1,20 @@ +{ + "@timestamp": "2099-11-15T14:12:12", + "http": { + "request": { + "method": "get" + }, + "response": { + "bytes": 1070000, + "status_code": 200 + }, + "version": "1.1" + }, + "message": "GET /search HTTP/1.1 200 1070000", + "source": { + "ip": "192.168.0.1" + }, + "user": { + "id": "user" + } +} \ No newline at end of file diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContent.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContent.java index b448a613065a..5010ef0228a8 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContent.java @@ -8,6 +8,7 @@ package org.elasticsearch.common.xcontent; +import org.elasticsearch.common.xcontent.support.filtering.FilterPath; import org.elasticsearch.core.RestApiVersion; import java.io.IOException; @@ -58,6 +59,17 @@ XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationH XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, InputStream is) throws IOException; + /** + * Creates a parser over the provided input stream. + */ + XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + InputStream is, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException; + /** * Creates a parser over the provided bytes. */ diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContent.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContent.java index baac5d6dc2bf..faddaa303155 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContent.java @@ -12,7 +12,7 @@ import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.dataformat.cbor.CBORFactory; -import org.elasticsearch.core.RestApiVersion; + import org.elasticsearch.common.xcontent.DeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContent; @@ -21,6 +21,8 @@ import org.elasticsearch.common.xcontent.XContentParseException; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.common.xcontent.support.filtering.FilterPath; +import org.elasticsearch.core.RestApiVersion; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -80,6 +82,24 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return new CborXContentParser(xContentRegistry, deprecationHandler, cborFactory.createParser(is)); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + InputStream is, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException { + return new CborXContentParser( + xContentRegistry, + deprecationHandler, + cborFactory.createParser(is), + RestApiVersion.current(), + includes, + excludes + ); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, byte[] data) throws IOException { diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContentParser.java index d6de473a35aa..1412eabf6b53 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/cbor/CborXContentParser.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.json.JsonXContentParser; +import org.elasticsearch.common.xcontent.support.filtering.FilterPath; public class CborXContentParser extends JsonXContentParser { @@ -28,6 +29,17 @@ public CborXContentParser(NamedXContentRegistry xContentRegistry, super(xContentRegistry, deprecationHandler, parser, restApiVersion); } + public CborXContentParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + JsonParser parser, + RestApiVersion restApiVersion, + FilterPath[] includes, + FilterPath[] excludes + ) { + super(xContentRegistry, deprecationHandler, parser, restApiVersion, includes, excludes); + } + @Override public XContentType contentType() { return XContentType.CBOR; diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContent.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContent.java index 7e145b2b49a3..2a9e033be530 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContent.java @@ -12,7 +12,7 @@ import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; -import org.elasticsearch.core.RestApiVersion; + import org.elasticsearch.common.xcontent.DeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContent; @@ -20,6 +20,8 @@ import org.elasticsearch.common.xcontent.XContentGenerator; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.common.xcontent.support.filtering.FilterPath; +import org.elasticsearch.core.RestApiVersion; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -81,6 +83,24 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return new JsonXContentParser(xContentRegistry, deprecationHandler, jsonFactory.createParser(is)); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + InputStream is, + FilterPath[] include, + FilterPath[] exclude + ) throws IOException { + return new JsonXContentParser( + xContentRegistry, + deprecationHandler, + jsonFactory.createParser(is), + RestApiVersion.current(), + include, + exclude + ); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, byte[] data) throws IOException { diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java index 9b73847b4db0..378f1f51057e 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java @@ -11,12 +11,16 @@ import com.fasterxml.jackson.core.JsonLocation; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; -import org.elasticsearch.core.RestApiVersion; +import com.fasterxml.jackson.core.filter.FilteringParserDelegate; + import org.elasticsearch.common.xcontent.DeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentLocation; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.support.AbstractXContentParser; +import org.elasticsearch.common.xcontent.support.filtering.FilterPath; +import org.elasticsearch.common.xcontent.support.filtering.FilterPathBasedFilter; +import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.core.internal.io.IOUtils; import java.io.IOException; @@ -39,6 +43,25 @@ public JsonXContentParser(NamedXContentRegistry xContentRegistry, this.parser = parser; } + public JsonXContentParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + JsonParser parser, + RestApiVersion restApiVersion, + FilterPath[] include, + FilterPath[] exclude + ) { + super(xContentRegistry, deprecationHandler, restApiVersion); + JsonParser filtered = parser; + if (exclude != null) { + filtered = new FilteringParserDelegate(parser, new FilterPathBasedFilter(exclude, false), true, true); + } + if (include != null) { + filtered = new FilteringParserDelegate(parser, new FilterPathBasedFilter(include, true), true, true); + } + this.parser = filtered; + } + @Override public XContentType contentType() { return XContentType.JSON; diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContent.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContent.java index 5d826dea77e0..807b7cccdfd5 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContent.java @@ -13,7 +13,7 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.dataformat.smile.SmileFactory; import com.fasterxml.jackson.dataformat.smile.SmileGenerator; -import org.elasticsearch.core.RestApiVersion; + import org.elasticsearch.common.xcontent.DeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContent; @@ -21,6 +21,8 @@ import org.elasticsearch.common.xcontent.XContentGenerator; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.common.xcontent.support.filtering.FilterPath; +import org.elasticsearch.core.RestApiVersion; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -82,6 +84,24 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return new SmileXContentParser(xContentRegistry, deprecationHandler, smileFactory.createParser(is)); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + InputStream is, + FilterPath[] include, + FilterPath[] exclude + ) throws IOException { + return new SmileXContentParser( + xContentRegistry, + deprecationHandler, + smileFactory.createParser(is), + RestApiVersion.current(), + include, + exclude + ); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, byte[] data) throws IOException { diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContentParser.java index c743849cbca4..a29f9a4843e0 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContentParser.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.json.JsonXContentParser; +import org.elasticsearch.common.xcontent.support.filtering.FilterPath; public class SmileXContentParser extends JsonXContentParser { @@ -28,6 +29,17 @@ public SmileXContentParser(NamedXContentRegistry xContentRegistry, super(xContentRegistry, deprecationHandler, parser, restApiVersion); } + public SmileXContentParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + JsonParser parser, + RestApiVersion restApiVersion, + FilterPath[] include, + FilterPath[] exclude + ) { + super(xContentRegistry, deprecationHandler, parser, restApiVersion, include, exclude); + } + @Override public XContentType contentType() { return XContentType.SMILE; diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/yaml/YamlXContent.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/yaml/YamlXContent.java index 4c8e7f0f7334..fcdeedaace2d 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/yaml/YamlXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/yaml/YamlXContent.java @@ -19,6 +19,7 @@ import org.elasticsearch.common.xcontent.XContentGenerator; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.common.xcontent.support.filtering.FilterPath; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -75,6 +76,24 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return new YamlXContentParser(xContentRegistry, deprecationHandler, yamlFactory.createParser(is)); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + InputStream is, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException { + return new YamlXContentParser( + xContentRegistry, + deprecationHandler, + yamlFactory.createParser(is), + RestApiVersion.current(), + includes, + excludes + ); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, byte[] data) throws IOException { diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/yaml/YamlXContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/yaml/YamlXContentParser.java index 28fc5fb5e947..c7811831c5b1 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/yaml/YamlXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/yaml/YamlXContentParser.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.json.JsonXContentParser; +import org.elasticsearch.common.xcontent.support.filtering.FilterPath; public class YamlXContentParser extends JsonXContentParser { @@ -28,6 +29,18 @@ public YamlXContentParser(NamedXContentRegistry xContentRegistry, super(xContentRegistry, deprecationHandler, parser, restApiVersion); } + public YamlXContentParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + JsonParser parser, + RestApiVersion restApiVersion, + FilterPath[] includes, + FilterPath[] excludes + ) { + super(xContentRegistry, deprecationHandler, parser, restApiVersion, includes, excludes); + } + + @Override public XContentType contentType() { return XContentType.YAML; diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java index d27e9d6688ac..983aaae3c7fd 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.fetch.FetchContext; import org.elasticsearch.search.fetch.FetchSubPhase; @@ -73,20 +74,7 @@ private void hitExecute(FetchSourceContext fetchSourceContext, HitContext hitCon try { final int initialCapacity = nestedHit ? 1024 : Math.min(1024, source.internalSourceRef().length()); - BytesStreamOutput streamOutput = new BytesStreamOutput(initialCapacity); - XContentBuilder builder = new XContentBuilder(source.sourceContentType().xContent(), streamOutput); - if (value != null) { - builder.value(value); - } else { - // This happens if the source filtering could not find the specified in the _source. - // Just doing `builder.value(null)` is valid, but the xcontent validation can't detect what format - // it is. In certain cases, for example response serialization we fail if no xcontent type can't be - // detected. So instead we just return an empty top level object. Also this is in inline with what was - // being return in this situation in 5.x and earlier. - builder.startObject(); - builder.endObject(); - } - hitContext.hit().sourceRef(BytesReference.bytes(builder)); + hitContext.hit().sourceRef(objectToBytes(value, source.sourceContentType(), initialCapacity)); } catch (IOException e) { throw new ElasticsearchException("Error filtering source", e); } @@ -96,6 +84,23 @@ private static boolean containsFilters(FetchSourceContext context) { return context.includes().length != 0 || context.excludes().length != 0; } + public static BytesReference objectToBytes(Object value, XContentType xContentType, int initialCapacity) throws IOException { + BytesStreamOutput streamOutput = new BytesStreamOutput(initialCapacity); + XContentBuilder builder = new XContentBuilder(xContentType.xContent(), streamOutput); + if (value != null) { + builder.value(value); + } else { + // This happens if the source filtering could not find the specified in the _source. + // Just doing `builder.value(null)` is valid, but the xcontent validation can't detect what format + // it is. In certain cases, for example response serialization we fail if no xcontent type can't be + // detected. So instead we just return an empty top level object. Also this is in inline with what was + // being return in this situation in 5.x and earlier. + builder.startObject(); + builder.endObject(); + } + return BytesReference.bytes(builder); + } + @SuppressWarnings("unchecked") private Map getNestedSource(Map sourceAsMap, HitContext hitContext) { for (SearchHit.NestedIdentity o = hitContext.hit().getNestedIdentity(); o != null; o = o.getChild()) { From e416463c941b3cbd65f381f99782d36080351596 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 2 Sep 2021 09:14:26 -0400 Subject: [PATCH 2/5] Rename --- .../search/fetch/subphase/FetchSourcePhaseBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java index 7c343f84bc47..b313b0f57d3c 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java @@ -99,7 +99,7 @@ public BytesReference filterObjects() throws IOException { } @Benchmark - public BytesReference filterXContentOnReader() throws IOException { + public BytesReference filterXContentOnParser() throws IOException { BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, sourceBytes.length())); XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), streamOutput); try ( From eda57ea7bf7a9b7196fee0f19c18d393adad32dc Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 3 Sep 2021 09:34:29 -0400 Subject: [PATCH 3/5] Test --- .../AbstractXContentFilteringTestCase.java | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/common/xcontent/support/filtering/AbstractXContentFilteringTestCase.java b/server/src/test/java/org/elasticsearch/common/xcontent/support/filtering/AbstractXContentFilteringTestCase.java index ddd56d394a8f..1c1a7190ac13 100644 --- a/server/src/test/java/org/elasticsearch/common/xcontent/support/filtering/AbstractXContentFilteringTestCase.java +++ b/server/src/test/java/org/elasticsearch/common/xcontent/support/filtering/AbstractXContentFilteringTestCase.java @@ -10,6 +10,8 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.Streams; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.xcontent.DeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContent; @@ -20,6 +22,7 @@ import org.elasticsearch.common.xcontent.support.AbstractFilteringTestCase; import java.io.IOException; +import java.io.InputStream; import java.util.Set; import static java.util.Collections.emptySet; @@ -30,8 +33,11 @@ public abstract class AbstractXContentFilteringTestCase extends AbstractFilteringTestCase { - protected final void testFilter(Builder expected, Builder actual, Set includes, Set excludes) throws IOException { - assertFilterResult(expected.apply(createBuilder()), actual.apply(createBuilder(includes, excludes))); + protected final void testFilter(Builder expected, Builder sample, Set includes, Set excludes) throws IOException { + XContentBuilder filtered = randomBoolean() + ? filterOnBuilder(sample, includes, excludes) + : filterOnParser(sample, includes, excludes); + assertFilterResult(expected.apply(createBuilder()), filtered); } protected abstract void assertFilterResult(XContentBuilder expected, XContentBuilder actual); @@ -47,10 +53,30 @@ private XContentBuilder createBuilder() throws IOException { return XContentBuilder.builder(getXContentType().xContent()); } - private XContentBuilder createBuilder(Set includes, Set excludes) throws IOException { + private XContentBuilder filterOnBuilder(Builder sample, Set includes, Set excludes) throws IOException { return XContentBuilder.builder(getXContentType(), includes, excludes); } + private XContentBuilder filterOnParser(Builder sample, Set includes, Set excludes) throws IOException { + FilterPath[] includesFilter = FilterPath.compile(includes); + FilterPath[] excludesFilter = FilterPath.compile(excludes); + try ( + XContentBuilder builtSample = sample.apply(createBuilder()); + InputStream sampleStream = BytesReference.bytes(builtSample).streamInput(); + XContentParser parser = getXContentType().xContent() + .createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + sampleStream, + includesFilter, + excludesFilter + ); + ) { + XContentBuilder result = createBuilder(); + return result.copyCurrentStructure(parser); + } + } + public void testSingleFieldObject() throws IOException { final Builder sample = builder -> builder.startObject().startObject("foo").field("bar", "test").endObject().endObject(); From 35bbf97ac3c3fd0f82077fe21d01de0d39858532 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 3 Sep 2021 11:58:53 -0400 Subject: [PATCH 4/5] test --- .../xcontent/json/JsonXContentParser.java | 4 +- .../support/filtering/FilterPath.java | 7 ++ .../filtering/FilterPathBasedFilter.java | 8 +++ .../AbstractXContentFilteringTestCase.java | 68 ++++++++++++------- 4 files changed, 60 insertions(+), 27 deletions(-) diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java index 378f1f51057e..5e2907293b56 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java @@ -54,10 +54,10 @@ public JsonXContentParser( super(xContentRegistry, deprecationHandler, restApiVersion); JsonParser filtered = parser; if (exclude != null) { - filtered = new FilteringParserDelegate(parser, new FilterPathBasedFilter(exclude, false), true, true); + filtered = new FilteringParserDelegate(filtered, new FilterPathBasedFilter(exclude, false), true, true); } if (include != null) { - filtered = new FilteringParserDelegate(parser, new FilterPathBasedFilter(include, true), true, true); + filtered = new FilteringParserDelegate(filtered, new FilterPathBasedFilter(include, true), true, true); } this.parser = filtered; } diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPath.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPath.java index b500ee030d33..9e8a9d836fb0 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPath.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPath.java @@ -52,6 +52,13 @@ boolean isDoubleWildcard() { return doubleWildcard; } + boolean hasDoubleWildcard() { + if (filter == null) { + return false; + } + return filter.indexOf("**") >= 0; + } + boolean isSimpleWildcard() { return simpleWildcard; } diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPathBasedFilter.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPathBasedFilter.java index c784e7342457..1df347196dd0 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPathBasedFilter.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPathBasedFilter.java @@ -21,6 +21,10 @@ public class FilterPathBasedFilter extends TokenFilter { * or value matches one of the filter paths. */ private static final TokenFilter MATCHING = new TokenFilter() { + @Override + public String toString() { + return "MATCHING"; + } }; /** @@ -28,6 +32,10 @@ public class FilterPathBasedFilter extends TokenFilter { * property names/values matches one of the filter paths. */ private static final TokenFilter NO_MATCHING = new TokenFilter() { + @Override + public String toString() { + return "NO_MATCHING"; + } }; private final FilterPath[] filters; diff --git a/server/src/test/java/org/elasticsearch/common/xcontent/support/filtering/AbstractXContentFilteringTestCase.java b/server/src/test/java/org/elasticsearch/common/xcontent/support/filtering/AbstractXContentFilteringTestCase.java index 1c1a7190ac13..1db805cd9e0f 100644 --- a/server/src/test/java/org/elasticsearch/common/xcontent/support/filtering/AbstractXContentFilteringTestCase.java +++ b/server/src/test/java/org/elasticsearch/common/xcontent/support/filtering/AbstractXContentFilteringTestCase.java @@ -10,8 +10,6 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.io.Streams; -import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.xcontent.DeprecationHandler; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContent; @@ -22,7 +20,7 @@ import org.elasticsearch.common.xcontent.support.AbstractFilteringTestCase; import java.io.IOException; -import java.io.InputStream; +import java.util.Arrays; import java.util.Set; import static java.util.Collections.emptySet; @@ -34,10 +32,7 @@ public abstract class AbstractXContentFilteringTestCase extends AbstractFilteringTestCase { protected final void testFilter(Builder expected, Builder sample, Set includes, Set excludes) throws IOException { - XContentBuilder filtered = randomBoolean() - ? filterOnBuilder(sample, includes, excludes) - : filterOnParser(sample, includes, excludes); - assertFilterResult(expected.apply(createBuilder()), filtered); + assertFilterResult(expected.apply(createBuilder()), filter(sample, includes, excludes)); } protected abstract void assertFilterResult(XContentBuilder expected, XContentBuilder actual); @@ -53,27 +48,50 @@ private XContentBuilder createBuilder() throws IOException { return XContentBuilder.builder(getXContentType().xContent()); } + private XContentBuilder filter(Builder sample, Set includes, Set excludes) throws IOException { + if (randomBoolean()) { + return filterOnBuilder(sample, includes, excludes); + } + FilterPath[] excludesFilter = FilterPath.compile(excludes); + if (excludesFilter != null && Arrays.stream(excludesFilter).anyMatch(FilterPath::hasDoubleWildcard)) { + /* + * If there are any double wildcard filters the parser based + * filtering produced weird invalid json. Just field names + * and no objects?! Weird. Anyway, we can't use it. + */ + return filterOnBuilder(sample, includes, excludes); + } + FilterPath[] includesFilter = FilterPath.compile(includes); + return filterOnParser(sample, includesFilter, excludesFilter); + } + private XContentBuilder filterOnBuilder(Builder sample, Set includes, Set excludes) throws IOException { - return XContentBuilder.builder(getXContentType(), includes, excludes); + return sample.apply(XContentBuilder.builder(getXContentType(), includes, excludes)); } - private XContentBuilder filterOnParser(Builder sample, Set includes, Set excludes) throws IOException { - FilterPath[] includesFilter = FilterPath.compile(includes); - FilterPath[] excludesFilter = FilterPath.compile(excludes); - try ( - XContentBuilder builtSample = sample.apply(createBuilder()); - InputStream sampleStream = BytesReference.bytes(builtSample).streamInput(); - XContentParser parser = getXContentType().xContent() - .createParser( - NamedXContentRegistry.EMPTY, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - sampleStream, - includesFilter, - excludesFilter - ); - ) { - XContentBuilder result = createBuilder(); - return result.copyCurrentStructure(parser); + private XContentBuilder filterOnParser(Builder sample, FilterPath[] includes, FilterPath[] excludes) throws IOException { + try (XContentBuilder builtSample = sample.apply(createBuilder())) { + BytesReference sampleBytes = BytesReference.bytes(builtSample); + try ( + XContentParser parser = getXContentType().xContent() + .createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + sampleBytes.streamInput(), + includes, + excludes + ); + ) { + XContentBuilder result = createBuilder(); + if (sampleBytes.get(sampleBytes.length() - 1) == '\n') { + result.lfAtEnd(); + } + if (parser.nextToken() == null) { + // If the filter removed everything then emit an open/close + return result.startObject().endObject(); + } + return result.copyCurrentStructure(parser); + } } } From 09b189916d3acac04b656d942918640cfdc49bf9 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 10 Sep 2021 13:09:06 -0400 Subject: [PATCH 5/5] Fail early is double star excludes Turns out this is fixed in an unreleased version of jackson! --- .../common/xcontent/json/JsonXContentParser.java | 6 ++++++ .../common/xcontent/support/filtering/FilterPath.java | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java index 5e2907293b56..6065c025a8a8 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java @@ -54,6 +54,12 @@ public JsonXContentParser( super(xContentRegistry, deprecationHandler, restApiVersion); JsonParser filtered = parser; if (exclude != null) { + for (FilterPath e : exclude) { + if (e.hasDoubleWildcard()) { + // Fixed in Jackson 2.13 - /~https://github.com/FasterXML/jackson-core/issues/700 + throw new UnsupportedOperationException("double wildcards are not supported in filtered excludes"); + } + } filtered = new FilteringParserDelegate(filtered, new FilterPathBasedFilter(exclude, false), true, true); } if (include != null) { diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPath.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPath.java index 9e8a9d836fb0..d42b160c0ef6 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPath.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/support/filtering/FilterPath.java @@ -52,7 +52,7 @@ boolean isDoubleWildcard() { return doubleWildcard; } - boolean hasDoubleWildcard() { + public boolean hasDoubleWildcard() { if (filter == null) { return false; }