Skip to content

Commit

Permalink
Enable strict duplicate checks for JSON content
Browse files Browse the repository at this point in the history
With this commit we enable the Jackson feature 'STRICT_DUPLICATE_DETECTION'
by default. This ensures that JSON keys are always unique. While this has
a performance impact, benchmarking has indicated that the typical drop in
indexing throughput is around 1 - 2%.

As a last resort, we allow users to still disable strict duplicate checks
by setting `-Des.json.strict_duplicate_detection=false` which is
intentionally undocumented.

Closes #19614
  • Loading branch information
danielmitterdorfer authored Dec 14, 2016
1 parent 49bdd29 commit 7e50580
Show file tree
Hide file tree
Showing 19 changed files with 364 additions and 250 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import org.elasticsearch.common.Booleans;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.xcontent.XContent;
Expand All @@ -45,17 +46,39 @@ public class JsonXContent implements XContent {
/**
 * Creates a new {@link XContentBuilder} by handing the shared {@code jsonXContent} instance to
 * {@code XContentBuilder.builder}.
 *
 * @return the builder produced by {@code XContentBuilder.builder(jsonXContent)}
 * @throws IOException propagated from {@code XContentBuilder.builder}
 */
public static XContentBuilder contentBuilder() throws IOException {
    final XContentBuilder jsonBuilder = XContentBuilder.builder(jsonXContent);
    return jsonBuilder;
}

private static final JsonFactory jsonFactory;

public static final JsonXContent jsonXContent;

/*
 * NOTE: This comment is addressed to maintainers of the Elasticsearch code base only. It is deliberately not a Javadoc comment
 * because it describes an undocumented system property.
 *
 * Reports whether the JSON parser always checks for duplicate keys in JSON content. The check is enabled by default and can be
 * switched off by setting the otherwise undocumented system property "es.json.strict_duplicate_detection" to "false".
 *
 * Before this mode was enabled, custom duplicate checks existed in various parts of the code base. Because users can still
 * disable this mode and fall back to those legacy checks, the custom duplicate checks must be kept, and so must their tests.
 *
 * If the system-property fallback is removed one day, all tests that call this method can be removed together with the
 * corresponding custom duplicate check code.
 */
public static boolean isStrictDuplicateDetectionEnabled() {
    // Duplicate keys are rejected unless the user explicitly opts out; "true" is the default when the property is unset
    final String configuredValue = System.getProperty("es.json.strict_duplicate_detection", "true");
    return Booleans.parseBooleanExact(configuredValue);
}

static {
jsonFactory = new JsonFactory();
jsonFactory.configure(JsonGenerator.Feature.QUOTE_FIELD_NAMES, true);
jsonFactory.configure(JsonParser.Feature.ALLOW_COMMENTS, true);
jsonFactory.configure(JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false); // this trips on many mappings now...
// Do not automatically close unclosed objects/arrays in com.fasterxml.jackson.core.json.UTF8JsonGenerator#close() method
jsonFactory.configure(JsonGenerator.Feature.AUTO_CLOSE_JSON_CONTENT, false);
jsonFactory.configure(JsonParser.Feature.STRICT_DUPLICATE_DETECTION, isStrictDuplicateDetectionEnabled());
jsonXContent = new JsonXContent();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public void testFieldsParsing() throws Exception {
assertThat(request.getIndexConstraints()[3].getComparison(), equalTo(LTE));
assertThat(request.getIndexConstraints()[4].getField(), equalTo("field5"));
assertThat(request.getIndexConstraints()[4].getValue(), equalTo("2"));
assertThat(request.getIndexConstraints()[4].getProperty(), equalTo(MAX));
assertThat(request.getIndexConstraints()[4].getProperty(), equalTo(MIN));
assertThat(request.getIndexConstraints()[4].getComparison(), equalTo(GT));
assertThat(request.getIndexConstraints()[5].getField(), equalTo("field5"));
assertThat(request.getIndexConstraints()[5].getValue(), equalTo("9"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsException;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.test.ESTestCase;

import static org.hamcrest.CoreMatchers.containsString;
Expand All @@ -48,6 +49,8 @@ public void testSimpleJsonSettings() throws Exception {
}

public void testDuplicateKeysThrowsException() {
assumeFalse("Test only makes sense if JSON parser doesn't have strict duplicate checks enabled",
JsonXContent.isStrictDuplicateDetectionEnabled());
final String json = "{\"foo\":\"bar\",\"foo\":\"baz\"}";
final SettingsException e = expectThrows(SettingsException.class, () -> Settings.builder().loadFromSource(json).build());
assertEquals(e.getCause().getClass(), ElasticsearchParseException.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public void testRandomOrder() throws Exception {
}

public void testMissingAllConstructorArgs() throws IOException {
XContentParser parser = createParser(JsonXContent.jsonXContent,
XContentParser parser = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"mineral\": 1\n"
+ "}");
Expand All @@ -113,7 +113,7 @@ public void testMissingAllConstructorArgs() throws IOException {
}

public void testMissingAllConstructorArgsButNotRequired() throws IOException {
XContentParser parser = createParser(JsonXContent.jsonXContent,
XContentParser parser = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"mineral\": 1\n"
+ "}");
Expand All @@ -122,7 +122,7 @@ public void testMissingAllConstructorArgsButNotRequired() throws IOException {
}

public void testMissingSecondConstructorArg() throws IOException {
XContentParser parser = createParser(JsonXContent.jsonXContent,
XContentParser parser = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"mineral\": 1,\n"
+ " \"animal\": \"cat\"\n"
Expand All @@ -133,7 +133,7 @@ public void testMissingSecondConstructorArg() throws IOException {
}

public void testMissingSecondConstructorArgButNotRequired() throws IOException {
XContentParser parser = createParser(JsonXContent.jsonXContent,
XContentParser parser = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"mineral\": 1,\n"
+ " \"animal\": \"cat\"\n"
Expand All @@ -146,7 +146,7 @@ public void testMissingSecondConstructorArgButNotRequired() throws IOException {
}

public void testMissingFirstConstructorArg() throws IOException {
XContentParser parser = createParser(JsonXContent.jsonXContent,
XContentParser parser = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"mineral\": 1,\n"
+ " \"vegetable\": 2\n"
Expand All @@ -158,7 +158,7 @@ public void testMissingFirstConstructorArg() throws IOException {
}

public void testMissingFirstConstructorArgButNotRequired() throws IOException {
XContentParser parser = createParser(JsonXContent.jsonXContent,
XContentParser parser = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"mineral\": 1,\n"
+ " \"vegetable\": 2\n"
Expand All @@ -169,7 +169,9 @@ public void testMissingFirstConstructorArgButNotRequired() throws IOException {
}

public void testRepeatedConstructorParam() throws IOException {
XContentParser parser = createParser(JsonXContent.jsonXContent,
assumeFalse("Test only makes sense if JSON parser doesn't have strict duplicate checks enabled",
JsonXContent.isStrictDuplicateDetectionEnabled());
XContentParser parser = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"vegetable\": 1,\n"
+ " \"vegetable\": 2\n"
Expand All @@ -182,7 +184,7 @@ public void testRepeatedConstructorParam() throws IOException {
}

public void testBadParam() throws IOException {
XContentParser parser = createParser(JsonXContent.jsonXContent,
XContentParser parser = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"animal\": \"cat\",\n"
+ " \"vegetable\": 2,\n"
Expand All @@ -196,7 +198,7 @@ public void testBadParam() throws IOException {
}

public void testBadParamBeforeObjectBuilt() throws IOException {
XContentParser parser = createParser(JsonXContent.jsonXContent,
XContentParser parser = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"a\": \"supercalifragilisticexpialidocious\",\n"
+ " \"animal\": \"cat\"\n,"
Expand Down Expand Up @@ -256,7 +258,7 @@ void setFoo(String foo) {
parser.declareString(ctorArgOptional ? optionalConstructorArg() : constructorArg(), new ParseField("yeah"));

// ctor arg first so we can test for the bug we found one time
XContentParser xcontent = createParser(JsonXContent.jsonXContent,
XContentParser xcontent = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"yeah\": \"!\",\n"
+ " \"foo\": \"foo\"\n"
Expand All @@ -265,7 +267,7 @@ void setFoo(String foo) {
assertTrue(result.fooSet);

// and ctor arg second just in case
xcontent = createParser(JsonXContent.jsonXContent,
xcontent = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"foo\": \"foo\",\n"
+ " \"yeah\": \"!\"\n"
Expand All @@ -275,7 +277,7 @@ void setFoo(String foo) {

if (ctorArgOptional) {
// and without the constructor arg if we've made it optional
xcontent = createParser(JsonXContent.jsonXContent,
xcontent = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"foo\": \"foo\"\n"
+ "}");
Expand All @@ -285,7 +287,7 @@ void setFoo(String foo) {
}

public void testIgnoreUnknownFields() throws IOException {
XContentParser parser = createParser(JsonXContent.jsonXContent,
XContentParser parser = createParser(JsonXContent.jsonXContent,
"{\n"
+ " \"test\" : \"foo\",\n"
+ " \"junk\" : 2\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;

import com.fasterxml.jackson.core.JsonParseException;
import org.elasticsearch.common.xcontent.BaseXContentTestCase;
import org.elasticsearch.common.xcontent.XContentType;

Expand All @@ -39,4 +40,13 @@ public void testBigInteger() throws Exception {
JsonGenerator generator = new JsonFactory().createGenerator(os);
doTestBigInteger(generator, os);
}

/**
 * Verifies that parsing a JSON object containing the same key twice fails with a {@link JsonParseException}
 * when strict duplicate detection is enabled.
 */
public void testChecksForDuplicates() throws Exception {
    // assumeTrue skips the test unless strict detection is ON, so the skip reason must describe that precondition
    assumeTrue("Test only makes sense if JSON parser has strict duplicate checks enabled",
        JsonXContent.isStrictDuplicateDetectionEnabled());

    JsonParseException pex = expectThrows(JsonParseException.class,
        () -> XContentType.JSON.xContent().createParser("{ \"key\": 1, \"key\": 2 }").map());
    // getMessage() on Jackson exceptions appends the source location; compare the bare message instead
    assertEquals("Duplicate field 'key'", pex.getOriginalMessage());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ public void testCopyToFieldsParsing() throws Exception {
assertThat(copyTestMap.get("type").toString(), is("text"));
List<String> copyToList = (List<String>) copyTestMap.get("copy_to");
assertThat(copyToList.size(), equalTo(2));
assertThat(copyToList.get(0).toString(), equalTo("another_field"));
assertThat(copyToList.get(1).toString(), equalTo("cyclic_test"));
assertThat(copyToList.get(0), equalTo("another_field"));
assertThat(copyToList.get(1), equalTo("cyclic_test"));

// Check data parsing
BytesReference json = jsonBuilder().startObject()
Expand Down Expand Up @@ -312,44 +312,43 @@ public void testCopyToFieldMerge() throws Exception {
public void testCopyToNestedField() throws Exception {
IndexService indexService = createIndex("test");
DocumentMapperParser parser = indexService.mapperService().documentMapperParser();
for (boolean mapped : new boolean[] {true, false}) {
XContentBuilder mapping = jsonBuilder().startObject()
.startObject("type")
.startObject("properties")
.startObject("target")
.field("type", "long")
.field("doc_values", false)
.endObject()
.startObject("n1")
.field("type", "nested")
.startObject("properties")
.startObject("target")
.field("type", "long")
.field("doc_values", false)
XContentBuilder mapping = jsonBuilder().startObject()
.startObject("type")
.startObject("properties")
.startObject("target")
.field("type", "long")
.field("doc_values", false)
.endObject()
.startObject("n1")
.field("type", "nested")
.startObject("properties")
.startObject("target")
.field("type", "long")
.field("doc_values", false)
.endObject()
.startObject("n2")
.field("type", "nested")
.startObject("properties")
.startObject("target")
.field("type", "long")
.field("doc_values", false)
.endObject()
.startObject("source")
.field("type", "long")
.field("doc_values", false)
.startArray("copy_to")
.value("target") // should go to the root doc
.value("n1.target") // should go to the parent doc
.value("n1.n2.target") // should go to the current doc
.endArray()
.endObject()
.endObject()
.startObject("n2")
.field("type", "nested")
.startObject("properties")
.startObject("target")
.field("type", "long")
.field("doc_values", false)
.endObject()
.startObject("source")
.field("type", "long")
.field("doc_values", false)
.startArray("copy_to")
.value("target") // should go to the root doc
.value("n1.target") // should go to the parent doc
.value("n1.n2.target") // should go to the current doc
.endArray()
.endObject();
for (int i = 0; i < 3; ++i) {
if (mapped) {
mapping = mapping.startObject("target").field("type", "long").field("doc_values", false).endObject();
}
mapping = mapping.endObject().endObject();
}
mapping = mapping.endObject();
.endObject()
.endObject()
.endObject()
.endObject()
.endObject()
.endObject();

DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping.string()));

Expand All @@ -376,39 +375,38 @@ public void testCopyToNestedField() throws Exception {
.endArray()
.endObject();

ParsedDocument doc = mapper.parse("test", "type", "1", jsonDoc.bytes());
assertEquals(6, doc.docs().size());

Document nested = doc.docs().get(0);
assertFieldValue(nested, "n1.n2.target", 7L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");

nested = doc.docs().get(2);
assertFieldValue(nested, "n1.n2.target", 5L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");

nested = doc.docs().get(3);
assertFieldValue(nested, "n1.n2.target", 3L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");

Document parent = doc.docs().get(1);
assertFieldValue(parent, "target");
assertFieldValue(parent, "n1.target", 7L);
assertFieldValue(parent, "n1.n2.target");

parent = doc.docs().get(4);
assertFieldValue(parent, "target");
assertFieldValue(parent, "n1.target", 3L, 5L);
assertFieldValue(parent, "n1.n2.target");

Document root = doc.docs().get(5);
assertFieldValue(root, "target", 3L, 5L, 7L);
assertFieldValue(root, "n1.target");
assertFieldValue(root, "n1.n2.target");
}
ParsedDocument doc = mapper.parse("test", "type", "1", jsonDoc.bytes());
assertEquals(6, doc.docs().size());

Document nested = doc.docs().get(0);
assertFieldValue(nested, "n1.n2.target", 7L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");

nested = doc.docs().get(2);
assertFieldValue(nested, "n1.n2.target", 5L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");

nested = doc.docs().get(3);
assertFieldValue(nested, "n1.n2.target", 3L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");

Document parent = doc.docs().get(1);
assertFieldValue(parent, "target");
assertFieldValue(parent, "n1.target", 7L);
assertFieldValue(parent, "n1.n2.target");

parent = doc.docs().get(4);
assertFieldValue(parent, "target");
assertFieldValue(parent, "n1.target", 3L, 5L);
assertFieldValue(parent, "n1.n2.target");

Document root = doc.docs().get(5);
assertFieldValue(root, "target", 3L, 5L, 7L);
assertFieldValue(root, "n1.target");
assertFieldValue(root, "n1.n2.target");
}

public void testCopyToDynamicNestedObjectParsing() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.test.AbstractQueryTestCase;
import org.hamcrest.Matchers;
Expand Down Expand Up @@ -339,6 +340,8 @@ public void testUnknownQueryName() throws IOException {
* test that two queries in object throws error
*/
public void testTooManyQueriesInObject() throws IOException {
assumeFalse("Test only makes sense if JSON parser doesn't have strict duplicate checks enabled",
JsonXContent.isStrictDuplicateDetectionEnabled());
String clauseType = randomFrom("must", "should", "must_not", "filter");
// should also throw error if invalid query is preceded by a valid one
String query = "{\n" +
Expand Down
Loading

0 comments on commit 7e50580

Please sign in to comment.