-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix StreamConstraintsException introduced in jackson 2.15 #31580
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,36 @@ | |
@Internal | ||
public class RowJsonUtils { | ||
|
||
// | ||
private static int defaultBufferLimit; | ||
|
||
/** | ||
* Increase the default jackson-databind stream read constraint. | ||
* | ||
* <p>StreamReadConstraints was introduced in jackson 2.15 causing string > 20MB (5MB in 2.15.0) | ||
* parsing failure. This has caused regressions in its dependencies include Beam. Here we | ||
* overwrite the default buffer size limit to 100 MB, and exposes this interface for higher limit. | ||
* If needed, call this method during pipeline run time, e.g. in DoFn.setup. | ||
*/ | ||
public static void increaseDefaultStreamReadConstraints(int newLimit) { | ||
if (newLimit <= defaultBufferLimit) return; | ||
try { | ||
Class<?> unused = Class.forName("com.fasterxml.jackson.core.StreamReadConstraints"); | ||
|
||
com.fasterxml.jackson.core.StreamReadConstraints.overrideDefaultStreamReadConstraints( | ||
com.fasterxml.jackson.core.StreamReadConstraints.builder() | ||
.maxStringLength(newLimit) | ||
.build()); | ||
} catch (ClassNotFoundException e) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this makes sure pinning jackson-databind 2.14 would not break pipeline. i.e., In case there are other regression in jackson 2.15, use can still pin 2.14 |
||
// <2.15, do nothing | ||
} | ||
defaultBufferLimit = newLimit; | ||
} | ||
|
||
static { | ||
increaseDefaultStreamReadConstraints(100 * 1024 * 1024); | ||
} | ||
|
||
public static ObjectMapper newObjectMapperWith(RowJson.RowJsonDeserializer deserializer) { | ||
SimpleModule module = new SimpleModule("rowDeserializationModule"); | ||
module.addDeserializer(Row.class, deserializer); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
import org.apache.beam.sdk.coders.Coder; | ||
import org.apache.beam.sdk.testing.CoderProperties; | ||
import org.apache.beam.sdk.values.TypeDescriptor; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.junit.Test; | ||
import org.junit.runner.RunWith; | ||
import org.junit.runners.JUnit4; | ||
|
@@ -67,6 +68,13 @@ public void testDecodeEncodeEqual() throws Exception { | |
} | ||
} | ||
|
||
@Test | ||
public void testLargeRow() throws Exception { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Before:
after: testLargeRow 0.615s passed Also tested with example Pipeline (read TableRow from BigQuery with large row, then ReShuffle it). |
||
String val = StringUtils.repeat("BEAM", 10 * 1024 * 1024); // 40 MB | ||
TableRow testValue = new TableRowBuilder().set("a", val).set("b", "1").build(); | ||
CoderProperties.coderDecodeEncodeEqual(TEST_CODER, testValue); | ||
} | ||
|
||
/** | ||
* Generated data to check that the wire format has not changed. To regenerate, see {@link | ||
* org.apache.beam.sdk.coders.PrintBase64Encodings}. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
was considering adding a pipeline option to configure this. However, overrideDefaultStreamReadConstraints needs to be called as early as in static context to make it effective for ObjectMapper used in random places, makes it ineffective in non-static context, and ended up with this solution.
After all this is a "band-aid" fix for a bad upstream breaking change. Feel free to comment if there is a better solution.