Skip to content

Commit

Permalink
[Console] Update extract string literal logic (elastic#72628) (elasti…
Browse files Browse the repository at this point in the history
  • Loading branch information
jloleysens authored Aug 20, 2020
1 parent 8db1dcc commit bb06629
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import collapsingTests from './utils_string_collapsing.txt';
import expandingTests from './utils_string_expanding.txt';

import * as utils from '../index';
import { extractJSONStringValues } from '../parser';

describe('JSON to XJSON conversion tools', () => {
it('will collapse multiline strings', () => {
Expand All @@ -34,6 +35,32 @@ describe('JSON to XJSON conversion tools', () => {
const multiline = '{ "foo": """bar\r\nbaz""" }';
expect(utils.collapseLiteralStrings(multiline)).toEqual('{ "foo": "bar\\r\\nbaz" }');
});

describe('JSON string values parser', () => {
  // Serializes a small document mixing string and non-string values, then
  // checks that each reported range slices out the expected quoted literal.
  test('correctly extracts JSON string values', () => {
    const serialized = JSON.stringify({
      myString: 'string',
      notAString: 1,
      myStringArray: ['a', 1, 'test', { nestedString: 'string' }],
    });

    // String *values* in document order; object keys must not be reported.
    const expectedLiterals = ['"string"', '"a"', '"test"', '"string"'];

    const { stringValues } = extractJSONStringValues(serialized);
    expect(stringValues.length).toBe(4);

    expectedLiterals.forEach((literal, idx) => {
      const { startIndex, endIndex } = stringValues[idx];
      // endIndex is inclusive, so add one for substring's exclusive end.
      expect(serialized.substring(startIndex, endIndex + 1)).toBe(literal);
    });
  });
});
});

_.each(collapsingTests.split(/^=+$/m), function (fixture) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@

==========
Scripts in requests
-------------------------------------
{
"f": { "script": { "source": "\ntest\ntest\\\\\\\\\\\\\\\\2\n" } },
"f": { "script": { "source": "\ntest\ntest\\2\n" } },
"g": { "script": "second + \"\\\";" },
"a": "short with \\",
"\\\\h": 1,
Expand All @@ -12,7 +13,7 @@ Scripts in requests
{
"f": { "script": { "source": """
test
test\\\\\\\\2
test\2
""" } },
"g": { "script": """second + "\";""" },
"a": """short with \""",
Expand All @@ -23,11 +24,11 @@ test\\\\\\\\2
Preserve triple quotes
-------------------------------------
{
"content\\\": "tri\"ple",
"content\\": "tri\"ple",
}
-------------------------------------
{
"content\\\": """tri"ple""",
"content\\": """tri"ple""",
}
==========
Correctly parse with JSON embedded inside values
Expand Down Expand Up @@ -82,3 +83,13 @@ Single quotes escaped special case, end
{
"query": "test\""
}
==========
Strings in Arrays
-------------------------------------
{
"array": ["expand \\ me", "do not expand", "do expand \\"]
}
-------------------------------------
{
"array": ["""expand \ me""", "do not expand", """do expand \"""]
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
* under the License.
*/

import { extractJSONStringValues } from './parser';

export function collapseLiteralStrings(data: string) {
const splitData = data.split(`"""`);
for (let idx = 1; idx < splitData.length - 1; idx += 2) {
Expand All @@ -25,47 +27,60 @@ export function collapseLiteralStrings(data: string) {
return splitData.join('');
}

/*
The following regex describes global match on:
1. one colon followed by any number of space characters
2. one double quote (not escaped, special case for JSON in JSON).
3. greedily match any non double quote and non newline char OR any escaped double quote char (non-capturing).
4. handle a special case where an escaped slash may be the last character
5. one double quote
For instance: `: "some characters \" here"`
Will match and be expanded to: `"""some characters " here"""`
// 5 megabytes
const MAX_EXPANDABLE_JSON_SIZE = 5 * 1024 * 1024;

/**
* Takes in a string representing some JSON data and expands strings,
* where needed, to a string literal representation.
*
* For example, given a value like: "{ "my_string": "\nhey!\n" }"
*
* Will return: "{ "my_string": """
* hey!
* """
* }"
*/
export function expandLiteralStrings(data: string) {
// Assuming 1 byte per char
if (data.length > MAX_EXPANDABLE_JSON_SIZE) {
return data;
}

const LITERAL_STRING_CANDIDATES = /((:[\s\r\n]*)([^\\])"(\\"|[^"\n])*\\?")/g;
const { stringValues } = extractJSONStringValues(data);

export function expandLiteralStrings(data: string) {
return data.replace(LITERAL_STRING_CANDIDATES, (match, string) => {
// Expand to triple quotes if there are _any_ slashes
if (string.match(/\\./)) {
const firstDoubleQuoteIdx = string.indexOf('"');
const lastDoubleQuoteIdx = string.lastIndexOf('"');
if (stringValues.length === 0) {
return data;
}

// Handle a special case where we may have a value like "\"test\"". We don't
// want to expand this to """"test"""" - so we terminate before processing the string
// further if we detect this either at the start or end of the double quote section.
// Include JSON before our first string value
let result = data.substring(0, stringValues[0].startIndex);

if (string[firstDoubleQuoteIdx + 1] === '\\' && string[firstDoubleQuoteIdx + 2] === '"') {
return string;
}
for (let x = 0; x < stringValues.length; x++) {
const { startIndex, endIndex } = stringValues[x];
const candidate = data.substring(startIndex, endIndex + 1);

if (string[lastDoubleQuoteIdx - 1] === '"' && string[lastDoubleQuoteIdx - 2] === '\\') {
return string;
}
// Handle a special case where we may have a value like "\"test\"". We don't
// want to expand this to """"test"""" - so we terminate before processing the string
// further if we detect this either at the start or end of the double quote section.
const skip =
(candidate[1] === '\\' && candidate[2] === '"') ||
(candidate[candidate.length - 2] === '"' && candidate[candidate.length - 3] === '\\');

const colonAndAnySpacing = string.slice(0, firstDoubleQuoteIdx);
const rawStringifiedValue = string.slice(firstDoubleQuoteIdx, string.length);
// Remove one level of JSON stringification
const jsonValue = JSON.parse(rawStringifiedValue);
return `${colonAndAnySpacing}"""${jsonValue}"""`;
if (!skip && candidate.match(/\\./)) {
result += `"""${JSON.parse(candidate)}"""`;
} else {
return string;
result += candidate;
}

if (stringValues[x + 1]) {
// Add any JSON between string values
result += data.substring(endIndex + 1, stringValues[x + 1].startIndex);
}
});
}

// Add any remaining JSON after all string values
result += data.substring(stringValues[stringValues.length - 1].endIndex + 1);

return result;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/** Inclusive offsets of one string literal, both pointing at its double quotes. */
interface StringValueRange {
  startIndex: number;
  endIndex: number;
}

type StringValues = StringValueRange[];

interface ParseResult {
  stringValues: StringValues;
}

const JSON_COLON = ':';
const JSON_STRING_DELIMITER = '"';
const JSON_STRING_ESCAPE = '\\';

/**
 * Scans JSON text and reports where every string *value* is located.
 *
 * The scan is a single left-to-right pass that only tracks whether it is
 * currently inside a double-quoted string; it does not validate the JSON.
 * A string whose closing quote is followed (ignoring whitespace) by a colon
 * is treated as an object key and is not reported.
 *
 * Example:
 *
 *   '{ "my_string_value": "is this", "my_number_value": 42 }'
 *
 * yields a single range:
 *
 *   [ { startIndex: 21, endIndex: 29 } ]
 *
 * which corresponds to `"is this"` (quotes included) in the input.
 *
 * @param input - JSON text to scan.
 * @returns The ranges of all string values, in the order they appear.
 *          A string that is never closed produces no range.
 */
export const extractJSONStringValues = (input: string): ParseResult => {
  const stringValues: StringValues = [];
  const nonWhitespace = /\S/;

  // First non-whitespace character strictly after `fromIndex`, if there is one.
  const nextSignificantChar = (fromIndex: number): string | undefined => {
    for (let i = fromIndex + 1; i < input.length; i++) {
      const candidate = input.charAt(i);
      if (nonWhitespace.test(candidate)) {
        return candidate;
      }
    }
    return undefined;
  };

  // Index of the opening quote of the string being scanned, or -1 when the
  // cursor is outside of any string.
  let openQuoteIndex = -1;

  for (let cursor = 0; cursor < input.length; cursor++) {
    const current = input[cursor];

    if (openQuoteIndex === -1) {
      // Outside a string: the only interesting character is an opening quote.
      if (current === JSON_STRING_DELIMITER) {
        openQuoteIndex = cursor;
      }
      continue;
    }

    if (current === JSON_STRING_ESCAPE) {
      // Step over the escaped character so an escaped quote cannot close the string.
      cursor++;
      continue;
    }

    if (current === JSON_STRING_DELIMITER) {
      // Closing quote: keep the range unless this string turns out to be a key.
      if (nextSignificantChar(cursor) !== JSON_COLON) {
        stringValues.push({ startIndex: openQuoteIndex, endIndex: cursor });
      }
      openQuoteIndex = -1;
    }
  }

  return { stringValues };
};

0 comments on commit bb06629

Please sign in to comment.