SQL: Remove CircuitBreaker from parser #41835

Merged · 5 commits · May 7, 2019
8 changes: 8 additions & 0 deletions docs/reference/sql/limitations.asciidoc
@@ -3,6 +3,14 @@
[[sql-limitations]]
== SQL Limitations

+[float]
+[[large-parsing-trees]]
+=== Large queries may throw `ParsingException`
+
+Extremely large queries can consume too much memory during the parsing phase, in which case the {es-sql} engine will
+abort parsing and throw an error. In such cases, consider simplifying the query or splitting it into smaller ones.
+
[float]
[[sys-columns-describe-table-nested-fields]]
=== Nested fields in `SYS COLUMNS` and `DESCRIBE TABLE`
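To see what "extremely large" means here: a single statement whose predicate list nests thousands of levels deep while the parse tree is being built. A hypothetical client-side sketch (table and column names are made up, not from this PR) that would trip the new guard:

    // Hypothetical illustration: build a WHERE clause with tens of thousands
    // of OR terms. Each term deepens the recursive descent during parsing,
    // which now surfaces as a ParsingException instead of a StackOverflowError.
    StringBuilder sql = new StringBuilder("SELECT * FROM logs WHERE id = 0");
    for (int i = 1; i < 100_000; i++) {
        sql.append(" OR id = ").append(i);
    }
    // Submitting sql.toString() is expected to fail with:
    // "SQL statement is too large, causing stack overflow when generating the parsing tree"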
@@ -5,7 +5,6 @@
*/
package org.elasticsearch.xpack.sql.parser;

-import com.carrotsearch.hppc.ObjectShortHashMap;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CommonToken;
@@ -26,16 +25,6 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.xpack.sql.expression.Expression;
-import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BackQuotedIdentifierContext;
-import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BooleanDefaultContext;
-import org.elasticsearch.xpack.sql.parser.SqlBaseParser.BooleanExpressionContext;
-import org.elasticsearch.xpack.sql.parser.SqlBaseParser.PrimaryExpressionContext;
-import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QueryPrimaryDefaultContext;
-import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QueryTermContext;
-import org.elasticsearch.xpack.sql.parser.SqlBaseParser.QuoteIdentifierContext;
-import org.elasticsearch.xpack.sql.parser.SqlBaseParser.StatementContext;
-import org.elasticsearch.xpack.sql.parser.SqlBaseParser.StatementDefaultContext;
-import org.elasticsearch.xpack.sql.parser.SqlBaseParser.UnquoteIdentifierContext;
import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.sql.proto.SqlTypedParamValue;

@@ -50,7 +39,6 @@
import java.util.function.Function;

import static java.lang.String.format;
-import static org.elasticsearch.xpack.sql.parser.AbstractBuilder.source;

public class SqlParser {

@@ -100,45 +88,49 @@ private <T> T invokeParser(String sql,
                            List<SqlTypedParamValue> params, Function<SqlBaseParser,
                            ParserRuleContext> parseFunction,
                            BiFunction<AstBuilder, ParserRuleContext, T> visitor) {
-        SqlBaseLexer lexer = new SqlBaseLexer(new CaseInsensitiveStream(sql));
+        try {
+            SqlBaseLexer lexer = new SqlBaseLexer(new CaseInsensitiveStream(sql));

-        lexer.removeErrorListeners();
-        lexer.addErrorListener(ERROR_LISTENER);
+            lexer.removeErrorListeners();
+            lexer.addErrorListener(ERROR_LISTENER);

-        Map<Token, SqlTypedParamValue> paramTokens = new HashMap<>();
-        TokenSource tokenSource = new ParametrizedTokenSource(lexer, paramTokens, params);
+            Map<Token, SqlTypedParamValue> paramTokens = new HashMap<>();
+            TokenSource tokenSource = new ParametrizedTokenSource(lexer, paramTokens, params);

-        CommonTokenStream tokenStream = new CommonTokenStream(tokenSource);
-        SqlBaseParser parser = new SqlBaseParser(tokenStream);
+            CommonTokenStream tokenStream = new CommonTokenStream(tokenSource);
+            SqlBaseParser parser = new SqlBaseParser(tokenStream);

-        parser.addParseListener(new CircuitBreakerListener());
-        parser.addParseListener(new PostProcessor(Arrays.asList(parser.getRuleNames())));
+            parser.addParseListener(new PostProcessor(Arrays.asList(parser.getRuleNames())));

-        parser.removeErrorListeners();
-        parser.addErrorListener(ERROR_LISTENER);
+            parser.removeErrorListeners();
+            parser.addErrorListener(ERROR_LISTENER);

-        parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
+            parser.getInterpreter().setPredictionMode(PredictionMode.SLL);

-        if (DEBUG) {
-            debug(parser);
-            tokenStream.fill();
+            if (DEBUG) {
+                debug(parser);
+                tokenStream.fill();

-            for (Token t : tokenStream.getTokens()) {
-                String symbolicName = SqlBaseLexer.VOCABULARY.getSymbolicName(t.getType());
-                String literalName = SqlBaseLexer.VOCABULARY.getLiteralName(t.getType());
-                log.info(format(Locale.ROOT, " %-15s '%s'",
-                    symbolicName == null ? literalName : symbolicName,
-                    t.getText()));
-            }
-        }
+                for (Token t : tokenStream.getTokens()) {
+                    String symbolicName = SqlBaseLexer.VOCABULARY.getSymbolicName(t.getType());
+                    String literalName = SqlBaseLexer.VOCABULARY.getLiteralName(t.getType());
+                    log.info(format(Locale.ROOT, " %-15s '%s'",
+                        symbolicName == null ? literalName : symbolicName,
+                        t.getText()));
+                }
+            }

-        ParserRuleContext tree = parseFunction.apply(parser);
+            ParserRuleContext tree = parseFunction.apply(parser);

-        if (DEBUG) {
-            log.info("Parse tree {} " + tree.toStringTree());
-        }
+            if (DEBUG) {
+                log.info("Parse tree {} " + tree.toStringTree());
+            }

-        return visitor.apply(new AstBuilder(paramTokens), tree);
+            return visitor.apply(new AstBuilder(paramTokens), tree);
+        } catch (StackOverflowError e) {
+            throw new ParsingException("SQL statement is too large, " +
+                "causing stack overflow when generating the parsing tree: [{}]", sql);
+        }
    }

private static void debug(SqlBaseParser parser) {
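The behavioral contract of the new guard: callers of invokeParser (and thus of createStatement and createExpression) now see a ParsingException for oversized statements instead of a raw StackOverflowError escaping the parser. A sketch of a regression test under the Elasticsearch test framework (the test name is an assumption; expectThrows, SqlParser, and createStatement exist in that codebase):

    // Assumed test sketch: a deeply nested boolean expression should fail
    // cleanly with ParsingException, not crash with StackOverflowError.
    public void testLargeStatementIsRejectedWhileParsing() {
        StringBuilder sql = new StringBuilder("SELECT * FROM t WHERE a = 0");
        for (int i = 1; i < 200_000; i++) {
            sql.append(" OR a = ").append(i);
        }
        ParsingException e = expectThrows(ParsingException.class,
            () -> new SqlParser().createStatement(sql.toString()));
        assertThat(e.getMessage(), containsString("SQL statement is too large"));
    }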
@@ -221,93 +213,6 @@ public void exitNonReserved(SqlBaseParser.NonReservedContext context) {
}
}

-    /**
-     * Used to catch large expressions that can lead to stack overflows
-     */
-    static class CircuitBreakerListener extends SqlBaseBaseListener {
-
-        private static final short MAX_RULE_DEPTH = 200;
-
-        /**
-         * Due to the structure of the grammar and our custom handling in {@link ExpressionBuilder}
-         * some expressions can exit with a different class than they entered:
-         * e.g.: ValueExpressionContext can exit as ValueExpressionDefaultContext
-         */
-        private static final Map<String, String> ENTER_EXIT_RULE_MAPPING = new HashMap<>();
-
-        static {
-            ENTER_EXIT_RULE_MAPPING.put(StatementDefaultContext.class.getSimpleName(), StatementContext.class.getSimpleName());
-            ENTER_EXIT_RULE_MAPPING.put(QueryPrimaryDefaultContext.class.getSimpleName(), QueryTermContext.class.getSimpleName());
-            ENTER_EXIT_RULE_MAPPING.put(BooleanDefaultContext.class.getSimpleName(), BooleanExpressionContext.class.getSimpleName());
-        }
-
-        private boolean insideIn = false;
-
-        // Keep current depth for every rule visited.
-        // The totalDepth alone cannot be used as expressions like: e1 OR e2 OR e3 OR ...
-        // are processed as e1 OR (e2 OR (e3 OR (... and this results in the totalDepth not growing
-        // while the stack call depth is, leading to a StackOverflowError.
-        private ObjectShortHashMap<String> depthCounts = new ObjectShortHashMap<>();
-
-        @Override
-        public void enterEveryRule(ParserRuleContext ctx) {
-            if (inDetected(ctx)) {
-                insideIn = true;
-            }
-
-            // Skip PrimaryExpressionContext for IN as it's not visited on exit due to
-            // the grammar's peculiarity rule with "predicated" and "predicate".
-            // Also skip the Identifiers as they are "cheap".
-            if (ctx.getClass() != UnquoteIdentifierContext.class &&
-                ctx.getClass() != QuoteIdentifierContext.class &&
-                ctx.getClass() != BackQuotedIdentifierContext.class &&
-                ctx.getClass() != SqlBaseParser.ConstantContext.class &&
-                ctx.getClass() != SqlBaseParser.NumberContext.class &&
-                ctx.getClass() != SqlBaseParser.ValueExpressionContext.class &&
-                (insideIn == false || ctx.getClass() != PrimaryExpressionContext.class)) {
-
-                int currentDepth = depthCounts.putOrAdd(ctx.getClass().getSimpleName(), (short) 1, (short) 1);
-                if (currentDepth > MAX_RULE_DEPTH) {
-                    throw new ParsingException(source(ctx), "SQL statement too large; " +
-                        "halt parsing to prevent memory errors (stopped at depth {})", MAX_RULE_DEPTH);
-                }
-            }
-            super.enterEveryRule(ctx);
-        }
-
-        @Override
-        public void exitEveryRule(ParserRuleContext ctx) {
-            if (inDetected(ctx)) {
-                insideIn = false;
-            }
-
-            decrementCounter(ctx);
-            super.exitEveryRule(ctx);
-        }
-
-        ObjectShortHashMap<String> depthCounts() {
-            return depthCounts;
-        }
-
-        private void decrementCounter(ParserRuleContext ctx) {
-            String className = ctx.getClass().getSimpleName();
-            String classNameToDecrement = ENTER_EXIT_RULE_MAPPING.getOrDefault(className, className);
-
-            // Avoid having negative numbers
-            if (depthCounts.containsKey(classNameToDecrement)) {
-                depthCounts.putOrAdd(classNameToDecrement, (short) 0, (short) -1);
-            }
-        }
-
-        private boolean inDetected(ParserRuleContext ctx) {
-            if (ctx.getParent() != null && ctx.getParent().getClass() == SqlBaseParser.PredicateContext.class) {
-                SqlBaseParser.PredicateContext pc = (SqlBaseParser.PredicateContext) ctx.getParent();
-                return pc.kind != null && pc.kind.getType() == SqlBaseParser.IN;
-            }
-            return false;
-        }
-    }

private static final BaseErrorListener ERROR_LISTENER = new BaseErrorListener() {
@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line,
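For context on what the deleted safeguard did: it kept a per-rule-class depth counter, incremented in enterEveryRule and decremented in exitEveryRule, and aborted once any single rule class nested more than 200 levels. A simplified sketch of that idea using plain java.util collections (the original used hppc's ObjectShortHashMap plus IN/identifier special-casing and negative-count guards, all omitted here):

    // Simplified sketch of the removed technique: cap per-rule nesting depth.
    static class DepthCapListener extends SqlBaseBaseListener {
        private static final int MAX_RULE_DEPTH = 200;
        private final Map<String, Integer> depths = new HashMap<>();

        @Override
        public void enterEveryRule(ParserRuleContext ctx) {
            // Count how deeply this particular rule class is currently nested.
            int depth = depths.merge(ctx.getClass().getSimpleName(), 1, Integer::sum);
            if (depth > MAX_RULE_DEPTH) {
                throw new ParsingException(source(ctx), "SQL statement too large; " +
                    "halt parsing to prevent memory errors (stopped at depth {})", MAX_RULE_DEPTH);
            }
            super.enterEveryRule(ctx);
        }

        @Override
        public void exitEveryRule(ParserRuleContext ctx) {
            // Unwind the counter on exit (the original also mapped mismatched
            // enter/exit context classes and avoided negative counts).
            depths.merge(ctx.getClass().getSimpleName(), -1, Integer::sum);
            super.exitEveryRule(ctx);
        }
    }

The try/catch on StackOverflowError achieves the same protection with a single handler instead of bookkeeping on every rule enter and exit, which appears to be the trade-off behind this change.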