Skip to content

Commit

Permalink
[C++] Optimize LL1Analyzer
Browse files Browse the repository at this point in the history
  • Loading branch information
jcking authored and parrt committed Feb 1, 2022
1 parent c41663a commit 79ec55f
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 134 deletions.
2 changes: 2 additions & 0 deletions runtime/Cpp/runtime/src/atn/ATNConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ namespace atn {

ATNConfig(ATNConfig const&) = default;

ATNConfig(ATNConfig&&) = default;

virtual ~ATNConfig() = default;

virtual size_t hashCode() const;
Expand Down
242 changes: 141 additions & 101 deletions runtime/Cpp/runtime/src/atn/LL1Analyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,141 @@ using namespace antlr4;
using namespace antlr4::atn;
using namespace antlrcpp;

namespace {

struct ATNConfigHasher final {
size_t operator()(const ATNConfig& atn_config) const {
return atn_config.hashCode();
}
};

/// Equality functor that lets ATNConfig objects be stored by value in an
/// unordered container. It forwards to ATNConfig's operator==.
struct ATNConfigComparer final {
  bool operator()(const ATNConfig& left, const ATNConfig& right) const {
    const bool same = (left == right);
    return same;
  }
};

class LL1AnalyzerImpl final {
public:
LL1AnalyzerImpl(const ATN& atn, misc::IntervalSet& look, bool seeThruPreds, bool addEOF) : _atn(atn), _look(look), _seeThruPreds(seeThruPreds), _addEOF(addEOF) {}

/// <summary>
/// Compute set of tokens that can follow {@code s} in the ATN in the
/// specified {@code ctx}.
/// <p/>
/// If {@code ctx} is {@code null} and {@code stopState} or the end of the
/// rule containing {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to
/// the result set. If {@code ctx} is not {@code null} and {@code addEOF} is
/// {@code true} and {@code stopState} or the end of the outermost rule is
/// reached, <seealso cref="Token#EOF"/> is added to the result set.
/// </summary>
/// <param name="s"> the ATN state. </param>
/// <param name="stopState"> the ATN state to stop at. This can be a
/// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param>
/// <param name="ctx"> The outer context, or {@code null} if the outer context should
/// not be used. </param>
/// <param name="look"> The result lookahead set. </param>
/// <param name="lookBusy"> A set used for preventing epsilon closures in the ATN
/// from causing a stack overflow. Outside code should pass
/// {@code new HashSet<ATNConfig>} for this argument. </param>
/// <param name="calledRuleStack"> A set used for preventing left recursion in the
/// ATN from causing a stack overflow. Outside code should pass
/// {@code new BitSet()} for this argument. </param>
/// <param name="seeThruPreds"> {@code true} to true semantic predicates as
/// implicitly {@code true} and "see through them", otherwise {@code false}
/// to treat semantic predicates as opaque and add <seealso cref="#HIT_PRED"/> to the
/// result if one is encountered. </param>
/// <param name="addEOF"> Add <seealso cref="Token#EOF"/> to the result if the end of the
/// outermost context is reached. This parameter has no effect if {@code ctx}
/// is {@code null}. </param>
void LOOK(ATNState *s, ATNState *stopState, Ref<PredictionContext> const& ctx) {
if (!_lookBusy.insert(ATNConfig(s, 0, ctx)).second) {
return;
}

// ml: s can never be null, hence no need to check if stopState is != null.
if (s == stopState) {
if (ctx == nullptr) {
_look.add(Token::EPSILON);
return;
} else if (ctx->isEmpty() && _addEOF) {
_look.add(Token::EOF);
return;
}
}

if (s->getStateType() == ATNState::RULE_STOP) {
if (ctx == nullptr) {
_look.add(Token::EPSILON);
return;
} else if (ctx->isEmpty() && _addEOF) {
_look.add(Token::EOF);
return;
}

if (ctx != PredictionContext::EMPTY) {
bool removed = _calledRuleStack.test(s->ruleIndex);
_calledRuleStack[s->ruleIndex] = false;
// run thru all possible stack tops in ctx
for (size_t i = 0; i < ctx->size(); i++) {
ATNState *returnState = _atn.states[ctx->getReturnState(i)];
LOOK(returnState, stopState, ctx->getParent(i));
}
if (removed) {
_calledRuleStack.set(s->ruleIndex);
}
return;
}
}

size_t n = s->transitions.size();
for (size_t i = 0; i < n; i++) {
Transition *t = s->transitions[i];

if (t->getSerializationType() == Transition::RULE) {
if (_calledRuleStack[(static_cast<RuleTransition*>(t))->target->ruleIndex]) {
continue;
}

Ref<PredictionContext> newContext = SingletonPredictionContext::create(ctx, (static_cast<RuleTransition*>(t))->followState->stateNumber);

_calledRuleStack.set((static_cast<RuleTransition*>(t))->target->ruleIndex);
LOOK(t->target, stopState, newContext);
_calledRuleStack[(static_cast<RuleTransition*>(t))->target->ruleIndex] = false;

} else if (is<AbstractPredicateTransition *>(t)) {
if (_seeThruPreds) {
LOOK(t->target, stopState, ctx);
} else {
_look.add(LL1Analyzer::HIT_PRED);
}
} else if (t->isEpsilon()) {
LOOK(t->target, stopState, ctx);
} else if (t->getSerializationType() == Transition::WILDCARD) {
_look.addAll(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType)));
} else {
misc::IntervalSet set = t->label();
if (!set.isEmpty()) {
if (is<NotSetTransition*>(t)) {
set = set.complement(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType)));
}
_look.addAll(set);
}
}
}
}

private:
const ATN& _atn;
misc::IntervalSet& _look;
antlrcpp::BitSet _calledRuleStack;
std::unordered_set<ATNConfig, ATNConfigHasher, ATNConfigComparer> _lookBusy;
bool _seeThruPreds;
bool _addEOF;
};

}

std::vector<misc::IntervalSet> LL1Analyzer::getDecisionLookahead(ATNState *s) const {
std::vector<misc::IntervalSet> look;

Expand All @@ -31,16 +166,11 @@ std::vector<misc::IntervalSet> LL1Analyzer::getDecisionLookahead(ATNState *s) co

look.resize(s->transitions.size()); // Fills all interval sets with defaults.
for (size_t alt = 0; alt < s->transitions.size(); alt++) {
bool seeThruPreds = false; // fail to get lookahead upon pred

ATNConfig::Set lookBusy;
antlrcpp::BitSet callRuleStack;
LOOK(s->transitions[alt]->target, nullptr, PredictionContext::EMPTY,
look[alt], lookBusy, callRuleStack, seeThruPreds, false);

LL1AnalyzerImpl impl(_atn, look[alt], false, false);
impl.LOOK(s->transitions[alt]->target, nullptr, PredictionContext::EMPTY);
// Wipe out lookahead for this alternative if we found nothing
// or we had a predicate when we !seeThruPreds
if (look[alt].size() == 0 || look[alt].contains(HIT_PRED)) {
if (look[alt].size() == 0 || look[alt].contains(LL1Analyzer::HIT_PRED)) {
look[alt].clear();
}
}
Expand All @@ -52,99 +182,9 @@ misc::IntervalSet LL1Analyzer::LOOK(ATNState *s, RuleContext *ctx) const {
}

// Computes the lookahead set reachable from s, stopping at stopState, and
// returns it by value. A non-null ctx is first converted to a
// PredictionContext via PredictionContext::fromRuleContext; a null ctx is
// passed on as a null context. Predicates are seen through and EOF is added
// (both flags are passed as true).
// NOTE(review): this span shows two variants of the body back to back (the
// out-parameter LOOK call and the LL1AnalyzerImpl call), each declaring its own
// `r` — it looks like a diff view showing both sides; confirm which lines are live.
misc::IntervalSet LL1Analyzer::LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const {
misc::IntervalSet r;
bool seeThruPreds = true; // ignore preds; get all lookahead
Ref<PredictionContext> lookContext = ctx != nullptr ? PredictionContext::fromRuleContext(_atn, ctx) : nullptr;

// Variant 1: fresh busy set and called-rule stack, handed to the recursive overload.
ATNConfig::Set lookBusy;
antlrcpp::BitSet callRuleStack;
LOOK(s, stopState, lookContext, r, lookBusy, callRuleStack, seeThruPreds, true);

// Variant 2: the helper object takes (atn, result set, seeThruPreds, addEOF) in its constructor.
misc::IntervalSet r;
LL1AnalyzerImpl impl(_atn, r, true, true);
impl.LOOK(s, stopState, lookContext);
return r;
}

// Recursive worker: adds to `look` the tokens that can follow state s in
// context ctx, stopping at stopState.
//   s               - state to start from (dereferenced without a null check).
//   stopState       - state at which the walk ends; compared by pointer with s.
//   ctx             - outer context, or null when no outer context is used.
//   look            - result set, only ever added to.
//   lookBusy        - configurations already visited; stops repeated expansion.
//   calledRuleStack - rule indices currently being expanded; stops re-entering a rule.
//   seeThruPreds    - true: follow predicate transitions; false: add HIT_PRED instead.
//   addEOF          - true: add Token::EOF when an end is reached with an empty ctx.
void LL1Analyzer::LOOK(ATNState *s, ATNState *stopState, Ref<PredictionContext> const& ctx, misc::IntervalSet &look,
ATNConfig::Set &lookBusy, antlrcpp::BitSet &calledRuleStack, bool seeThruPreds, bool addEOF) const {

// Heap-allocated key for the busy set; alt is fixed to 0.
Ref<ATNConfig> c = std::make_shared<ATNConfig>(s, 0, ctx);

if (lookBusy.count(c) > 0) // Keep in mind comparison is based on members of the class, not the actual instance.
return;

lookBusy.insert(c);

// ml: s can never be null, hence no need to check if stopState is != null.
if (s == stopState) {
if (ctx == nullptr) {
look.add(Token::EPSILON);
return;
} else if (ctx->isEmpty() && addEOF) {
look.add(Token::EOF);
return;
}
}

// Same end-of-walk handling when the end of a rule is reached.
if (s->getStateType() == ATNState::RULE_STOP) {
if (ctx == nullptr) {
look.add(Token::EPSILON);
return;
} else if (ctx->isEmpty() && addEOF) {
look.add(Token::EOF);
return;
}

if (ctx != PredictionContext::EMPTY) {
// Clear this rule's bit while the callers are explored.
bool removed = calledRuleStack.test(s->ruleIndex);
calledRuleStack[s->ruleIndex] = false;
// NOTE(review): `finally` presumably returns a scope guard that runs the
// lambda when onExit goes out of scope, restoring the bit — confirm
// against its definition.
auto onExit = finally([removed, &calledRuleStack, s] {
if (removed) {
calledRuleStack.set(s->ruleIndex);
}
});
// run thru all possible stack tops in ctx
for (size_t i = 0; i < ctx->size(); i++) {
ATNState *returnState = _atn.states[ctx->getReturnState(i)];
LOOK(returnState, stopState, ctx->getParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
}
return;
}
}

// Otherwise expand every outgoing transition of s.
size_t n = s->transitions.size();
for (size_t i = 0; i < n; i++) {
Transition *t = s->transitions[i];

if (t->getSerializationType() == Transition::RULE) {
// Skip rules that are already being expanded.
if (calledRuleStack[(static_cast<RuleTransition*>(t))->target->ruleIndex]) {
continue;
}

// Push the follow state so a later RULE_STOP can return to it.
Ref<PredictionContext> newContext = SingletonPredictionContext::create(ctx, (static_cast<RuleTransition*>(t))->followState->stateNumber);
// NOTE(review): presumably clears the rule's bit when this branch's scope ends — confirm `finally` semantics.
auto onExit = finally([t, &calledRuleStack] {
calledRuleStack[(static_cast<RuleTransition*>(t))->target->ruleIndex] = false;
});

calledRuleStack.set((static_cast<RuleTransition*>(t))->target->ruleIndex);
LOOK(t->target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);

} else if (is<AbstractPredicateTransition *>(t)) {
if (seeThruPreds) {
LOOK(t->target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
} else {
look.add(HIT_PRED);
}
} else if (t->isEpsilon()) {
LOOK(t->target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
} else if (t->getSerializationType() == Transition::WILDCARD) {
// Wildcard: add the whole range [MIN_USER_TOKEN_TYPE, maxTokenType].
look.addAll(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType)));
} else {
misc::IntervalSet set = t->label();
if (!set.isEmpty()) {
// A not-set transition contributes the complement of its label within the same range.
if (is<NotSetTransition*>(t)) {
set = set.complement(misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE, static_cast<ssize_t>(_atn.maxTokenType)));
}
look.addAll(set);
}
}
}
}
33 changes: 0 additions & 33 deletions runtime/Cpp/runtime/src/atn/LL1Analyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,40 +68,7 @@ namespace atn {
/// specified {@code ctx}. </returns>
misc::IntervalSet LOOK(ATNState *s, ATNState *stopState, RuleContext *ctx) const;

/// <summary>
/// Compute set of tokens that can follow {@code s} in the ATN in the
/// specified {@code ctx}.
/// <p/>
/// If {@code ctx} is {@code null} and {@code stopState} or the end of the
/// rule containing {@code s} is reached, <seealso cref="Token#EPSILON"/> is added to
/// the result set. If {@code ctx} is not {@code null} and {@code addEOF} is
/// {@code true} and {@code stopState} or the end of the outermost rule is
/// reached, <seealso cref="Token#EOF"/> is added to the result set.
/// </summary>
/// <param name="s"> the ATN state. </param>
/// <param name="stopState"> the ATN state to stop at. This can be a
/// <seealso cref="BlockEndState"/> to detect epsilon paths through a closure. </param>
/// <param name="ctx"> The outer context, or {@code null} if the outer context should
/// not be used. </param>
/// <param name="look"> The result lookahead set. </param>
/// <param name="lookBusy"> A set used for preventing epsilon closures in the ATN
/// from causing a stack overflow. Outside code should pass
/// {@code new HashSet<ATNConfig>} for this argument. </param>
/// <param name="calledRuleStack"> A set used for preventing left recursion in the
/// ATN from causing a stack overflow. Outside code should pass
/// {@code new BitSet()} for this argument. </param>
/// <param name="seeThruPreds"> {@code true} to treat semantic predicates as
/// implicitly {@code true} and "see through them", otherwise {@code false}
/// to treat semantic predicates as opaque and add <seealso cref="#HIT_PRED"/> to the
/// result if one is encountered. </param>
/// <param name="addEOF"> Add <seealso cref="Token#EOF"/> to the result if the end of the
/// outermost context is reached. This parameter has no effect if {@code ctx}
/// is {@code null}. </param>
private:
void LOOK(ATNState *s, ATNState *stopState, Ref<PredictionContext> const &ctx,
misc::IntervalSet &look, ATNConfig::Set &lookBusy, antlrcpp::BitSet &calledRuleStack,
bool seeThruPreds, bool addEOF) const;

const atn::ATN &_atn;
};

Expand Down

0 comments on commit 79ec55f

Please sign in to comment.