diff --git a/lib/lrama/option_parser.rb b/lib/lrama/option_parser.rb index 5dc4a25d..71c55558 100644 --- a/lib/lrama/option_parser.rb +++ b/lib/lrama/option_parser.rb @@ -59,7 +59,7 @@ def parse_by_option_parser(argv) o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v } o.on('-t', 'reserved, do nothing') { } o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } - o.on('-D', '--define=NAME[=VALUE]', "similar to '%define NAME VALUE'") {|v| @options.define = v } + o.on('-D', '--define=NAME[=VALUE]', Array, "similar to '%define NAME VALUE'") {|v| @options.define = v } o.separator '' o.separator 'Output:' o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v } diff --git a/lib/lrama/options.rb b/lib/lrama/options.rb index 84414021..7d83420b 100644 --- a/lib/lrama/options.rb +++ b/lib/lrama/options.rb @@ -21,11 +21,5 @@ def initialize @y = STDIN @debug = false end - - def define=(v) - v.split(',').each do |p_define| - @define.store *p_define.split('=') - end - end end end diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index a68b5b73..8f3e95ad 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -3,12 +3,13 @@ require "lrama/state/resolved_conflict" require "lrama/state/shift" require "lrama/state/shift_reduce_conflict" +require "lrama/state/inadequacy_annotation" module Lrama class State attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, :default_reduction_rule, :closure, :items, :predecessors - attr_accessor :shifts, :reduces + attr_accessor :shifts, :reduces, :lalr_isocore def initialize(id, accessing_symbol, kernels) @id = id @@ -22,6 +23,7 @@ def initialize(id, accessing_symbol, kernels) @resolved_conflicts = [] @default_reduction_rule = nil @predecessors = [] + @lalr_isocore = self end def closure=(closure) @@ -155,7 +157,7 @@ def internal_dependencies(shift, next_state) nterm_transitions.select {|other_shift, _| @items.find {|item| item.next_sym == shift.next_sym && item.lhs == other_shift.next_sym && item.symbols_after_dot.all?(&:nullable) } }.reduce([[shift, next_state]]) {|result, transition| - result += internal_follows(*transition) + result += internal_dependencies(*transition) } end @@ -167,16 +169,111 @@ def successor_dependencies(shift, next_state) } end + def inspect + "#{id} -> #{@kernels.map(&:to_s).join(', ')}" + end + def inadequacy_list return @inadequacy_list if @inadequacy_list - list = shifts.to_h {|shift| [shift.next_sym, [[shift, nil]]] } - reduces.each do |reduce| - reduce_list = (reduce.look_ahead || []).to_h {|sym| [sym, [[reduce, reduce.item]]] } - list.merge!(reduce_list) {|_, list_value, reduce_value| list_value + reduce_value } + shift_contributions = shifts.to_h {|shift| + [shift.next_sym, [shift]] + } + reduce_contributions = reduces.map {|reduce| + (reduce.look_ahead || []).to_h {|sym| + [sym, [reduce]] + } + }.reduce(Hash.new([])) {|hash, cont| + hash.merge(cont) {|_, a, b| a.union(b) } + } + + list = shift_contributions.merge(reduce_contributions) {|_, a, b| a.union(b) } + @inadequacy_list = list.select {|token, actions| token.term? && actions.size > 1 } + end + + def annotate_manifestation + inadequacy_list.map {|token, actions| + actions.map {|action| + if action.is_a?(Shift) + [InadequacyAnnotation.new(token: token, action: action, item: nil, contributed: false)] + elsif action.is_a?(Reduce) + if action.rule.empty_rule? + lhs_contributions(action.rule.lhs, token).map {|kernel, contributed| + InadequacyAnnotation.new(token: token, action: action, item: kernel, contributed: contributed) + } + else + kernels.map {|kernel| + contributed = kernel.rule == action.rule && kernel.end_of_rule? + InadequacyAnnotation.new(token: token, action: action, item: kernel, contributed: contributed) + } + end + end + } + } + end + + def annotate_predecessor(annotation_list) + annotation_list.reduce([]) {|annotation| + next [token, {}] if annotation.no_contributions? || actions.any? {|action, hash| + p action, hash + hash.keys.any? {|item| hash[item] && item.position == 1 && compute_lhs_contributions(state, item.lhs, token).empty? } + } + [ + token, actions.to_h {|action, hash| + [ + action, hash.to_h {|item, _| + kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 } + [kernel, + hash[item] && + ( + !kernel.nil? && (state.item_lookahead_set[kernel].include?(token)) || + (item.position == 1 && compute_lhs_contributions(state, item.lhs, token)[item]) + ) + ] + } + ] + } + ] + } + end + + def item_lookahead_set + @item_lookahead_set ||= + kernels.to_h {|item| + value = + if item.position > 1 + prev_state, prev_item = predecessor_with_item(item) + prev_state.item_lookahead_set[prev_item] + elsif item.position == 1 + prev_state = predecessors.find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } + shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } + prev_state.goto_follows(shift, next_state) + else + [] + end + [item, value] + } + end + + def item_lookahead_set=(k) + @item_lookahead_set = k + end + + def predecessor_with_item(item) + predecessors.each do |state| + state.kernels.each do |kernel| + return [state, kernel] if kernel.rule == item.rule && kernel.position == item.position - 1 + end end + end - @inadequacy_list = {self => list.select {|_, actions| actions.size > 1 }} + def lhs_contributions(sym, token) + shift, next_state = nterm_transitions.find {|sh, _| sh.next_sym == sym } + if always_follows(shift, next_state).include?(token) + [] + else + kernels.map {|kernel| [kernel, follow_kernel?(kernel) && item_lookahead_set[kernel].include?(token)] } + end end def follow_kernel?(item) @@ -187,8 +284,39 @@ def follow_kernel_items(shift, next_state, item) internal_dependencies(shift, next_state).any? {|shift, _| shift.next_sym == item.next_sym } && item.symbols_after_dot.all?(&:nullable) end + def next_terms + shifts.filter_map {|shift| shift.next_sym.term? && shift.next_sym } + end + def append_predecessor(prev_state) @predecessors << prev_state + @predecessors.uniq! + end + + def goto_follows(shift, next_state) + include_dependencies(shift, next_state).reduce([]) {|result, goto| + st, sh, next_st = goto + result.union(st.always_follows(sh, next_st)) + } + end + + def include_dependencies(shift, next_state) + internal = internal_dependencies(shift, next_state).map {|sh, next_st| [self, sh, next_st] } + pred = predecessor_dependencies(shift, next_state) + + return internal if pred.empty? + dependency = internal.union(pred) + + dependency.reduce(dependency) {|result, goto| result.union(compute_include_dependencies(*goto)) } + end + + def predecessor_dependencies(shift, next_state) + item = kernels.find {|kernel| kernel.next_sym == shift.next_sym } + return [] unless item.symbols_after_transition.all?(&:nullable) + + st = @predecessors.find {|p| p.items.find {|i| i.rule == item.rule && i.position == item.position - 1 } } + sh, next_st = s.nterm_transitions.find {|shift, _| shift.next_token == item.lhs } + [[s, sh, next_st]] end end end diff --git a/lib/lrama/state/inadequacy_annotation.rb b/lib/lrama/state/inadequacy_annotation.rb new file mode 100644 index 00000000..7a1f518d --- /dev/null +++ b/lib/lrama/state/inadequacy_annotation.rb @@ -0,0 +1,9 @@ +module Lrama + class State + class InadequacyAnnotation < Struct.new(:token, :action, :item, :contributed, keyword_init: true) + def no_contributions? + item.nil? && !contributed + end + end + end +end diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index 656a3e22..7ca627f7 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -96,6 +96,14 @@ def compute def compute_ielr report_duration(:compute_predecessors) { compute_predecessors } report_duration(:split_states) { split_states } + @states.each {|state| p state, state.transitions, state.item_lookahead_set } + report_duration(:compute_direct_read_sets) { compute_direct_read_sets } + report_duration(:compute_reads_relation) { compute_reads_relation } + report_duration(:compute_read_sets) { compute_read_sets } + report_duration(:compute_includes_relation) { compute_includes_relation } + report_duration(:compute_lookback_relation) { compute_lookback_relation } + report_duration(:compute_follow_sets) { compute_follow_sets } + report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets } end def reporter @@ -570,19 +578,23 @@ def compute_predecessors end def split_states - @item_lookahead_set = {} - @lalr_isocores = Hash.new {|hash, key| hash[key] = key } @ielr_isocores = Hash.new {|hash, key| hash[key] = [key] } @lookaheads_recomputed = Hash.new {|hash, key| hash[key] = false } - @states.each do |state| - state.transitions.each do |shift, next_state| - compute_state(state, shift, next_state) + transition_queue = [] + @states.first.transitions.each do |shift, next_state| + transition_queue << [@states.first, shift, next_state] + end + until transition_queue.empty? + state, shift, next_state = transition_queue.shift + compute_state(state, shift, next_state) + next_state.transitions.each do |sh, next_st| + transition_queue << [next_state, sh, next_st] end end end def merge_lookaheads(state, k) - return if state.kernels.all? {|item| (k[item] - item_lookahead_set(state)[item]).empty? } + return if state.kernels.all? {|item| (k[item] - state.item_lookahead_set[item]).empty? } state.transitions.each do |shift, next_state| next if @lookaheads_recomputed[next_state] @@ -603,18 +615,19 @@ def compute_state(state, shift, next_state) new_state.set_items_to_state(sh.next_items, next_state) end @states << new_state - @lalr_isocores[new_state] = s + new_state.lalr_isocore = s @ielr_isocores[s] << new_state @ielr_isocores[s].each do |st| @ielr_isocores[st] = @ielr_isocores[s] end @lookaheads_recomputed[new_state] = true - @item_lookahead_set[new_state] = k + new_state.item_lookahead_set = k state.update_transition(shift, new_state) elsif(!@lookaheads_recomputed[s]) - @item_lookahead_set[s] = k + s.item_lookahead_set = k @lookaheads_recomputed[s] = true else + state.update_transition(shift, s) merge_lookaheads(s, k) end end @@ -623,29 +636,33 @@ def propagate_lookaheads(state, next_state) next_state.kernels.to_h {|item| lookahead_sets = if item.position == 1 - compute_goto_follow_set(@lalr_isocores[state], item.lhs) + compute_goto_follow_set(state.lalr_isocore, item.lhs) else kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 } - item_lookahead_set(state)[kernel] + state.item_lookahead_set[kernel] end + # p [state, lookahead_sets, lookahead_set_filters(next_state)[item]] + [item, lookahead_sets & lookahead_set_filters(next_state)[item]] } end def lookahead_set_filters(state) + p state state.kernels.to_h {|kernel| + # p [state, kernel, annotation_list(@lalr_isocores[state])] [kernel, - annotation_list(@lalr_isocores[state])[@lalr_isocores[state]].filter_map {|token, actions| + annotation_list(state.lalr_isocore).filter_map {|token, actions| token if token.term? && actions.any? {|item, _| item == kernel } }] } end def is_compatible(state, k) - @lookaheads_recomputed[state] || - annotation_list(@lalr_isocores[state])[@lalr_isocores[state]].all? {|token, actions| - a = dominant_contribution(state, token, actions, item_lookahead_set(state)) + !@lookaheads_recomputed[state] || + annotation_list(state.lalr_isocores).all? {|token, actions| + a = dominant_contribution(state, token, actions, state.item_lookahead_set) b = dominant_contribution(state, token, actions, k) a.empty? || b.empty? || a == b } @@ -669,119 +686,15 @@ def compute_goto_follow_set(state, nterm_token) state.always_follows(shift, next_state).union(state.kernels.select {|item| state.follow_kernel_items(shift, next_state, item) }.reduce([]) {|result, item| - result.union(item_lookahead_set(state)[item]) + result.union(state.item_lookahead_set[item]) }) end - def item_lookahead_set(state) - @item_lookahead_set[state] ||= - state.kernels.to_h {|item| - value = - if item.position > 1 - prev_state, prev_item = predecessor_with_item(state, item) - item_lookahead_set(prev_state)[prev_item] - elsif item.position == 1 - prev_state = state.predecessors.find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } - shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } - goto_follows(prev_state, shift, next_state) - else - [] - end - [item, value] - } - end - - def predecessor_with_item(state, item) - state.predecessors.each do |state| - state.kernels.each do |kernel| - return [state, kernel] if kernel.rule == item.rule && kernel.position == item.position - 1 - end - end - end - - def goto_follows(state, shift, next_state) - compute_include_dependencies(state, shift, next_state).reduce([]) {|result, goto| - st, sh, next_st = goto - result.union(st.always_follows(sh, next_st)) - } - end - - def compute_include_dependencies(state, shift, next_state) - internal = state.internal_dependencies(shift, next_state).map {|sh, next_st| [state, sh, next_st] } - - item = state.kernels.find {|kernel| kernel.next_sym == shift.next_sym } - return internal unless item.symbols_after_dot.all?(&:nullable) - - s, i = state, item - while i.position > 0 - s = predecessors(s).find {|p| p.kernels.find {|item| item.rule == i.rule && item.position == i.position - 1 } } - i = s.kernels.find {|item| item.rule == i.rule && item.position == i.position - 1 } - end - - p_shift, p_next_state = s.transitions.find {|sh, _| sh.next_sym == item.lhs } - internal.union([[s, p_shift, p_next_state]]) - end - def annotation_list(state) - manifestations = state.inadequacy_list.transform_values {|hash| hash.to_h {|token, actions| [token, annotate_manifestation(state, token, actions)] } } - state.transitions.reduce(manifestations) {|item, transition| - item.merge(annotate_predecessor(state, transition[1], annotation_list(transition[1])[transition[1]])) {|state, annotations, other_annotations| - annotations.merge(other_annotations) {|token, actions, other_actions| - actions.merge(other_actions) {|action, items, other_items| - items.merge(other_items) {|item, bool, other_bool| - raise if bool != other_bool - bool - } - } - } - } - } - end - - def annotate_manifestation(state, token, actions) - actions.to_h {|action, item| - [action, - if action.is_a?(State::Shift) - {} - elsif action.is_a?(State::Reduce) - if item.empty_rule? - compute_lhs_contributions(state, item.lhs, token) - else - state.kernels.to_h {|kernel| [kernel, kernel.rule == item.rule && kernel.end_of_rule?] } - end - end - ] - } - end - - def compute_lhs_contributions(state, sym, token) - shift, next_state = state.nterm_transitions.find {|sh, _| sh.next_sym == sym } - if state.always_follows(shift, next_state).include?(token) - {} - else - state.kernels.to_h {|kernel| [kernel, state.follow_kernel?(kernel) && item_lookahead_set(state)[kernel].include?(token)] } - end - end - - def annotate_predecessor(state, next_state, annotation_list) - {state => annotation_list.to_h {|token, actions| - next [token, {}] if actions.empty? || actions.any? {|action, hash| - hash.keys.any? {|item| hash[item] && item.position == 1 && compute_lhs_contributions(state, item.lhs, token).empty? } - } - [token, actions.to_h {|action, hash| - [action, hash.to_h {|item, _| - kernel = state.kernels.find {|k| k.rule == item.rule && k.position == item.position - 1 } - [kernel, - hash[item] && - ( - !kernel.nil? && (item_lookahead_set(state)[kernel].include?(token)) || - (item.position == 1 && compute_lhs_contributions(state, item.lhs, token)[item]) - ) - ] - }] - }] - } - } + manifestations = state.annotate_manifestation + predecessors = state.transitions.map {|_, next_state| state.annotate_predecessor(annotation_list(next_state)) } + p state, state.inadequacy_list, manifestations, predecessors + manifestations + predecessors end end end diff --git a/lib/lrama/states/item.rb b/lib/lrama/states/item.rb index 31b74b9d..8a36bc1e 100644 --- a/lib/lrama/states/item.rb +++ b/lib/lrama/states/item.rb @@ -62,6 +62,10 @@ def symbols_after_dot rhs[position..-1] end + def symbols_after_transition + rhs[position+1..-1] + end + def to_s "#{lhs.id.s_value}: #{display_name}" end diff --git a/sample/calc.y b/sample/calc.y index 5c291105..10da33b2 100644 --- a/sample/calc.y +++ b/sample/calc.y @@ -25,6 +25,7 @@ static int yyerror(YYLTYPE *loc, const char *str); %union { int val; } +%define lr.type ielr %token LF %token NUM %type expr diff --git a/spec/fixtures/integration/calculator.y b/spec/fixtures/integration/calculator.y index 21e38ea8..6dffb307 100644 --- a/spec/fixtures/integration/calculator.y +++ b/spec/fixtures/integration/calculator.y @@ -16,6 +16,7 @@ static int yyerror(YYLTYPE *loc, const char *str); %type expr %left '+' '-' %left '*' '/' +%define lr.type ielr %% diff --git a/spec/fixtures/integration/ielr.y b/spec/fixtures/integration/ielr.y new file mode 100644 index 00000000..d8680a30 --- /dev/null +++ b/spec/fixtures/integration/ielr.y @@ -0,0 +1,62 @@ +%{ +#include +#include +#include "y.tab.h" +#define YYDEBUG 1 +static int yylex(YYSTYPE *val, YYLTYPE *loc); +static int yyerror(YYLTYPE *loc, const char *str); +%} + +%union { + int val; +} + +%token a +%token b +%token c +%define lr.type ielr + +%% +S: a A B a + | b A B b +A: a C D E +B: c + | // empty +C: D +D: a +E: a + | // empty + +%% + +static int yylex(YYSTYPE *yylval, YYLTYPE *loc) { + int c = getchar(); + printf("%c\n", c); + int val; + + switch (c) { + case ' ': case '\t': + return yylex(yylval, loc); + + case 'a': case 'b': case 'c': + return c; + + case '\n': + exit(0); + + default: + fprintf(stderr, "unknown character: %c\n", c); + exit(1); + } +} + +static int yyerror(YYLTYPE *loc, const char *str) { + fprintf(stderr, "parse error: %s\n", str); + return 0; +} + +int main() { + printf("Enter the formula:\n"); + yyparse(); + return 0; +} diff --git a/spec/lrama/state_spec.rb b/spec/lrama/state_spec.rb new file mode 100644 index 00000000..df20b1df --- /dev/null +++ b/spec/lrama/state_spec.rb @@ -0,0 +1,29 @@ +RSpec.describe Lrama::State do + let(:grammar) { <<-FILE } + %union { + int val; + } + + %token a + %token b + %token c + %define lr.type ielr + + %% + S: a A B a + | b A B b + A: a C D E + B: c + | // empty + C: D + D: a + E: a + | // empty + %% + FILE + + + describe '#internal_dependencies' do + + end +end diff --git a/spec/lrama/states_spec.rb b/spec/lrama/states_spec.rb index e611c22c..22825321 100644 --- a/spec/lrama/states_spec.rb +++ b/spec/lrama/states_spec.rb @@ -1910,4 +1910,17 @@ class : keyword_class tSTRING keyword_end %prec tPLUS end end end + + describe '#compute_ielr' do + it 'recompute states' do + path = "integration/ielr.y" + y = File.read(fixture_path(path)) + grammar = Lrama::Parser.new(y, path).parse + grammar.prepare + grammar.validate! + states = Lrama::States.new(grammar, warning) + states.compute + states.compute_ielr + end + end end