class Kleene::OnlineDFA
def create_composite_nfa(nfas)
def create_composite_nfa(nfas) nfa = union!(nfas) # add epsilon transitions from all the states except the start state back to the start state nfa.states.each do |state| if state != nfa.start_state nfa.add_transition(NFATransition::Epsilon, state, nfa.start_state) end end nfa.update_final_states nfa end
def ingest(input, debug = false) # : Hash(NFA, Array(MatchRef))
#ingest(input) is the online-style matching interface
def ingest(input, debug = false) # : Hash(NFA, Array(MatchRef)) mt = @match_tracker start_index_of_input_fragment_in_buffer = @buffer.length input.each_char.with_index do |char, index| @active_composite_dfa.handle_token!(char, start_index_of_input_fragment_in_buffer + index) end @buffer << input start_index_to_nfas_that_may_match = mt.invert_candidate_match_start_positions mt.empty_matches.each do |nfa_with_dead_err, indices| original_nfa = machines_from_nfa_with_dead_err(nfa_with_dead_err).nfa indices.select {|index| index >= start_index_of_input_fragment_in_buffer }.each do |index| mt.add_match(original_nfa, MatchRef.new(@buffer, index...index)) end end input.each_char.with_index do |char, index| index_in_buffer = start_index_of_input_fragment_in_buffer + index @active_candidate_dfas.reject! do |active_dfa_tuple| dfa_clone, original_nfa, start_of_match_index = active_dfa_tuple dfa_clone.handle_token!(char, index_in_buffer) mt.add_match(original_nfa, MatchRef.new(@buffer, start_of_match_index..index_in_buffer)) if dfa_clone.accept? dfa_clone.error? end if nfas_with_dead_err = start_index_to_nfas_that_may_match[index_in_buffer] nfas_with_dead_err.each do |nfa_with_dead_err| machines = machines_from_nfa_with_dead_err(nfa_with_dead_err) original_nfa = machines.nfa dfa = machines.dfa dfa_clone = dfa.shallow_clone dfa_clone.handle_token!(char, index_in_buffer) mt.add_match(original_nfa, MatchRef.new(@buffer, index_in_buffer..index_in_buffer)) if dfa_clone.accept? @active_candidate_dfas << [dfa_clone, original_nfa, index_in_buffer] unless dfa_clone.error? end end end matches end
def initialize(nfas)
def initialize(nfas) composite_alphabet = nfas.reduce(Set.new) {|memo, nfa| memo | nfa.alphabet } @original_nfas = nfas @nfas_with_err_state = nfas.map {|nfa| with_err_dead_end(nfa, composite_alphabet) } # copy NFAs and add dead-end error states to each of them dfas = @original_nfas.map(&:to_dfa) @nfa_to_index = @original_nfas.map.with_index {|nfa, index| [nfa, index] }.to_h @nfa_with_dead_err_to_index = @nfas_with_err_state.map.with_index {|nfa, index| [nfa, index] }.to_h @dfa_to_index = dfas.map.with_index {|dfa, index| [dfa, index] }.to_h @machines_by_index = @original_nfas.zip(nfas_with_err_state, dfas).map.with_index {|tuple, index| nfa, nfa_with_dead_err, dfa = tuple; [index, MachineTuple.new(nfa, nfa_with_dead_err, dfa)] }.to_h # build a mapping of (state -> nfa) pairs that capture which nfa owns each state @dead_end_nfa_state_to_dead_end_nfa = Hash.new @nfas_with_err_state.each do |nfa_with_dead_err| nfa_with_dead_err.states.each do |state| @dead_end_nfa_state_to_dead_end_nfa[state] = nfa_with_dead_err end end # create a composite NFA as the union of all the NFAs with epsilon transitions from every NFA state back to the union NFA's start state @composite_nfa = create_composite_nfa(@nfas_with_err_state) @composite_dfa = @composite_nfa.to_dfa reset end
def machines_from_dfa(dfa) # : MachineTuple
def machines_from_dfa(dfa) # : MachineTuple machines_by_index[dfa_to_index[dfa]] end
def machines_from_nfa(nfa) # : MachineTuple
def machines_from_nfa(nfa) # : MachineTuple machines_by_index[nfa_to_index[nfa]] end
def machines_from_nfa_with_dead_err(nfa_with_dead_err) # : MachineTuple
def machines_from_nfa_with_dead_err(nfa_with_dead_err) # : MachineTuple machines_by_index[nfa_with_dead_err_to_index[nfa_with_dead_err]] end
def matches
def matches @match_tracker.matches end
def reset # : OnlineMatchTracker
def reset # : OnlineMatchTracker @active_composite_dfa = @composite_dfa.deep_clone @active_candidate_dfas = [] @match_tracker = setup_callbacks(@active_composite_dfa) @buffer = "" end
def setup_callbacks(dfa)
def setup_callbacks(dfa) match_tracker = OnlineMatchTracker.new # 1. identify DFA states that correspond to successful match of first character of the NFAs epsilon_closure_of_nfa_start_state = composite_nfa.epsilon_closure(composite_nfa.start_state) nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = composite_nfa.transitions_from(epsilon_closure_of_nfa_start_state). reject {|transition| transition.epsilon? || transition.to.error? }. map(&:to).to_set dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa. compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dfa_state_to_dead_end_nfas_that_have_matched_their_first_character = Hash.new dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.each do |dfa_state| dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # 2. identify DFA states that correspond to final states in the NFAs nfa_final_states = @nfas_with_err_state.map(&:final_states).reduce(Set.new) {|memo, state_set| memo | state_set } dfa_states_that_correspond_to_nfa_final_states = nfa_final_states.compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dead_end_nfas_that_have_transitioned_to_final_state = Hash.new dfa_states_that_correspond_to_nfa_final_states.each do |dfa_state| dead_end_nfas_that_have_transitioned_to_final_state[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_final_states.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # 3. Identify DFA states that correspond to successful match without even having seen any characters. # These are cases where the NFA's start state is a final state or can reach a final state by following only epsilon transitions. nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state = epsilon_closure_of_nfa_start_state.select(&:final?).to_set dfa_states_that_represent_both_start_states_and_final_states = nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state. compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters = Hash.new dfa_states_that_represent_both_start_states_and_final_states.each do |dfa_state| dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # set up call transition call backs, since the callbacks may only be defined once per state and transition # For (1): # Set up transition callbacks to push the index position of the start of a match of each NFA that has begun # to be matched on the transition to one of the states in (1) # For (2): # set up transition callbacks to push the index position of the end of a successful match onto the list # of successful matches for the NFA that matched # For (3): # set up transision callbacks to capture successful empty matches destination_dfa_states_for_callbacks = dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa | dfa_states_that_correspond_to_nfa_final_states destination_dfa_states_for_callbacks.each do |dfa_state| dfa.on_transition_to(dfa_state) do |transition, token, token_index| destination_dfa_state = transition.to should_track_empty_match = dfa_states_that_represent_both_start_states_and_final_states.includes?(destination_dfa_state) should_track_start_of_candidate_match = should_track_empty_match || dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(destination_dfa_state) should_track_end_of_match = dfa_states_that_correspond_to_nfa_final_states.includes?(destination_dfa_state) if should_track_empty_match dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state].each do |nfa_with_dead_end| match_tracker.add_empty_match(nfa_with_dead_end, token_index) end end if should_track_start_of_candidate_match nfas_that_matched_first_character = dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[destination_dfa_state] || Set.new nfas_that_matched_empty_match = dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state] || Set.new dead_end_nfas_that_are_starting_to_match = nfas_that_matched_first_character | nfas_that_matched_empty_match dead_end_nfas_that_are_starting_to_match.each do |nfa_with_dead_end| match_tracker.add_start_of_candidate_match(nfa_with_dead_end, token_index) end end if should_track_end_of_match dead_end_nfas_that_have_transitioned_to_final_state[destination_dfa_state].each do |nfa_with_dead_end| match_tracker.add_end_of_match(nfa_with_dead_end, token_index) end end end end match_tracker end