class Kleene::BatchMultiMatchDFA
def match_tracker(input) # : BatchMatchTracker
def match_tracker(input) # : BatchMatchTracker dfa = @composite_dfa.deep_clone match_tracker = setup_callbacks(dfa) input.each_char.with_index do |char, index| dfa.handle_token!(char, index) end match_tracker end
def matches(input) # : Hash(NFA, Array(MatchRef))
#matches(input) is the batch-style matching interface
def matches(input) # : Hash(NFA, Array(MatchRef)) mt = match_tracker(input) start_index_to_nfas_that_may_match = mt.invert_candidate_match_start_positions mt.empty_matches.each do |nfa_with_dead_err, indices| original_nfa = machines_from_nfa_with_dead_err(nfa_with_dead_err).nfa indices.each do |index| mt.add_match(original_nfa, MatchRef.new(input, index...index)) end end active_dfas = Array.new # the Int32 represents the start of match input.each_char.with_index do |char, index| active_dfas.reject! do |active_dfa_tuple| dfa_clone, original_nfa, start_of_match_index = active_dfa_tuple dfa_clone.handle_token!(char, index) mt.add_match(original_nfa, MatchRef.new(input, start_of_match_index..index)) if dfa_clone.accept? dfa_clone.error? end if nfas_with_dead_err = start_index_to_nfas_that_may_match[index] nfas_with_dead_err.each do |nfa_with_dead_err| machines = machines_from_nfa_with_dead_err(nfa_with_dead_err) original_nfa = machines.nfa dfa = machines.dfa dfa_clone = dfa.shallow_clone dfa_clone.handle_token!(char, index) mt.add_match(original_nfa, MatchRef.new(input, index..index)) if dfa_clone.accept? active_dfas << [dfa_clone, original_nfa, index] unless dfa_clone.error? end end end mt.matches end
def setup_callbacks(dfa)
def setup_callbacks(dfa) match_tracker = BatchMatchTracker.new # 1. identify DFA states that correspond to successful match of first character of the NFAs epsilon_closure_of_nfa_start_state = composite_nfa.epsilon_closure(composite_nfa.start_state) nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = composite_nfa.transitions_from(epsilon_closure_of_nfa_start_state). reject {|transition| transition.epsilon? || transition.to.error? }. map(&:to).to_set dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa. compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dfa_state_to_dead_end_nfas_that_have_matched_their_first_character = Hash.new dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.each do |dfa_state| dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # 2. identify DFA states that correspond to final states in the NFAs nfa_final_states = @nfas_with_err_state.map(&:final_states).reduce(Set.new) {|memo, state_set| memo | state_set } dfa_states_that_correspond_to_nfa_final_states = nfa_final_states.compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dead_end_nfas_that_have_transitioned_to_final_state = Hash.new dfa_states_that_correspond_to_nfa_final_states.each do |dfa_state| dead_end_nfas_that_have_transitioned_to_final_state[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_final_states.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # 3. Identify DFA states that correspond to successful match without even having seen any characters. # These are cases where the NFA's start state is a final state or can reach a final state by following only epsilon transitions. nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state = epsilon_closure_of_nfa_start_state.select(&:final?).to_set dfa_states_that_represent_both_start_states_and_final_states = nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state. compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters = Hash.new dfa_states_that_represent_both_start_states_and_final_states.each do |dfa_state| dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # set up call transition call backs, since the callbacks may only be defined once per state and transition # For (1): # Set up transition callbacks to push the index position of the start of a match of each NFA that has begun # to be matched on the transition to one of the states in (1) # For (2): # set up transition callbacks to push the index position of the end of a successful match onto the list # of successful matches for the NFA that matched # For (3): # set up transision callbacks to capture successful empty matches destination_dfa_states_for_callbacks = dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa | dfa_states_that_correspond_to_nfa_final_states destination_dfa_states_for_callbacks.each do |dfa_state| dfa.on_transition_to(dfa_state) do |transition, token, token_index| destination_dfa_state = transition.to should_track_empty_match = dfa_states_that_represent_both_start_states_and_final_states.includes?(destination_dfa_state) should_track_start_of_candidate_match = should_track_empty_match || dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(destination_dfa_state) should_track_end_of_match = dfa_states_that_correspond_to_nfa_final_states.includes?(destination_dfa_state) if should_track_empty_match dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state].each do |nfa_with_dead_end| match_tracker.add_empty_match(nfa_with_dead_end, token_index) end end if should_track_start_of_candidate_match nfas_that_matched_first_character = dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[destination_dfa_state] || Set.new nfas_that_matched_empty_match = dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state] || Set.new dead_end_nfas_that_are_starting_to_match = nfas_that_matched_first_character | nfas_that_matched_empty_match dead_end_nfas_that_are_starting_to_match.each do |nfa_with_dead_end| match_tracker.add_start_of_candidate_match(nfa_with_dead_end, token_index) end end if should_track_end_of_match dead_end_nfas_that_have_transitioned_to_final_state[destination_dfa_state].each do |nfa_with_dead_end| match_tracker.add_end_of_match(nfa_with_dead_end, token_index) end end end end match_tracker end