class Kleene::DFA
def accept?
def accept? @current_state.final? end
def add_transition(token, from_state, to_state)
def add_transition(token, from_state, to_state) @alphabet << token # alphabet is a set, so there will be no duplications @states << to_state # states is a set, so there will be no duplications (to_state should be the only new state) new_transition = DFATransition.new(token, from_state, to_state) @transitions[from_state][token] = new_transition new_transition end
def all_transitions() # : Array(DFATransition)
def all_transitions() # : Array(DFATransition) transitions.flat_map {|state, char_transition_map| char_transition_map.values } end
def clear_error_states
def clear_error_states @error_states = nil end
def deep_clone
def deep_clone old_states = @states.to_a new_states = old_states.map(&:dup) state_mapping = old_states.zip(new_states).to_h transition_mapping = Hash.new new_transitions = transitions.map do |state, char_transition_map| [ state_mapping[state], char_transition_map.map do |char, old_transition| new_transition = DFATransition.new(old_transition.token, state_mapping[old_transition.from], state_mapping[old_transition.to]) transition_mapping[old_transition] = new_transition [char, new_transition] end.to_h ] end.to_h # new_transition_callbacks = transition_callbacks.map do |transition, callback| # { # transition_mapping[transition], # callback # } # end.to_h new_dfa_state_to_nfa_state_sets = dfa_state_to_nfa_state_sets.map {|dfa_state, nfa_state_set| [state_mapping[dfa_state], nfa_state_set] }.to_h DFA.new(state_mapping[@start_state], @alphabet.clone, new_transitions, new_dfa_state_to_nfa_state_sets, origin_nfa: origin_nfa).set_regex_pattern(regex_pattern) end
def error?
def error? @current_state.error? end
def error_states
def error_states @error_states ||= @states.select {|s| s.error? }.to_set end
def handle_token!(input_token, token_index)
def handle_token!(input_token, token_index) @current_state = next_state(@current_state, input_token, token_index) end
def initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, dfa_state_to_nfa_state_sets = Hash.new, transition_callbacks = nil, origin_nfa: nil)
def initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, dfa_state_to_nfa_state_sets = Hash.new, transition_callbacks = nil, origin_nfa: nil) @start_state = start_state @current_state = start_state @transitions = transitions @dfa_state_to_nfa_state_sets = dfa_state_to_nfa_state_sets @alphabet = alphabet + all_transitions.map(&:token) @states = reachable_states(@start_state) @final_states = Set.new @nfa_state_to_dfa_state_sets = Hash.new @dfa_state_to_nfa_state_sets.each do |dfa_state, nfa_state_set| nfa_state_set.each do |nfa_state| dfa_state_set = @nfa_state_to_dfa_state_sets[nfa_state] ||= Set.new dfa_state_set << dfa_state end end @transition_callbacks = transition_callbacks || Hash.new @transition_callbacks_per_destination_state = Hash.new @origin_nfa = origin_nfa update_final_states reset_current_state end
def match?(input)
def match?(input) reset_current_state input.each_char.with_index do |char, index| handle_token!(char, index) end if accept? MatchRef.new(input, 0...input.size) end end
def matches(input)
def matches(input) (0...input.size).reduce([]) do |memo, offset| memo + matches_at_offset(input, offset) end end
def matches_at_offset(input, input_start_offset)
def matches_at_offset(input, input_start_offset) reset_current_state matches = [] (input_start_offset...input.size).each do |offset| token = input[offset] handle_token!(token, offset) if accept? matches << MatchRef.new(input, input_start_offset..offset) end end matches end
def minimize!
This is an implementation of the "Reducing a DFA to a Minimal DFA" algorithm presented here: http://web.cecs.pdx.edu/~harry/compilers/slides/LexicalPart4.pdf
def minimize! # todo: I'll implement this when I need it end
def next_state(from_state, input_token, token_index)
def next_state(from_state, input_token, token_index) transition = @transitions[from_state][input_token] || raise("No DFA transition found. Input token #{input_token} not in DFA alphabet.") # invoke the relevant transition callback function transition_callbacks[transition].try {|callback_fn| callback_fn.call(transition, input_token, token_index) } transition_callbacks_per_destination_state[transition.to].try {|callback_fn| callback_fn.call(transition, input_token, token_index) } transition.to end
def on_transition(transition, &blk)
def on_transition(transition, &blk) @transition_callbacks[transition] = blk end
def on_transition_to(state, &blk)
def on_transition_to(state, &blk) @transition_callbacks_per_destination_state[state] = blk end
def origin_nfa
def origin_nfa @origin_nfa || raise("This DFA was not created from an NFA, therefore it has no origin_nfa.") end
def reachable_states(start_state)
def reachable_states(start_state) visited_states = Set.new() unvisited_states = Set[start_state] while !unvisited_states.empty? outbound_transitions = unvisited_states.flat_map {|state| @transitions[state].try(&:values) || Array.new } destination_states = outbound_transitions.map(&:to).to_set visited_states.merge(unvisited_states) # add the unvisited states to the visited_states unvisited_states = destination_states - visited_states end visited_states end
def regex_pattern
def regex_pattern @regex_pattern || "<<empty>>" end
def reset_current_state
def reset_current_state @current_state = @start_state end
def set_regex_pattern(pattern)
def set_regex_pattern(pattern) @regex_pattern = pattern self end
def shallow_clone
def shallow_clone DFA.new(start_state, alphabet, transitions, dfa_state_to_nfa_state_sets, transition_callbacks, origin_nfa: origin_nfa).set_regex_pattern(regex_pattern) end
def to_s(verbose = false)
def to_s(verbose = false) if verbose retval = states.map(&:to_s).join("\n") retval += "\n" all_transitions.each do |t| retval += "#{t.from.id} -> #{t.token} -> #{t.to.id}\n" end retval else regex_pattern end end
def update_final_states
def update_final_states @final_states = @states.select {|s| s.final? }.to_set end