class Kleene::NFA
def accept?
def accept? @current_states.any?(&:final?) end
def add_state(new_state)
def add_state(new_state) @states << new_state end
def add_states(states)
def add_states(states) @states.merge(states) end
def add_transition(token, from_state, to_state)
def add_transition(token, from_state, to_state) # # make sure states EITHER have a single outbound epsilon transition OR non-epsilon outbound transitions; they can't have both # if token == NFATransition::Epsilon # # make sure from_state doesn't have any outbound non-epsilon transitions # raise "Error: Non-epsilon transitions are already present on #{from_state.to_s}! States may EITHER have a single outbound epsilon transision OR have outbound non-epsilon transitions, but not both." if transitions_from(from_state).any? {|t| !t.epsilon? } # else # # make sure from_state doesn't have any outbound epsilon transition # raise "Error: Epsilon transitions are already present on #{from_state.to_s}! States may EITHER have a single outbound epsilon transision OR have outbound non-epsilon transitions, but not both." if transitions_from(from_state).any?(&:epsilon?) # end @alphabet << token # alphabet is a set, so there will be no duplications @states << from_state @states << to_state new_transition = NFATransition.new(token, from_state, to_state) char_transition_map = @transitions[from_state] ||= Hash.new set_of_transisions = char_transition_map[token] ||= Set.new set_of_transisions << new_transition new_transition end
def all_transitions() # : Array(NFATransition)
def all_transitions() # : Array(NFATransition) transitions.flat_map {|state, char_transition_map| char_transition_map.values.flat_map(&:to_a) } end
def deep_clone
def deep_clone old_states = @states.to_a new_states = old_states.map(&:dup) state_mapping = old_states.zip(new_states).to_h new_transitions = transitions.map {|state, char_transition_map| [ state_mapping[state], char_transition_map.map {|char, set_of_transisions| [ char, set_of_transisions.map {|transition| NFATransition.new(transition.token, state_mapping[transition.from], state_mapping[transition.to])}.to_set ] }.to_h ] }.to_h NFA.new(state_mapping[@start_state], @alphabet.clone, new_transitions, new_states.to_set).set_regex_pattern(regex_pattern) end
def epsilon_closure(state_set) # : Set(State)
Returns a Set of State objects.
That is, determine what states are reachable on an epsilon transition from the current state set (@current_states).
Determine the epsilon closure of the given state set
def epsilon_closure(state_set) # : Set(State) state_set = state_set.is_a?(State) ? Set[state_set] : state_set visited_states = Set.new() unvisited_states = state_set while !unvisited_states.empty? epsilon_transitions = unvisited_states.compact_map {|state| @transitions.dig(state, NFATransition::Epsilon) }.flat_map(&:to_a) destination_states = epsilon_transitions.map(&:to).to_set visited_states.merge(unvisited_states) # add the unvisited states to the visited_states unvisited_states = destination_states - visited_states end visited_states end
def error_states
def error_states @states.select(&:error?).to_set end
def graphviz
def graphviz retval = "digraph G { " all_transitions.each do |t| transition_label = t.epsilon? ? "ε" : t.token retval += "#{t.from.id} -> #{t.to.id} [label=\"#{transition_label}\"];" end @final_states.each do |s| retval += "#{s.id} [color=lightblue2, style=filled, shape=doublecircle];" end retval += " }" retval end
def handle_token!(input_token)
def handle_token!(input_token) @current_states = next_states(@current_states, input_token) end
def initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, initial_states = nil)
def initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, initial_states = nil) @start_state = start_state @transitions = transitions @alphabet = alphabet + all_transitions.map(&:token) @states = initial_states || reachable_states(start_state) @current_states = Set.new @final_states = Set.new update_final_states reset_current_states end
def match?(input) # : MatchRef?
def match?(input) # : MatchRef? # puts "match?(\"#{input}\")" # puts self.to_s reset_current_states # puts @current_states.map(&:id) input.each_char.with_index do |char, index| # puts char handle_token!(char) # puts @current_states.map(&:id) end if accept? MatchRef.new(input, 0...input.size) end end
def matches(input)
def matches(input) (0...input.size).reduce([]) do |memo, offset| memo + matches_at_offset(input, offset) end end
def matches_at_offset(input, input_start_offset)
def matches_at_offset(input, input_start_offset) reset_current_states matches = [] (input_start_offset...input.size).each do |offset| token = input[offset] handle_token!(token) if accept? matches << MatchRef.new(input, input_start_offset..offset) end end matches end
def next_states(state_set, input_token)
def next_states(state_set, input_token) # Retrieve a list of states in the epsilon closure of the given state set epsilon_reachable_states = epsilon_closure(state_set) # puts "epsilon_reachable_states = #{epsilon_reachable_states.map(&:id)}" # Build an array of outbound transitions from each state in the epsilon-closure # Filter the outbound transitions, selecting only those that accept the input we are given. outbound_transitions = epsilon_reachable_states.compact_map {|state| @transitions.dig(state, input_token) }.flat_map(&:to_a) # puts "outbound_transitions = #{outbound_transitions.inspect}" # Build an array of epsilon-closures of each transition's destination state. destination_state_epsilon_closures = outbound_transitions.map {|transition| epsilon_closure(transition.to) } # Union each of the epsilon-closures (each is a set) together to form a flat array of states in the epsilon-closure of all of our current states. next_states = destination_state_epsilon_closures.reduce {|combined_state_set, individual_state_set| combined_state_set.merge(individual_state_set) } next_states || Set.new end
def reachable_states(start_state)
def reachable_states(start_state) visited_states = Set.new() unvisited_states = Set[start_state] while !unvisited_states.empty? outbound_transitions = unvisited_states.flat_map {|state| @transitions[state]&.values&.flat_map(&:to_a) || Array.new } destination_states = outbound_transitions.map(&:to).to_set visited_states.merge(unvisited_states) # add the unvisited states to the visited_states unvisited_states = destination_states - visited_states end visited_states end
def regex_pattern
def regex_pattern @regex_pattern || "<<empty>>" end
def remove_state(state)
def remove_state(state) raise "Unable to remove state from NFA: at least one transition leads to or from the state." if all_transitions.any? {|transition| transition.from == state || transition.to == state } @states.delete(state) end
def reset_current_states
def reset_current_states @current_states = epsilon_closure(@start_state) end
def set_regex_pattern(pattern)
def set_regex_pattern(pattern) @regex_pattern = pattern self end
def to_dfa
This implements the subset construction algorithm presented on page 118 of the first edition of the dragon book.
def to_dfa state_map = Hash.new # this map contains (nfa_state_set => dfa_state) pairs dfa_transitions = Hash.new dfa_alphabet = @alphabet - Set[NFATransition::Epsilon] visited_state_sets = Set.new() nfa_start_state_set = epsilon_closure(@start_state) unvisited_state_sets = Set[nfa_start_state_set] dfa_start_state = State.new(nfa_start_state_set.any?(&:final?), nfa_start_state_set.any?(&:error?)) state_map[nfa_start_state_set] = dfa_start_state until unvisited_state_sets.empty? # take one of the unvisited state sets state_set = unvisited_state_sets.first current_dfa_state = state_map[state_set] # Figure out the set of next-states for each token in the alphabet # Add each set of next-states to unvisited_state_sets dfa_alphabet.each do |token| next_nfa_state_set = next_states(state_set, token) unvisited_state_sets << next_nfa_state_set # this new DFA state, next_dfa_state, represents the next nfa state set, next_nfa_state_set next_dfa_state = state_map[next_nfa_state_set] ||= State.new(next_nfa_state_set.any?(&:final?), next_nfa_state_set.any?(&:error?)) char_transition_map = dfa_transitions[current_dfa_state] ||= Hash.new char_transition_map[token] = DFATransition.new(token, current_dfa_state, next_dfa_state) end visited_state_sets << state_set unvisited_state_sets = unvisited_state_sets - visited_state_sets end # `state_map.invert` is sufficient to convert from a (nfa_state_set => dfa_state) mapping to a (dfa_state => nfa_state_set) mapping, because the mappings are strictly one-to-one. DFA.new(state_map[nfa_start_state_set], dfa_alphabet, dfa_transitions, state_map.invert, origin_nfa: self).set_regex_pattern(regex_pattern) end
def to_s(verbose = false)
def to_s(verbose = false) if verbose retval = states.map(&:to_s).join("\n") retval += "\n" all_transitions.each do |t| transition_label = t.epsilon? ? "epsilon" : t.token retval += "#{t.from.id} -> #{transition_label} -> #{t.to.id}\n" end retval else regex_pattern end end
def transitions_from(state_set) # : Set(NFATransition)
end
@transitions[state]&.values&.reduce {|memo, set_of_transisions| memo | set_of_transisions} || Set.new
def transitions_from(state) # : Set(NFATransition)
def transitions_from(state_set) # : Set(NFATransition) case state_set when State @transitions[state_set]&.values&.reduce {|memo, set_of_transisions| memo | set_of_transisions} || Set.new when Set state_set.map {|state| transitions_from(state) }.reduce {|memo, state_set| memo | state_set } else raise "boom" end end
def update_final_states
def update_final_states @final_states = @states.select { |s| s.final? }.to_set end