module Kleene::DSL
def any(token_collection, alphabet = DEFAULT_ALPHABET)
two states: start state and final state
def any(token_collection, alphabet = DEFAULT_ALPHABET) start = State.new nfa = NFA.new(start, alphabet) final = State.new(true) token_collection.each {|token| nfa.add_transition(token, start, final) } nfa.update_final_states regex_pattern = "[#{token_collection.join("")}]" nfa.set_regex_pattern(regex_pattern) nfa end
def append(a, b)
Appending produces a machine that matches all the strings in machine a followed by all the strings in machine b.
Append b onto a
def append(a, b) a = a.deep_clone b = b.deep_clone append!(a, b) end
def append!(a, b)
Appending produces a machine that matches all the strings in machine a followed by all the strings in machine b.
Destructively append b onto a
def append!(a, b) a.alphabet = a.alphabet | b.alphabet # add an epsilon transition from each final state of machine a to the start state of maachine b. a.final_states.each do |final_state| a.add_transition(NFATransition::Epsilon, final_state, b.start_state) end # add all of machine b's transitions to machine a b.all_transitions.each {|transition| a.add_transition(transition.token, transition.from, transition.to) } # a.final_states = a.final_states | b.final_states a.update_final_states a end
def dot(alphabet = DEFAULT_ALPHABET)
two states: start state and final state
def dot(alphabet = DEFAULT_ALPHABET) any(alphabet, alphabet).set_regex_pattern(".") end
def kleene(machine)
Epsilon transitions are drawn between the new start state and the old start state,
It creates a new start state and an additional final state.
The machine resulting from the Kleene Star operator will match zero or more repetitions of the machine it is applied to.
Implements Kleene Star, as defined in the Ragel manual in section 2.5.6 of http://www.colm.net/files/ragel/ragel-guide-6.10.pdf:
def kleene(machine) machine = machine.deep_clone start = State.new final = State.new(true) nfa = NFA.new(start, machine.alphabet) nfa.add_states(machine.states) nfa.add_transition(NFATransition::Epsilon, start, final) nfa.add_transition(NFATransition::Epsilon, start, machine.start_state) machine.final_states.each do |final_state| nfa.add_transition(NFATransition::Epsilon, final_state, start) final_state.final = false end # add all of machine's transitions to the new machine (machine.all_transitions).each {|t| nfa.add_transition(t.token, t.from, t.to) } nfa.update_final_states nfa.set_regex_pattern("#{machine.regex_pattern}*") end
def literal(token_stream, alphabet = DEFAULT_ALPHABET)
...
transition for second character in the string -> state for having observed second character in the string ->
structure: start state -> transition for first character in the string -> state for having observed first character in the string ->
N+1 states: start state and N other states
given a string with N characters in it:
def literal(token_stream, alphabet = DEFAULT_ALPHABET) start = current_state = State.new nfa = NFA.new(start, alphabet) token_stream.each_char do |token| next_state = State.new nfa.add_transition(token, current_state, next_state) current_state = next_state end current_state.final = true nfa.update_final_states nfa.set_regex_pattern(token_stream) nfa end
def optional(machine)
def optional(machine) empty = NFA.new(State.new(true), machine.alphabet).set_regex_pattern("") union(machine, empty).set_regex_pattern("#{machine.regex_pattern}?") end
def plus(machine)
def plus(machine) seq(machine, kleene(machine)).set_regex_pattern("#{machine.regex_pattern}+") end
def range(c_begin, c_end, alphabet = DEFAULT_ALPHABET)
def range(c_begin, c_end, alphabet = DEFAULT_ALPHABET) any((c_begin..c_end).to_a, alphabet).set_regex_pattern("[#{c_begin}-#{c_end}]") end
def seq(*nfas)
def seq(*nfas) nfas.flatten.reduce {|memo_nfa, nfa| seq2(memo_nfa, nfa) } end
def seq2(a, b)
Seq draws epsilon transitions from the final states of thefirst machine to the start state of the second machine.
Seq produces a machine that matches all the strings in machine `a` followed by all the strings in machine `b`.
Implements concatenation, as defined in the Ragel manual in section 2.5.5 of http://www.colm.net/files/ragel/ragel-guide-6.10.pdf:
def seq2(a, b) a = a.deep_clone b = b.deep_clone a = append!(a, b) # make sure that b's final states are the only final states in a after we have appended b onto a a.states.each {|state| state.final = b.final_states.includes?(state) } a.update_final_states a.set_regex_pattern("#{a.regex_pattern}#{b.regex_pattern}") end
def union(*nfas)
Epsilon transitions are drawn from the new start state to the start states of both input machines.
The operation first creates a new start state.
The union operation produces a machine that matches any string in machine one or machine two.
Implements Union, as defined in the Ragel manual in section 2.5.1 of http://www.colm.net/files/ragel/ragel-guide-6.10.pdf:
The resulting machine's final states are the set of final states from all the NFAs in `nfas`.
Build a new machine consisting of a new start state with epsilon transitions to the start state of all the given NFAs in `nfas`.
def union(*nfas) nfas.flatten! nfas = nfas.map(&:deep_clone) union!(nfas) end
def union!(nfas)
def union!(nfas) start = State.new composite_alphabet = nfas.map(&:alphabet).reduce {|memo, alphabet| memo | alphabet } new_nfa = NFA.new(start, composite_alphabet) # add epsilon transitions from the start state of the new machine to the start state of machines a and b nfas.each do |nfa| new_nfa.add_states(nfa.states) new_nfa.add_transition(NFATransition::Epsilon, start, nfa.start_state) nfa.all_transitions.each {|t| new_nfa.add_transition(t.token, t.from, t.to) } end new_nfa.update_final_states new_nfa.set_regex_pattern("#{nfas.map(&:regex_pattern).join("|")}") end
def with_err(nfa, alphabet = nfa.alphabet)
def with_err(nfa, alphabet = nfa.alphabet) with_err!(nfa.deep_clone, alphabet) end
def with_err!(nfa, alphabet = nfa.alphabet)
adds and error state to the NFA, create error transitions from all non-error states to the error state on any unhandled token.
def with_err!(nfa, alphabet = nfa.alphabet) error_state = nfa.states.find(&:error?) return nfa if error_state error_state = State.new_error_state nfa.add_state(error_state) nfa.states.each do |state| tokens_on_outbound_transitions = nfa.transitions_from(state).map(&:token) missing_tokens = alphabet - tokens_on_outbound_transitions missing_tokens.each do |token| nfa.add_transition(token, state, error_state) end end nfa.remove_state(error_state) if nfa.all_transitions.none? {|transition| transition.from == error_state || transition.to == error_state } nfa.set_regex_pattern("/#{nfa.regex_pattern}/E") end
def with_err_dead_end(nfa, alphabet = nfa.alphabet)
def with_err_dead_end(nfa, alphabet = nfa.alphabet) with_err_dead_end!(nfa.deep_clone, alphabet) end
def with_err_dead_end!(nfa, alphabet = nfa.alphabet)
adds and error state to the NFA, create error transitions from all non-error states to the error state on any unhandled token.
def with_err_dead_end!(nfa, alphabet = nfa.alphabet) error_state = nfa.states.find(&:error?) return nfa if error_state error_state = State.new_error_state nfa.add_state(error_state) nfa.states.each do |state| unless state.error? tokens_on_outbound_transitions = nfa.transitions_from(state).map(&:token).to_set only_outbound_transition_is_epsilon_transition = tokens_on_outbound_transitions.size == 1 && tokens_on_outbound_transitions.first == NFATransition::Epsilon unless only_outbound_transition_is_epsilon_transition missing_tokens = (alphabet - Set[NFATransition::Epsilon]) - tokens_on_outbound_transitions missing_tokens.each do |token| nfa.add_transition(token, state, error_state) end end end end # remove the error state if it has no inbound or outbound transitions nfa.remove_state(error_state) if nfa.all_transitions.none? {|transition| transition.from == error_state || transition.to == error_state } nfa.set_regex_pattern("/#{nfa.regex_pattern}/DE") end