module Kleene::DSL

def any(token_collection, alphabet = DEFAULT_ALPHABET)

structure: start state -> transitions for each token in the token collection -> final state
two states: start state and final state
def any(token_collection, alphabet = DEFAULT_ALPHABET)
  start = State.new
  nfa = NFA.new(start, alphabet)
  final = State.new(true)
  token_collection.each {|token| nfa.add_transition(token, start, final) }
  nfa.update_final_states
  regex_pattern = "[#{token_collection.join("")}]"
  nfa.set_regex_pattern(regex_pattern)
  nfa
end

def append(a, b)

This differs from `seq` in that the composite machine's final states are the union of machine a's final states and machine b's final states.
Appending produces a machine that matches all the strings in machine a followed by all the strings in machine b.
Append b onto a
def append(a, b)
  a = a.deep_clone
  b = b.deep_clone
  append!(a, b)
end

def append!(a, b)

This differs from `seq` in that the composite machine's final states are the union of machine a's final states and machine b's final states.
Appending produces a machine that matches all the strings in machine a followed by all the strings in machine b.
Destructively append b onto a
def append!(a, b)
  a.alphabet = a.alphabet | b.alphabet
  # add an epsilon transition from each final state of machine a to the start state of maachine b.
  a.final_states.each do |final_state|
    a.add_transition(NFATransition::Epsilon, final_state, b.start_state)
  end
  # add all of machine b's transitions to machine a
  b.all_transitions.each {|transition| a.add_transition(transition.token, transition.from, transition.to) }
  # a.final_states = a.final_states | b.final_states
  a.update_final_states
  a
end

def dot(alphabet = DEFAULT_ALPHABET)

structure: start state -> transitions for every token in the alphabet -> final state
two states: start state and final state
def dot(alphabet = DEFAULT_ALPHABET)
  any(alphabet, alphabet).set_regex_pattern(".")
end

def kleene(machine)

between the new start state and the new final state, and between the final states of the machine and the new start state.
Epsilon transitions are drawn between the new start state and the old start state,
It creates a new start state and an additional final state.
The machine resulting from the Kleene Star operator will match zero or more repetitions of the machine it is applied to.
Implements Kleene Star, as defined in the Ragel manual in section 2.5.6 of http://www.colm.net/files/ragel/ragel-guide-6.10.pdf:
def kleene(machine)
  machine = machine.deep_clone
  start = State.new
  final = State.new(true)
  nfa = NFA.new(start, machine.alphabet)
  nfa.add_states(machine.states)
  nfa.add_transition(NFATransition::Epsilon, start, final)
  nfa.add_transition(NFATransition::Epsilon, start, machine.start_state)
  machine.final_states.each do |final_state|
    nfa.add_transition(NFATransition::Epsilon, final_state, start)
    final_state.final = false
  end
  # add all of machine's transitions to the new machine
  (machine.all_transitions).each {|t| nfa.add_transition(t.token, t.from, t.to) }
  nfa.update_final_states
  nfa.set_regex_pattern("#{machine.regex_pattern}*")
end

def literal(token_stream, alphabet = DEFAULT_ALPHABET)

transition for last character in the string -> state for having observed last character in the string (marked final)
...
transition for second character in the string -> state for having observed second character in the string ->
structure: start state -> transition for first character in the string -> state for having observed first character in the string ->
N+1 states: start state and N other states
given a string with N characters in it:
def literal(token_stream, alphabet = DEFAULT_ALPHABET)
  start = current_state = State.new
  nfa = NFA.new(start, alphabet)
  token_stream.each_char do |token|
    next_state = State.new
    nfa.add_transition(token, current_state, next_state)
    current_state = next_state
  end
  current_state.final = true
  nfa.update_final_states
  nfa.set_regex_pattern(token_stream)
  nfa
end

def optional(machine)

def optional(machine)
  empty = NFA.new(State.new(true), machine.alphabet).set_regex_pattern("")
  union(machine, empty).set_regex_pattern("#{machine.regex_pattern}?")
end

def plus(machine)

def plus(machine)
  seq(machine, kleene(machine)).set_regex_pattern("#{machine.regex_pattern}+")
end

def range(c_begin, c_end, alphabet = DEFAULT_ALPHABET)

This implements a character class, and is specifically for use with matching strings
def range(c_begin, c_end, alphabet = DEFAULT_ALPHABET)
  any((c_begin..c_end).to_a, alphabet).set_regex_pattern("[#{c_begin}-#{c_end}]")
end

def seq(*nfas)

def seq(*nfas)
  nfas.flatten.reduce {|memo_nfa, nfa| seq2(memo_nfa, nfa) }
end

def seq2(a, b)

The final states of the first machine lose their final state status, unless the start state of the second machine is final as well.
Seq draws epsilon transitions from the final states of thefirst machine to the start state of the second machine.
Seq produces a machine that matches all the strings in machine `a` followed by all the strings in machine `b`.
Implements concatenation, as defined in the Ragel manual in section 2.5.5 of http://www.colm.net/files/ragel/ragel-guide-6.10.pdf:
def seq2(a, b)
  a = a.deep_clone
  b = b.deep_clone
  a = append!(a, b)
  # make sure that b's final states are the only final states in a after we have appended b onto a
  a.states.each {|state| state.final = b.final_states.includes?(state) }
  a.update_final_states
  a.set_regex_pattern("#{a.regex_pattern}#{b.regex_pattern}")
end

def union(*nfas)

The resulting machine has a final state setequivalent to the union of the final state sets of both input machines.
Epsilon transitions are drawn from the new start state to the start states of both input machines.
The operation first creates a new start state.
The union operation produces a machine that matches any string in machine one or machine two.
Implements Union, as defined in the Ragel manual in section 2.5.1 of http://www.colm.net/files/ragel/ragel-guide-6.10.pdf:

The resulting machine's final states are the set of final states from all the NFAs in `nfas`.
Build a new machine consisting of a new start state with epsilon transitions to the start state of all the given NFAs in `nfas`.
def union(*nfas)
  nfas.flatten!
  nfas = nfas.map(&:deep_clone)
  union!(nfas)
end

def union!(nfas)

same as union, but doesn't deep clone the constituent nfas
def union!(nfas)
  start = State.new
  composite_alphabet = nfas.map(&:alphabet).reduce {|memo, alphabet| memo | alphabet }
  new_nfa = NFA.new(start, composite_alphabet)
  # add epsilon transitions from the start state of the new machine to the start state of machines a and b
  nfas.each do |nfa|
    new_nfa.add_states(nfa.states)
    new_nfa.add_transition(NFATransition::Epsilon, start, nfa.start_state)
    nfa.all_transitions.each {|t| new_nfa.add_transition(t.token, t.from, t.to) }
  end
  new_nfa.update_final_states
  new_nfa.set_regex_pattern("#{nfas.map(&:regex_pattern).join("|")}")
end

def with_err(nfa, alphabet = nfa.alphabet)

always clones the given nfa and returns a new nfa with a non-final error state
def with_err(nfa, alphabet = nfa.alphabet)
  with_err!(nfa.deep_clone, alphabet)
end

def with_err!(nfa, alphabet = nfa.alphabet)

the error state transitions to itself on any token.
adds and error state to the NFA, create error transitions from all non-error states to the error state on any unhandled token.
def with_err!(nfa, alphabet = nfa.alphabet)
  error_state = nfa.states.find(&:error?)
  return nfa if error_state
  error_state = State.new_error_state
  nfa.add_state(error_state)
  nfa.states.each do |state|
    tokens_on_outbound_transitions = nfa.transitions_from(state).map(&:token)
    missing_tokens = alphabet - tokens_on_outbound_transitions
    missing_tokens.each do |token|
      nfa.add_transition(token, state, error_state)
    end
  end
  nfa.remove_state(error_state) if nfa.all_transitions.none? {|transition| transition.from == error_state || transition.to == error_state }
  nfa.set_regex_pattern("/#{nfa.regex_pattern}/E")
end

def with_err_dead_end(nfa, alphabet = nfa.alphabet)

always clones the given nfa and returns a new nfa with a non-final error state
def with_err_dead_end(nfa, alphabet = nfa.alphabet)
  with_err_dead_end!(nfa.deep_clone, alphabet)
end

def with_err_dead_end!(nfa, alphabet = nfa.alphabet)

the error state doesn't have any outbound transitions.
adds and error state to the NFA, create error transitions from all non-error states to the error state on any unhandled token.
def with_err_dead_end!(nfa, alphabet = nfa.alphabet)
  error_state = nfa.states.find(&:error?)
  return nfa if error_state
  error_state = State.new_error_state
  nfa.add_state(error_state)
  nfa.states.each do |state|
    unless state.error?
      tokens_on_outbound_transitions = nfa.transitions_from(state).map(&:token).to_set
      only_outbound_transition_is_epsilon_transition = tokens_on_outbound_transitions.size == 1 && tokens_on_outbound_transitions.first == NFATransition::Epsilon
      unless only_outbound_transition_is_epsilon_transition
        missing_tokens = (alphabet - Set[NFATransition::Epsilon]) - tokens_on_outbound_transitions
        missing_tokens.each do |token|
          nfa.add_transition(token, state, error_state)
        end
      end
    end
  end
  # remove the error state if it has no inbound or outbound transitions
  nfa.remove_state(error_state) if nfa.all_transitions.none? {|transition| transition.from == error_state || transition.to == error_state }
  nfa.set_regex_pattern("/#{nfa.regex_pattern}/DE")
end