class Regexp::Lexer

A very thin wrapper around the scanner that breaks quantified literal runs,
collects emitted tokens into an array, calculates their nesting depth,
normalizes tokens for the parser, and checks if they are implemented by the
given syntax flavor.

def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)

# Convenience entry point: builds a fresh lexer via +new+ and delegates to
# its instance-level #lex, forwarding every argument and the optional block
# unchanged.
#
# input   - forwarded as-is to the instance method.
# syntax  - forwarded as-is; defaults to "ruby/<running RUBY_VERSION>".
# options - forwarded as the +options:+ keyword.
#
# Returns whatever the instance-level #lex returns.
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
  lexer = new
  lexer.lex(input, syntax, options: options, &block)
end

def ascend(type, token)

# Lowers the matching nesting counter when a closing token is seen.
#
# * :group / :assertion - +nesting+ drops by one if +token+ is listed in
#   CLOSING_TOKENS.
# * :set                - +set_nesting+ drops by one if +token+ is :close.
# * :conditional        - +conditional_nesting+ drops by one if +token+ is
#   :close.
#
# Any other +type+ leaves all counters untouched.
def ascend(type, token)
  if %i[group assertion].include?(type)
    self.nesting = nesting - 1 if CLOSING_TOKENS.include?(token)
  elsif :set == type
    self.set_nesting = set_nesting - 1 if token == :close
  elsif :conditional == type
    self.conditional_nesting = conditional_nesting - 1 if token == :close
  end
end

def break_codepoint_list(token)

# Splits a codepoint-list token whose text contains at least one space into
# two :codepoint_list tokens at the LAST space: everything before that space
# (closed with an added '}') and the final entry (re-opened with an added
# '\u{'). The original token is popped from +tokens+ and the two
# replacements are appended in its place.
#
# Does nothing (returns nil) when there is no text before a last space,
# i.e. when the list cannot be split.
#
# Because the replacement texts are three characters longer in total than
# the original, +shift+ is increased by 3.
def break_codepoint_list(token)
  head, _space, final_entry = token.text.rpartition(' ')
  return if head.empty?

  tokens.pop
  depths = [nesting, set_nesting, conditional_nesting]

  leading_part = Regexp::Token.new(
    :escape, :codepoint_list, head + '}',
    token.ts, token.te - final_entry.length, *depths
  )
  trailing_part = Regexp::Token.new(
    :escape, :codepoint_list, '\u{' + final_entry,
    token.ts + head.length + 1, token.te + 3, *depths
  )
  tokens << leading_part
  tokens << trailing_part

  # one space less, but extra \, u, {, and }
  self.shift = shift + 3
end

def break_literal(token)

called by scan to break a literal run that is longer than one character
into two separate tokens when it is followed by a quantifier

# Splits a literal token that is longer than one character into two
# :literal tokens: everything up to the final character, and the final
# character on its own. The original token is popped from +tokens+ and the
# two replacements are appended in its place.
#
# Does nothing (returns nil) when there is nothing in front of the final
# character, i.e. for single-character (or empty) text.
def break_literal(token)
  # /.\z/mu picks off exactly the last character (newlines included).
  head, final_char = token.text.partition(/.\z/mu)
  return if head.empty?

  tokens.pop
  depths = [nesting, set_nesting, conditional_nesting]

  tokens << Regexp::Token.new(:literal, :literal, head,
                              token.ts, token.te - final_char.length,
                              *depths)
  tokens << Regexp::Token.new(:literal, :literal, final_char,
                              token.ts + head.length, token.te,
                              *depths)
end

def descend(type, token)

# Raises the matching nesting counter when an opening token is seen.
#
# * :group / :assertion - +nesting+ grows by one if +token+ is listed in
#   OPENING_TOKENS.
# * :set                - +set_nesting+ grows by one if +token+ is :open.
# * :conditional        - +conditional_nesting+ grows by one if +token+ is
#   :open.
#
# Any other +type+ leaves all counters untouched.
def descend(type, token)
  if %i[group assertion].include?(type)
    self.nesting = nesting + 1 if OPENING_TOKENS.include?(token)
  elsif :set == type
    self.set_nesting = set_nesting + 1 if token == :open
  elsif :conditional == type
    self.conditional_nesting = conditional_nesting + 1 if token == :open
  end
end

def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)

# Runs the scanner over +input+ and collects the emitted tokens.
#
# For every scanned token this method:
# * normalizes type/token through the syntax object and checks them with
#   +check!+,
# * updates the nesting counters (ascend before the token is built, descend
#   after it has been stored),
# * splits the preceding literal run / codepoint list when the current token
#   is a quantifier,
# * merges :condition / :condition_close tokens into the preceding token,
# * links each token to its neighbours via +previous+ / +next+.
#
# input   - passed to Regexp::Scanner.scan.
# syntax  - passed to Regexp::Syntax.new; defaults to the running Ruby's
#           version.
# options - forwarded to the scanner as +options:+.
#
# Returns the token array, or, when a block is given, the array of the
# block's results for each token.
def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
  syntax = Regexp::Syntax.new(syntax)

  self.tokens = []
  self.nesting = 0
  self.set_nesting = 0
  self.conditional_nesting = 0
  self.shift = 0

  last = nil
  Regexp::Scanner.scan(input, options: options) do |type, token, text, ts, te|
    type, token = *syntax.normalize(type, token)
    syntax.check!(type, token)

    ascend(type, token)

    if type == :quantifier && last
      before = last.previous
      break_literal(last)        if last.type == :literal
      break_codepoint_list(last) if last.token == :codepoint_list

      # If a break happened, `last` was popped and replaced by two new tokens.
      # Re-link the chain around them so nothing keeps pointing at the
      # discarded token, and continue from the second half.
      unless tokens.last.equal?(last)
        head, tail = tokens.last(2)
        before.next = head if before
        head.previous = before
        head.next = tail
        tail.previous = head
        last = tail
      end
    end

    current = Regexp::Token.new(type, token, text, ts + shift, te + shift,
                                nesting, set_nesting, conditional_nesting)

    if type == :conditional && [:condition, :condition_close].include?(token)
      current = merge_condition(current)
      # merge_condition popped and absorbed `last`; link the merged token to
      # the token that preceded it instead of to the discarded one.
      last = last.previous if last
    end

    last.next = current if last
    current.previous = last if last

    tokens << current
    last = current

    descend(type, token)
  end

  if block_given?
    tokens.map { |t| block.call(t) }
  else
    tokens
  end
end

def merge_condition(current)

# Folds +current+ into the most recently collected token.
#
# Pops the last entry from +tokens+ and returns a new
# :conditional/:condition token whose text is the popped token's text
# followed by +current+'s text, spanning from the popped token's start to
# +current+'s end, at the current nesting depths. The returned token is NOT
# pushed back onto +tokens+ here.
def merge_condition(current)
  absorbed = tokens.pop
  merged_text = absorbed.text + current.text

  Regexp::Token.new(
    :conditional, :condition, merged_text,
    absorbed.ts, current.te,
    nesting, set_nesting, conditional_nesting
  )
end

Namespace

Regexp

Class Methods

:: lex

Instance Methods

Defined in

lib/regexp_parser/lexer.rb

Modules

Classes