lib/jamespath/tokenizer.rb
require 'strscan' module Jamespath class Token < Struct.new(:type, :value, :pos) def inspect; "#{type}(#{value.inspect}, pos=#{pos})" end alias to_s inspect end class Tokenizer attr_reader :tokens TOKENS = { lbracket: /\[/, rbracket: /\]/, lbrace: /\{/, rbrace: /\}/, comma: /,/, dot: /\./, colon: /:/, double_pipe: /\|\|/, asterisk: /\*/, number: /-?[0-9]+/, quoted_identifier: /"([^"\\]|\\"|\\\\|\\[^"])*"/, identifier: /[a-zA-Z0-9_\u007E-\uFFFF]+/ } def tokenize(source) @pos = 0 @source = source @scanner = StringScanner.new(source) @tokens = [] until @scanner.eos? @tokens << next_token end @tokens end protected def next_token @pos += @scanner.skip(/\s+/) || 0 TOKENS.each do |type, re| if token = @scanner.scan(re) and token.length > 0 pos, @pos = @pos, @pos + token.length if type == :quoted_identifier type = :identifier token = token[1...-1].gsub(/\\"/, '"') end return Token.new(type, token, pos) end end raise SyntaxError, "unexpected token at pos=#{@pos}: #{@source[@pos]}" end end end