lib/liquid/lexer.rb
# frozen_string_literal: true

require "strscan"

module Liquid
  # Splits a piece of markup into a flat list of tokens.
  #
  # Every token is a two-element array `[type, text]`, where `type` is a Symbol
  # and `text` is the exact substring that was matched. The list is always
  # terminated by the one-element sentinel `[:end_of_string]`.
  #
  # @example
  #   Liquid::Lexer.new("a.b | c: 1").tokenize
  #   # => [[:id, "a"], [:dot, "."], [:id, "b"], [:pipe, "|"],
  #   #     [:id, "c"], [:colon, ":"], [:number, "1"], [:end_of_string]]
  class Lexer
    # Single-character punctuation mapped to its token type.
    SPECIALS = {
      '|' => :pipe,
      '.' => :dot,
      ':' => :colon,
      ',' => :comma,
      '[' => :open_square,
      ']' => :close_square,
      '(' => :open_round,
      ')' => :close_round,
      '?' => :question,
      '-' => :dash,
    }.freeze

    # A letter or underscore, then word characters or hyphens, then an
    # optional trailing question mark.
    IDENTIFIER = /[a-zA-Z_][\w-]*\??/
    # Quoted strings; the quote character cannot appear inside the literal
    # (there is no escape syntax in these patterns).
    SINGLE_STRING_LITERAL = /'[^\']*'/
    DOUBLE_STRING_LITERAL = /"[^\"]*"/
    STRING_LITERAL = Regexp.union(SINGLE_STRING_LITERAL, DOUBLE_STRING_LITERAL)
    # Optional leading minus, digits, optional fractional part.
    NUMBER_LITERAL = /-?\d+(\.\d+)?/
    DOTDOT = /\.\./
    # `contains` only counts as an operator when followed by whitespace.
    COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains(?=\s)/
    WHITESPACE_OR_NOTHING = /\s*/

    # @param input [String] the markup to tokenize
    def initialize(input)
      @ss = StringScanner.new(input)
    end

    # Lexes the whole input and returns its tokens.
    #
    # The scanner is rewound first, so calling this method repeatedly (or
    # again after it raised) always lexes the complete input instead of
    # continuing from wherever the previous call stopped.
    #
    # @return [Array<Array>] `[type, text]` pairs followed by `[:end_of_string]`
    # @raise [SyntaxError] when a character matches no token pattern and is
    #   not listed in SPECIALS
    def tokenize
      @ss.reset
      tokens = []

      loop do
        @ss.skip(WHITESPACE_OR_NOTHING)
        break if @ss.eos?

        tokens << next_token
      end

      tokens << [:end_of_string]
    end

    private

    # Reads exactly one token at the current scanner position.
    #
    # The order of the attempts matters: comparison operators are tried before
    # identifiers so that "contains " is an operator rather than an id, and
    # numbers are tried before SPECIALS so that "-1" is one number rather than
    # a dash followed by a number.
    def next_token
      if (text = @ss.scan(COMPARISON_OPERATOR))
        [:comparison, text]
      elsif (text = @ss.scan(STRING_LITERAL))
        [:string, text]
      elsif (text = @ss.scan(NUMBER_LITERAL))
        [:number, text]
      elsif (text = @ss.scan(IDENTIFIER))
        [:id, text]
      elsif (text = @ss.scan(DOTDOT))
        [:dotdot, text]
      else
        char = @ss.getch
        type = SPECIALS[char]
        # NOTE(review): SyntaxError is looked up from inside the Liquid
        # namespace; presumably the project defines Liquid::SyntaxError.
        # If it does not, Ruby's built-in SyntaxError is raised - confirm.
        raise SyntaxError, "Unexpected character #{char}" unless type

        [type, char]
      end
    end
  end
end