class GraphQL::Language::Lexer
def self.replace_escaped_characters_in_place(raw_string)
Replace any escaped Unicode or whitespace with the _actual_ characters.
def self.replace_escaped_characters_in_place(raw_string)
  raw_string.gsub!(ESCAPED) do |matched_str|
    if (point_str_1 = $1 || $2)
      codepoint_1 = point_str_1.to_i(16)
      if (codepoint_2 = $3)
        codepoint_2 = codepoint_2.to_i(16)
        if (codepoint_1 >= 0xD800 && codepoint_1 <= 0xDBFF) && # leading surrogate
            (codepoint_2 >= 0xDC00 && codepoint_2 <= 0xDFFF)   # trailing surrogate
          # A surrogate pair
          combined = ((codepoint_1 - 0xD800) * 0x400) + (codepoint_2 - 0xDC00) + 0x10000
          [combined].pack('U'.freeze)
        else
          # Two separate code points
          [codepoint_1].pack('U'.freeze) + [codepoint_2].pack('U'.freeze)
        end
      else
        [codepoint_1].pack('U'.freeze)
      end
    else
      ESCAPES_REPLACE[matched_str]
    end
  end
  nil
end
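For illustration, here is the surrogate-pair arithmetic above as a standalone sketch; the 0xD800/0xDC00 constants come from the UTF-16 encoding scheme, and the sample pair decodes to U+1F600:

# A minimal sketch of the surrogate-pair math above, outside the lexer.
# "\uD83D\uDE00" is the UTF-16 surrogate pair for U+1F600.
leading  = 0xD83D
trailing = 0xDE00
combined = ((leading - 0xD800) * 0x400) + (trailing - 0xDC00) + 0x10000
puts combined.to_s(16)    # => "1f600"
puts [combined].pack('U') # => "😀"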
def self.tokenize(string)
This is not used during parsing because the parser drives the lexer one token at a time via `advance`; it's a convenience for building the full list of tokens at once.
def self.tokenize(string)
  lexer = GraphQL::Language::Lexer.new(string)
  tokens = []
  while (token_name = lexer.advance)
    new_token = [
      token_name,
      lexer.line_number,
      lexer.column_number,
      lexer.debug_token_value(token_name),
    ]
    tokens << new_token
  end
  tokens
end
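A usage sketch, assuming the graphql gem is loaded; the exact token names shown in the comments (:QUERY, :LCURLY, :RCURLY) are educated guesses from the dispatch tables above, not confirmed constants:

require "graphql"

# Each token is [name, line, column, debug_value], per `tokenize` above.
tokens = GraphQL::Language::Lexer.tokenize("query { viewer }")
tokens.each { |token| p token }
# Expected shape (token names are illustrative):
#   [:QUERY, 1, 1, "query"]
#   [:LCURLY, 1, 7, "{"]
#   [:IDENTIFIER, 1, 9, "viewer"]
#   [:RCURLY, 1, 16, "}"]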
def _hash key
This produces a unique integer for bytes 2 and 3 of each keyword string
def _hash key
  (key * 18592990) >> 27 & 0x1f
end
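A standalone sketch of this perfect hash: `advance` below packs a keyword's bytes 2 and 3 as `(byte3 << 8) | byte2` before hashing, so for "query" the key is built from "u" (0x75) and "e" (0x65):

# Standalone copy of the hash above, applied to the keyword "query".
def _hash(key)
  (key * 18592990) >> 27 & 0x1f
end

word = "query"
key = (word.getbyte(2) << 8) | word.getbyte(1) # => (0x65 << 8) | 0x75
p _hash(key) # => 14, a bucket index in 0..31 for KEYWORD_BY_TWO_BYTES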
def advance
def advance
  @scanner.skip(IGNORE_REGEXP)
  if @scanner.eos?
    @finished = true
    return false
  end
  @tokens_count += 1
  if @tokens_count > @max_tokens
    raise_parse_error("This query is too large to execute.")
  end
  @pos = @scanner.pos
  # Dispatch on the first byte of the upcoming token
  next_byte = @string.getbyte(@pos)
  next_byte_is_for = FIRST_BYTES[next_byte]
  case next_byte_is_for
  when ByteFor::PUNCTUATION
    @scanner.pos += 1
    PUNCTUATION_NAME_FOR_BYTE[next_byte]
  when ByteFor::NAME
    if (len = @scanner.skip(KEYWORD_REGEXP))
      case len
      when 2
        :ON
      when 12
        :SUBSCRIPTION
      else
        pos = @pos
        # Use bytes 2 and 3 as a unique identifier for this keyword
        bytes = (@string.getbyte(pos + 2) << 8) | @string.getbyte(pos + 1)
        KEYWORD_BY_TWO_BYTES[_hash(bytes)]
      end
    else
      @scanner.skip(IDENTIFIER_REGEXP)
      :IDENTIFIER
    end
  when ByteFor::IDENTIFIER
    @scanner.skip(IDENTIFIER_REGEXP)
    :IDENTIFIER
  when ByteFor::NUMBER
    if (len = @scanner.skip(NUMERIC_REGEXP))
      if GraphQL.reject_numbers_followed_by_names
        new_pos = @scanner.pos
        peek_byte = @string.getbyte(new_pos)
        next_first_byte = FIRST_BYTES[peek_byte]
        if next_first_byte == ByteFor::NAME || next_first_byte == ByteFor::IDENTIFIER
          number_part = token_value
          name_part = @scanner.scan(IDENTIFIER_REGEXP)
          raise_parse_error("Name after number is not allowed (in `#{number_part}#{name_part}`)")
        end
      end
      # Check for a matched decimal:
      @scanner[1] ? :FLOAT : :INT
    else
      # Attempt to find the part after the `-`
      value = @scanner.scan(/-\s?[a-z0-9]*/i)
      invalid_byte_for_number_error_message = "Expected type 'number', but it was malformed#{value.nil? ? "" : ": #{value.inspect}"}."
      raise_parse_error(invalid_byte_for_number_error_message)
    end
  when ByteFor::ELLIPSIS
    if @string.getbyte(@pos + 1) != 46 || @string.getbyte(@pos + 2) != 46
      raise_parse_error("Expected `...`, actual: #{@string[@pos..@pos + 2].inspect}")
    end
    @scanner.pos += 3
    :ELLIPSIS
  when ByteFor::STRING
    if @scanner.skip(BLOCK_STRING_REGEXP) || @scanner.skip(QUOTED_STRING_REGEXP)
      :STRING
    else
      raise_parse_error("Expected string or block string, but it was malformed")
    end
  else
    @scanner.pos += 1
    :UNKNOWN_CHAR
  end
rescue ArgumentError => err
  if err.message == "invalid byte sequence in UTF-8"
    raise_parse_error("Parse error on bad Unicode escape sequence", nil, nil)
  end
end
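A driver sketch, assuming the graphql gem: `advance` returns one token name per call and `false` at end of input, which is the loop `tokenize` above is built on:

require "graphql"

lexer = GraphQL::Language::Lexer.new("mutation { id }")
while (name = lexer.advance)
  puts "#{name}: #{lexer.debug_token_value(name).inspect}"
end
p lexer.finished? # => true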
def column_number
def column_number
  @scanner.string[0..@pos].split("\n").last.length
end
def debug_token_value(token_name)
def debug_token_value(token_name)
  if token_name && Lexer::Punctuation.const_defined?(token_name)
    Lexer::Punctuation.const_get(token_name)
  elsif token_name == :ELLIPSIS
    "..."
  elsif token_name == :STRING
    string_value
  elsif @scanner.matched_size.nil?
    @scanner.peek(1)
  else
    token_value
  end
end
def finished?
def finished?
  @finished
end
def freeze
def freeze
  @scanner = nil
  super
end
def initialize(graphql_str, filename: nil, max_tokens: nil)
def initialize(graphql_str, filename: nil, max_tokens: nil)
  # Relabel non-UTF-8, non-ASCII input as UTF-8 (bytes are not transcoded)
  if !(graphql_str.encoding == Encoding::UTF_8 || graphql_str.ascii_only?)
    graphql_str = graphql_str.dup.force_encoding(Encoding::UTF_8)
  end
  @string = graphql_str
  @filename = filename
  @scanner = StringScanner.new(graphql_str)
  @pos = nil
  @max_tokens = max_tokens || Float::INFINITY
  @tokens_count = 0
  @finished = false
end
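A sketch of the encoding guard above in isolation: non-UTF-8, non-ASCII-only input is duplicated and relabeled as UTF-8 without transcoding, so the later valid_encoding? checks can flag bad bytes:

# Binary-encoded input with non-ASCII bytes gets relabeled as UTF-8.
str = "\"caf\xC3\xA9\"".b # Encoding::BINARY, not ascii_only?
unless str.encoding == Encoding::UTF_8 || str.ascii_only?
  str = str.dup.force_encoding(Encoding::UTF_8)
end
p str.encoding        # => #<Encoding:UTF-8>
p str.valid_encoding? # => true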
def line_number
def line_number
  @scanner.string[0..@pos].count("\n") + 1
end
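To make both position helpers concrete, a sketch of the same slicing with a plain string standing in for @scanner.string and @pos:

# @pos points at the first byte of the current token.
string = "query {\n  name\n}"
pos = 10 # byte offset of "name"
line   = string[0..pos].count("\n") + 1         # => 2
column = string[0..pos].split("\n").last.length # => 3
p [line, column]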
def raise_parse_error(message, line = line_number, col = column_number)
def raise_parse_error(message, line = line_number, col = column_number)
  raise GraphQL::ParseError.new(message, line, col, @string, filename: @filename)
end
def string_value
def string_value
  str = token_value
  is_block = str.start_with?('"""')
  if is_block
    # Strip the triple quotes, then trim per block string rules
    str.gsub!(/\A"""|"""\z/, '')
    return Language::BlockString.trim_whitespace(str)
  else
    # Strip the surrounding quotes, then validate and unescape
    str.gsub!(/\A"|"\z/, '')
    if !str.valid_encoding? || !str.match?(VALID_STRING)
      raise_parse_error("Bad unicode escape in #{str.inspect}")
    else
      Lexer.replace_escaped_characters_in_place(str)
      # Unescaping may have produced invalid UTF-8, so re-check
      if !str.valid_encoding?
        raise_parse_error("Bad unicode escape in #{str.inspect}")
      else
        str
      end
    end
  end
end
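A usage sketch, assuming the graphql gem: after advance returns :STRING, string_value unquotes and unescapes the matched literal:

require "graphql"

lexer = GraphQL::Language::Lexer.new('{ field(arg: "say \u0068\u0069") }')
while (name = lexer.advance)
  puts lexer.string_value if name == :STRING # => say hi
end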
def token_value
def token_value
  @string.byteslice(@scanner.pos - @scanner.matched_size, @scanner.matched_size)
rescue StandardError => err
  raise GraphQL::Error, "(token_value failed: #{err.class}: #{err.message})"
end
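A sketch of the byteslice arithmetic with a bare StringScanner standing in for the lexer's internals:

require "strscan"

string  = "hello world"
scanner = StringScanner.new(string)
scanner.skip(/hello/) # matched_size is now 5
p string.byteslice(scanner.pos - scanner.matched_size, scanner.matched_size)
# => "hello"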