class Crass::Tokenizer
Experimental RBS support (using type sampling data from the type_fusion
project).
# sig/crass/tokenizer.rbs class Crass::Tokenizer def self.tokenize: () -> (Array[Hash, node, Symbol, pos, Integer, raw, String, type, Symbol, value, String] | Array[Hash, node, Symbol, pos, Integer, raw, String, value, String]) def consume: () -> nil def consume_name: () -> String def create_token: (Symbol type, ?Hash properties) -> Hash def preprocess: (String input) -> String def tokenize: () -> (Array[Hash, node, Symbol, pos, Integer, raw, String, type, Symbol, value, String] | Array[Hash, node, Symbol, pos, Integer, raw, String, value, String]) end
-
Tokenizes a CSS string. Reference: dev.w3.org/csswg/css-syntax/#tokenization
def self.tokenize(input, options = {})
Experimental RBS support (using type sampling data from the type_fusion
project).
def self.tokenize: () ->
This signature was generated using 2 samples from 1 application.
Tokenizes the given _input_ as a CSS string and returns an array of
tokens.
# Tokenizes the given _input_ as a CSS string and returns an array of
# tokens.
#
# Convenience wrapper: builds a Tokenizer from _input_ and _options_ and
# returns whatever its instance-level `tokenize` returns.
def self.tokenize(input, options = {})
  tokenizer = Tokenizer.new(input, options)
  tokenizer.tokenize
end
def consume
Experimental RBS support (using type sampling data from the type_fusion
project).
def consume: () -> nil
This signature was generated using 1 sample from 1 application.
Consumes a token and returns the token that was consumed.
# Consumes a token and returns the token that was consumed.
#
# Returns `nil` once the scanner reports the end of the input. Comments are
# returned as tokens only when the `:preserve_comments` option is truthy;
# otherwise they are skipped and the next token is returned instead.
def consume
  # Skip leading comments with a loop instead of calling `consume`
  # recursively, so that a long run of adjacent comments cannot exhaust the
  # call stack. Each pass repeats the same steps the recursive call performed:
  # end-of-input check, mark, then try to consume a comment.
  loop do
    return nil if @s.eos?

    @s.mark

    comment_token = consume_comments
    break unless comment_token
    return comment_token if @options[:preserve_comments]
  end

  # Consume whitespace.
  return create_token(:whitespace) if @s.scan(RE_WHITESPACE)

  char = @s.consume

  case char.to_sym
  when :'"'
    consume_string

  when :'#'
    if @s.peek =~ RE_NAME || valid_escape?(@s.peek(2))
      # :type must be computed before :value, because consume_name advances
      # the scanner.
      create_token(:hash,
        :type  => start_identifier?(@s.peek(3)) ? :id : :unrestricted,
        :value => consume_name)
    else
      create_token(:delim, :value => char)
    end

  when :'$'
    if @s.peek == '='
      @s.consume
      create_token(:suffix_match)
    else
      create_token(:delim, :value => char)
    end

  when :"'"
    consume_string

  when :'('
    create_token(:'(')

  when :')'
    create_token(:')')

  when :*
    if @s.peek == '='
      @s.consume
      create_token(:substring_match)

    # Non-standard: Preserve the IE * hack.
    elsif @options[:preserve_hacks] && @s.peek =~ RE_NAME_START
      @s.reconsume
      consume_ident

    else
      create_token(:delim, :value => char)
    end

  when :+
    if start_number?
      @s.reconsume
      consume_numeric
    else
      create_token(:delim, :value => char)
    end

  when :','
    create_token(:comma)

  when :-
    next_two_chars   = @s.peek(2)
    next_three_chars = char + next_two_chars

    if start_number?(next_three_chars)
      @s.reconsume
      consume_numeric
    elsif next_two_chars == '->'
      @s.consume
      @s.consume
      create_token(:cdc)
    elsif start_identifier?(next_three_chars)
      @s.reconsume
      consume_ident
    else
      create_token(:delim, :value => char)
    end

  when :'.'
    if start_number?
      @s.reconsume
      consume_numeric
    else
      create_token(:delim, :value => char)
    end

  when :':'
    create_token(:colon)

  when :';'
    create_token(:semicolon)

  when :<
    if @s.peek(3) == '!--'
      @s.consume
      @s.consume
      @s.consume
      create_token(:cdo)
    else
      create_token(:delim, :value => char)
    end

  when :'@'
    if start_identifier?(@s.peek(3))
      create_token(:at_keyword, :value => consume_name)
    else
      create_token(:delim, :value => char)
    end

  when :'['
    create_token(:'[')

  when :'\\'
    if valid_escape?
      @s.reconsume
      consume_ident
    else
      # Parse error.
      create_token(:delim,
        :error => true,
        :value => char)
    end

  when :']'
    create_token(:']')

  when :'^'
    if @s.peek == '='
      @s.consume
      create_token(:prefix_match)
    else
      create_token(:delim, :value => char)
    end

  when :'{'
    create_token(:'{')

  when :'}'
    create_token(:'}')

  when :U, :u
    if @s.peek(2) =~ RE_UNICODE_RANGE_START
      @s.consume
      consume_unicode_range
    else
      @s.reconsume
      consume_ident
    end

  when :|
    case @s.peek
    when '='
      @s.consume
      create_token(:dash_match)

    when '|'
      @s.consume
      create_token(:column)

    else
      create_token(:delim, :value => char)
    end

  when :~
    if @s.peek == '='
      @s.consume
      create_token(:include_match)
    else
      create_token(:delim, :value => char)
    end

  else
    # Anything without a dedicated branch: digits start a numeric token,
    # name-start characters start an ident-like token, the rest are delims.
    case char
    when RE_DIGIT
      @s.reconsume
      consume_numeric

    when RE_NAME_START
      @s.reconsume
      consume_ident

    else
      create_token(:delim, :value => char)
    end
  end
end
def consume_bad_url
Consumes the remnants of a bad URL and returns the consumed text.
# Consumes the remnants of a bad URL and returns the consumed text.
#
# Reading stops at the end of the input or right after a `)` has been
# consumed; the `)` itself is not included in the returned text. Escapes are
# decoded through `consume_escaped`.
def consume_bad_url
  remnants = String.new

  until @s.eos?
    # The scanner's current character already starts an escape.
    if valid_escape?
      remnants << consume_escaped
      next
    end

    # The upcoming two characters form an escape: consume the first of them,
    # then decode the escape.
    if valid_escape?(@s.peek(2))
      @s.consume
      remnants << consume_escaped
      next
    end

    char = @s.consume
    break if char == ')'

    remnants << char
  end

  remnants
end
def consume_comments
Consumes comments and returns them, or `nil` if no comments were consumed.
# Consumes comments and returns them, or `nil` if no comments were consumed.
#
# A comment is only recognised when the next two characters are `/*`. The
# returned value is a `:comment` token whose `:value` is the comment text.
def consume_comments
  return nil unless @s.peek(2) == '/*'

  # Step over the two characters of the opening "/*".
  2.times { @s.consume }

  body = @s.scan_until(RE_COMMENT_CLOSE)

  if body
    # Drop the last two characters of the scanned text (presumably the
    # closing "*/" matched by RE_COMMENT_CLOSE).
    body.slice!(-2, 2)
  else
    # Parse error: no terminator was found, so the rest of the input is
    # treated as the comment text.
    body = @s.consume_rest
  end

  create_token(:comment, :value => body)
end
def consume_escaped
Consumes an escaped code point and returns its unescaped value.
This method assumes that the `\` has already been consumed, and that the
next character in the input has already been verified not to be a newline
or EOF.
# Consumes an escaped code point and returns its unescaped value.
#
# This method assumes that the `\` has already been consumed, and that the
# next character in the input has already been verified not to be a newline
# or EOF.
#
# Returns U+FFFD when the scanner is already at the end of the input, or when
# a hex escape names code point 0, a surrogate (0xD800..0xDFFF) or a value
# above 0x10FFFF.
def consume_escaped
  return "\ufffd" if @s.eos?

  hex_digits = @s.scan(RE_HEX)

  # Not a hex escape: the escaped value is simply the next character.
  return @s.consume unless hex_digits

  # If what follows the hex digits matches RE_WHITESPACE, consume it as part
  # of the escape.
  @s.consume if @s.peek =~ RE_WHITESPACE

  codepoint = hex_digits.hex
  unusable  = codepoint.zero? ||
              (0xD800..0xDFFF).cover?(codepoint) ||
              codepoint > 0x10FFFF

  unusable ? "\ufffd" : codepoint.chr(Encoding::UTF_8)
end
def consume_ident
Consumes an ident-like token and returns it.
# Consumes an ident-like token and returns it.
#
# The result is an `:ident` token, a `:function` token (when the name is
# directly followed by `(`), or whatever `consume_url` returns for an
# unquoted `url(`.
def consume_ident
  name = consume_name

  return create_token(:ident, :value => name) unless @s.peek == '('

  # Consume the "(".
  @s.consume

  return create_token(:function, :value => name) unless name.downcase == 'url'

  # Consume characters for as long as the next two characters match
  # RE_WHITESPACE_ANCHORED.
  @s.consume while @s.peek(2) =~ RE_WHITESPACE_ANCHORED

  if @s.peek(2) =~ RE_QUOTED_URL_START
    # Presumably a quoted argument follows, so `url` is tokenized like any
    # other function.
    create_token(:function, :value => name)
  else
    consume_url
  end
end
def consume_name
Experimental RBS support (using type sampling data from the type_fusion
project).
def consume_name: () -> String
This signature was generated using 1 sample from 1 application.
Consumes a name and returns it.
# Consumes a name and returns it.
#
# Reads runs matching RE_NAME and decoded escapes until a character that
# belongs to neither is found; that character is handed back to the scanner
# via `reconsume`.
def consume_name
  name = String.new

  until @s.eos?
    chunk = @s.scan(RE_NAME)

    if chunk
      name << chunk
      next
    end

    char = @s.consume

    if valid_escape?
      name << consume_escaped

    # Non-standard: IE * hack. When enabled, the character consumed after
    # the "*" is appended to the name.
    elsif char == '*' && @options[:preserve_hacks]
      name << @s.consume

    else
      @s.reconsume
      break
    end
  end

  name
end
def consume_number
Consumes a number and returns a 3-element array containing the number's
original representation, its numeric value, and its type (either
`:integer` or `:number`).
# Consumes a number and returns a 3-element array containing the number's
# original representation, its numeric value, and its type (either
# `:integer` or `:number`).
def consume_number
  repr = String.new
  type = :integer

  # Optional sign.
  repr << @s.consume if @s.peek =~ RE_NUMBER_SIGN

  # Leading digits (may be absent).
  digits = @s.scan(RE_DIGIT)
  repr << digits if digits

  # A decimal part and/or an exponent part each turn the value into a
  # :number. They are tried in this order.
  [RE_NUMBER_DECIMAL, RE_NUMBER_EXPONENT].each do |pattern|
    part = @s.scan(pattern)
    next unless part

    repr << part
    type = :number
  end

  [repr, convert_string_to_number(repr), type]
end
def consume_numeric
Consumes a numeric token and returns it.
# Consumes a numeric token and returns it.
#
# Produces a `:dimension` token when an identifier follows the number, a
# `:percentage` token when a `%` follows it, and a plain `:number` token
# otherwise.
def consume_numeric
  repr, value, type = consume_number

  # Integers are reported as Integer, everything else as Float.
  value = type == :integer ? value.to_i : value.to_f

  if start_identifier?(@s.peek(3))
    return create_token(:dimension,
      :repr  => repr,
      :type  => type,
      :unit  => consume_name,
      :value => value)
  end

  unless @s.peek == '%'
    return create_token(:number,
      :repr  => repr,
      :type  => type,
      :value => value)
  end

  # Consume the "%".
  @s.consume

  create_token(:percentage,
    :repr  => repr,
    :type  => type,
    :value => value)
end
def consume_string(ending = nil)
Consumes a string token that ends at the given character, and returns the
token.
# Consumes a string token that ends at the given character, and returns the
# token.
#
# When _ending_ is `nil`, the scanner's current character is used as the
# terminator. An unescaped newline inside the string yields a `:bad_string`
# token flagged with `:error => true`.
def consume_string(ending = nil)
  ending = @s.current if ending.nil?
  text   = String.new

  until @s.eos?
    char = @s.consume

    case char
    when ending
      break

    when "\n"
      # Parse error. The newline is handed back to the scanner.
      @s.reconsume
      return create_token(:bad_string,
        :error => true,
        :value => text)

    when '\\'
      upcoming = @s.peek

      if upcoming == ''
        # End of the input, so do nothing.
        next
      elsif upcoming == "\n"
        # Backslash followed by a newline: consume the newline and add
        # nothing to the value.
        @s.consume
      else
        text << consume_escaped
      end

    else
      text << char
    end
  end

  create_token(:string, :value => text)
end
def consume_unicode_range
Consumes a Unicode range token and returns it. Assumes the initial "u+" or
"U+" has already been consumed.
# Consumes a Unicode range token and returns it. Assumes the initial "u+" or
# "U+" has already been consumed.
#
# The returned `:unicode_range` token carries integer `:start` and `:end`
# values.
def consume_unicode_range
  digits = @s.scan(RE_HEX) || String.new

  # Pad with "?" wildcards from the input, up to six characters in total.
  digits << @s.consume while digits.length < 6 && @s.peek == '?'

  if digits.include?('?')
    # Wildcards: the range spans from every "?" read as 0 to every "?" read
    # as F.
    return create_token(:unicode_range,
      :start => digits.tr('?', '0').hex,
      :end   => digits.tr('?', 'F').hex)
  end

  first = digits.hex
  last  = first

  if @s.peek(2) =~ RE_UNICODE_RANGE_END
    # Consume one character (presumably the "-" separator), then read the
    # end of the range.
    @s.consume
    last = (@s.scan(RE_HEX) || '').hex
  end

  create_token(:unicode_range, :start => first, :end => last)
end
def consume_url
Consumes a URL token and returns it. Assumes the original "url(" has
already been consumed.
# Consumes a URL token and returns it. Assumes the original "url(" has
# already been consumed.
#
# Returns a `:url` token on success, or a `:bad_url` token whose value is the
# text read so far plus whatever `consume_bad_url` returns.
def consume_url
  url = String.new

  # Skip leading whitespace.
  @s.scan(RE_WHITESPACE)

  until @s.eos?
    char = @s.consume

    case char
    when ')'
      break

    when RE_WHITESPACE
      @s.scan(RE_WHITESPACE)

      # Whitespace is only allowed directly before the closing ")" or the
      # end of the input.
      # NOTE(review): unlike the other :bad_url branches, this one does not
      # set :error => true -- confirm whether that is intentional.
      unless @s.eos? || @s.peek == ')'
        return create_token(:bad_url, :value => url + consume_bad_url)
      end

      @s.consume
      break

    when '"', "'", '(', RE_NON_PRINTABLE
      # Parse error.
      return create_token(:bad_url,
        :error => true,
        :value => url + consume_bad_url)

    when '\\'
      unless valid_escape?
        # Parse error.
        return create_token(:bad_url,
          :error => true,
          :value => url + consume_bad_url)
      end

      url << consume_escaped

    else
      url << char
    end
  end

  create_token(:url, :value => url)
end
def convert_string_to_number(str)
Converts a valid CSS number string into a number and returns the number.
# Converts a valid CSS number string into a number and returns the number.
#
# _str_ is matched against RE_NUMBER_STR and the named captures (`sign`,
# `integer`, `fractional`, `exponent_sign`, `exponent`) are combined
# arithmetically. The result is limited to the range
# -Float::MAX..Float::MAX.
#
# NOTE(review): `10**n` is evaluated with whatever exponent appears in _str_;
# very large exponents may be expensive or raise depending on the Ruby
# version -- confirm whether callers bound the input.
def convert_string_to_number(str)
  parts = RE_NUMBER_STR.match(str)

  sign            = parts[:sign] == '-' ? -1 : 1
  integer_part    = parts[:integer].to_i
  fraction_text   = parts[:fractional]
  fraction_value  = fraction_text.to_i
  fraction_digits = fraction_text ? fraction_text.length : 0
  exponent_sign   = parts[:exponent_sign] == '-' ? -1 : 1
  exponent        = parts[:exponent].to_i

  # I know this formula looks nutty, but it's exactly what's defined in the
  # spec, and it works.
  mantissa = integer_part + fraction_value * 10**(-fraction_digits)
  number   = sign * mantissa * 10**(exponent_sign * exponent)

  # Maximum and minimum values aren't defined in the spec, but are enforced
  # here for sanity.
  return Float::MAX if number > Float::MAX
  return -Float::MAX if number < -Float::MAX

  number
end
def create_token(type, properties = {})
Experimental RBS support (using type sampling data from the type_fusion
project).
def create_token: (Symbol type, ?value | String properties) -> node | Symbol | pos | Integer | raw | String | value | String
This signature was generated using 1 sample from 1 application.
# Creates and returns a new token Hash with the given _type_.
#
# The token always starts with `:node` (the type), `:pos` (the scanner's
# marker) and `:raw` (the scanner's marked text). Entries from _properties_
# are merged in afterwards, so they win on key collisions. _properties_
# itself is not modified.
def create_token(type, properties = {})
  token = {
    :node => type,
    :pos  => @s.marker,
    :raw  => @s.marked
  }

  token.update(properties)
end
def initialize(input, options = {})
Initializes a new Tokenizer.
Options:
* **:preserve_comments** - If `true`, comments will be preserved as
`:comment` tokens.
* **:preserve_hacks** - If `true`, certain non-standard browser hacks
such as the IE "*" hack will be preserved even though they violate
CSS 3 syntax rules.
# Initializes a new Tokenizer.
#
# _input_ is run through `preprocess` and wrapped in a Scanner.
#
# Options:
#
#   * **:preserve_comments** - If `true`, comments will be preserved as
#     `:comment` tokens.
#
#   * **:preserve_hacks** - If `true`, certain non-standard browser hacks
#     such as the IE "*" hack will be preserved even though they violate
#     CSS 3 syntax rules.
def initialize(input, options = {})
  prepared = preprocess(input)

  @s       = Scanner.new(prepared)
  @options = options
end
def preprocess(input)
Experimental RBS support (using type sampling data from the type_fusion
project).
def preprocess: (String input) -> String
This signature was generated using 1 sample from 1 application.
Preprocesses _input_ to prepare it for the tokenizer.
# Preprocesses _input_ to prepare it for the tokenizer.
#
# _input_ is converted with `to_s` and transcoded to UTF-8 (invalid and
# undefined sequences are replaced). Then every "\r\n", "\r" and "\f" becomes
# "\n", and every U+0000 becomes U+FFFD. A new String is returned; the
# argument is not modified.
def preprocess(input)
  utf8 = input.to_s.encode('UTF-8', :invalid => :replace, :undef => :replace)

  utf8
    .gsub(/(?:\r\n|[\r\f])/, "\n")
    .gsub("\u0000", "\ufffd")
end
def start_identifier?(text = nil)
Returns `true` if the given three-character _text_ would start an
identifier. If _text_ is `nil`, the current and next two characters in the
input stream will be checked, but will not be consumed.
# Returns `true` if the given three-character _text_ would start an
# identifier. If _text_ is `nil`, the current and next two characters in the
# input stream will be checked, but will not be consumed.
def start_identifier?(text = nil)
  text = @s.current + @s.peek(2) if text.nil?

  first = text[0]

  if first == '-'
    # After a leading "-": another "-", a name-start character or a valid
    # escape makes this an identifier start.
    second = text[1]

    return true if second == '-'
    return true if second =~ RE_NAME_START

    !!valid_escape?(text[1, 2])
  elsif first =~ RE_NAME_START
    true
  elsif first == '\\'
    valid_escape?(text[0, 2])
  else
    false
  end
end
def start_number?(text = nil)
Returns `true` if the given three-character _text_ would start a number.
If _text_ is `nil`, the current and next two characters in the input
stream will be checked, but will not be consumed.
# Returns `true` if the given three-character _text_ would start a number.
# If _text_ is `nil`, the current and next two characters in the input
# stream will be checked, but will not be consumed.
def start_number?(text = nil)
  text = @s.current + @s.peek(2) if text.nil?

  first  = text[0]
  second = text[1]
  third  = text[2]

  case first
  when '+', '-'
    # A sign must be followed by a digit, or by "." and then a digit.
    return true if second =~ RE_DIGIT

    !!(second == '.' && third =~ RE_DIGIT)

  when '.'
    !!(second =~ RE_DIGIT)

  when RE_DIGIT
    true

  else
    false
  end
end
def tokenize
Experimental RBS support (using type sampling data from the type_fusion
project).
def tokenize: () ->
This signature was generated using 2 samples from 1 application.
# Tokenizes the input stream and returns an array of tokens.
#
# The scanner is reset first; tokens are then collected from `consume` until
# it returns a falsy value.
def tokenize
  @s.reset

  collected = []

  loop do
    token = consume
    break unless token

    collected << token
  end

  collected
end
def valid_escape?(text = nil)
Returns `true` if the given two-character _text_ is the beginning of a
valid escape sequence. If _text_ is `nil`, the current and next character
in the input stream will be checked, but will not be consumed.
# Returns `true` if the given two-character _text_ is the beginning of a
# valid escape sequence. If _text_ is `nil`, the current and next character
# in the input stream will be checked, but will not be consumed.
#
# An escape is valid when the first character is a backslash and the second
# is anything other than a newline (a missing second character also counts
# as valid).
def valid_escape?(text = nil)
  text = @s.current + @s.peek if text.nil?

  return false unless text[0] == '\\'

  text[1] != "\n"
end