class HexaPDF::Content::Tokenizer
:nodoc:
More efficient tokenizer for content streams. This tokenizer class works directly on a
string and not on an IO.
Changes:
* Since a content stream is usually parsed front to back, a StopIteration error can be raised
instead of returning NO_MORE_TOKENS once the end of the string is reached, to avoid costly
checks in each iteration. If this behaviour is wanted, pass “raise_on_eos: true” to the
constructor.
* Indirect object references are not supported by this tokenizer!
See: PDF2.0 s7.2
def initialize(string, raise_on_eos: false)
# Creates a tokenizer that works directly on +string+.
#
# string::       The data to tokenize. It is wrapped in a StringScanner (@ss) and the
#                original object is additionally kept in @string.
# raise_on_eos:: Stored in @raise_on_eos. When true, #next_token raises StopIteration once the
#                end of the string is reached instead of returning NO_MORE_TOKENS.
def initialize(string, raise_on_eos: false)
  @ss = StringScanner.new(string)
  @string = string
  @raise_on_eos = raise_on_eos
end
def next_token
# Returns the next token found at or after the current scan position.
#
# Leading whitespace (WHITESPACE_MULTI_RE) is skipped first, then the next byte selects the
# parsing routine. Comments (introduced by '%') are skipped up to the next CR/LF and the
# following token is returned instead.
#
# At the end of the string, StopIteration is raised if @raise_on_eos is set, otherwise
# NO_MORE_TOKENS is returned.
#
# Raises HexaPDF::MalformedPDFError for a '>' that is not part of '>>' and for a ')' that
# does not belong to a literal string.
def next_token
  @ss.skip(WHITESPACE_MULTI_RE)
  case (byte = @ss.scan_byte || -1) # -1 stands in for "no byte left"
  when 43, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57 # + - . 0..9
    @ss.pos -= 1 # put the byte back so that parse_number sees it
    parse_number
  when 47 # /
    parse_name
  when 40 # (
    parse_literal_string
  when 60 # <
    if @ss.peek_byte == 60
      # '<<' starts a dictionary; consume the second '<'
      @ss.pos += 1
      TOKEN_DICT_START
    else
      parse_hex_string
    end
  when 62 # >
    # A single '>' is only valid as end of a hex string, which parse_hex_string consumes
    # itself, so here it has to be the first half of '>>'.
    unless @ss.scan_byte == 62
      raise HexaPDF::MalformedPDFError.new("Delimiter '>' found at invalid position", pos: pos - 1)
    end
    TOKEN_DICT_END
  when 91 # [
    TOKEN_ARRAY_START
  when 93 # ]
    TOKEN_ARRAY_END
  when 41 # )
    raise HexaPDF::MalformedPDFError.new("Delimiter ')' found at invalid position", pos: pos - 1)
  when 123, 125 # { }
    Token.new(byte.chr.b)
  when 37 # %
    # Skip the comment up to (but not including) the next line break. If there is no line
    # break, the comment runs to the end of the string and no further token exists.
    unless @ss.skip_until(/(?=[\r\n])/)
      if @raise_on_eos
        raise StopIteration
      else
        return NO_MORE_TOKENS
      end
    end
    next_token
  when -1
    @raise_on_eos ? raise(StopIteration) : NO_MORE_TOKENS
  else
    @ss.pos -= 1 # put the byte back so that parse_keyword sees it
    parse_keyword
  end
end
def parse_number
# Parses the number starting at the current scan position.
#
# Returns a Float if a real number (digits with a decimal point, optionally signed) is found,
# or the integer scanned by StringScanner#scan_integer. If neither matches, the result of
# #parse_keyword is returned instead.
def parse_number
  if (val = @ss.scan(/[+-]?(?:\d+\.\d*|\.\d+)/))
    # A real number may end with the decimal point (e.g. "3."); append a digit so that
    # Float() is handed a complete fractional part.
    val << '0' if val.getbyte(-1) == 46 # dot '.'
    Float(val)
  elsif (val = @ss.scan_integer)
    val.to_i
  else
    parse_keyword
  end
end
def pos
# Returns the current position of the underlying StringScanner.
def pos
  @ss.pos
end
def pos=(pos)
# Moves the underlying StringScanner to the position +pos+.
def pos=(pos)
  @ss.pos = pos
end
def prepare_string_scanner(*)
# Does nothing: accepts any number of positional arguments, ignores them and returns nil.
#
# NOTE(review): presumably exists so that callers written against an IO-based tokenizer can
# still call this method here, where the whole string is already available - confirm.
def prepare_string_scanner(*)
  nil
end
def scan_until(re)
# Delegates to StringScanner#scan_until with the pattern +re+ and returns its result.
def scan_until(re)
  @ss.scan_until(re)
end