class HexaPDF::Content::Tokenizer

More efficient tokenizer for content streams. This tokenizer class works directly on a
string and not on an IO.
Changes:
* Since a content stream is usually parsed front to back, a StopIteration error can be raised
instead of returning NO_MORE_TOKENS once the end of the string is reached to avoid costly
checks in each iteration. If this behaviour is wanted, pass “raise_on_eos: true” in the
constructor.
* Indirect object references are not supported by this tokenizer!
See: PDF2.0 s7.2
:nodoc:

def initialize(string, raise_on_eos: false)

Creates a new tokenizer.

# Creates a new tokenizer that works directly on the given string.
#
# string::       The content stream data to tokenize; it is wrapped in a StringScanner and
#                also kept as-is.
# raise_on_eos:: When true, #next_token raises StopIteration at the end of the string instead
#                of returning NO_MORE_TOKENS.
def initialize(string, raise_on_eos: false)
  @raise_on_eos = raise_on_eos
  @string = string
  @ss = StringScanner.new(@string)
end

def next_token

See: HexaPDF::Tokenizer#next_token

# Returns the next token of the content stream, skipping leading whitespace and comments.
#
# The first non-whitespace byte decides how the token is read:
#
# * '+', '-', '.', '0'..'9' -> delegated to #parse_number
# * '/'                     -> delegated to parse_name
# * '('                     -> delegated to parse_literal_string
# * '<<' / '<'              -> TOKEN_DICT_START / delegated to parse_hex_string
# * '>>'                    -> TOKEN_DICT_END
# * '[' / ']'               -> TOKEN_ARRAY_START / TOKEN_ARRAY_END
# * '{' / '}'               -> a Token wrapping the single character
# * anything else           -> delegated to parse_keyword
#
# When the end of the string is reached, StopIteration is raised if the tokenizer was created
# with "raise_on_eos: true"; otherwise NO_MORE_TOKENS is returned.
#
# Raises HexaPDF::MalformedPDFError for a ')' outside of a literal string and for a '>' that
# is not immediately followed by a second '>'.
#
# See: HexaPDF::Tokenizer#next_token
def next_token
  @ss.skip(WHITESPACE_MULTI_RE)
  byte = @ss.scan_byte

  # Comments are skipped in a loop instead of by calling #next_token recursively so that a
  # stream with a very large number of comment lines cannot exhaust the call stack.
  while byte == 37 # %
    unless @ss.skip_until(/(?=[\r\n])/)
      # The comment runs to the end of the string, so nothing is left to tokenize.
      raise StopIteration if @raise_on_eos
      return NO_MORE_TOKENS
    end
    @ss.skip(WHITESPACE_MULTI_RE)
    byte = @ss.scan_byte
  end

  # -1 stands in for nil, i.e. the scanner was already at the end of the string.
  case byte || -1
  when 43, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57 # + - . 0..9
    @ss.pos -= 1 # un-read the byte, the number parser needs to see it
    parse_number
  when 47 # /
    parse_name
  when 40 # (
    parse_literal_string
  when 60 # <
    if @ss.peek_byte == 60
      @ss.pos += 1 # consume the second '<'
      TOKEN_DICT_START
    else
      parse_hex_string
    end
  when 62 # >
    unless @ss.scan_byte == 62
      raise HexaPDF::MalformedPDFError.new("Delimiter '>' found at invalid position", pos: pos - 1)
    end
    TOKEN_DICT_END
  when 91 # [
    TOKEN_ARRAY_START
  when 93 # ]
    TOKEN_ARRAY_END
  when 41 # )
    raise HexaPDF::MalformedPDFError.new("Delimiter ')' found at invalid position", pos: pos - 1)
  when 123, 125 # { }
    Token.new(byte.chr.b)
  when -1
    raise StopIteration if @raise_on_eos
    NO_MORE_TOKENS
  else
    @ss.pos -= 1 # un-read the byte, it is the first character of the keyword
    parse_keyword
  end
end

def parse_number

See: HexaPDF::Tokenizer#parse_number

# Parses the number at the current scanner position.
#
# A real number (digits with a decimal point, optionally signed) is returned as a Float, an
# integer as an Integer. If neither form matches, parsing is handed over to parse_keyword.
#
# See: HexaPDF::Tokenizer#parse_number
def parse_number
  real = @ss.scan(/[+-]?(?:\d+\.\d*|\.\d+)/)
  if real
    # Kernel#Float rejects a trailing dot ("5."), so complete it to "5.0" first.
    real << '0' if real.end_with?('.')
    return Float(real)
  end

  integer = @ss.scan_integer
  integer ? integer.to_i : parse_keyword
end

def pos

See: HexaPDF::Tokenizer#pos

# Returns the current position of the underlying string scanner.
#
# See: HexaPDF::Tokenizer#pos
def pos
  @ss.pointer # StringScanner#pointer is an alias for StringScanner#pos
end

def pos=(pos)

See: HexaPDF::Tokenizer#pos=

# Sets the position of the underlying string scanner to +pos+.
#
# See: HexaPDF::Tokenizer#pos=
def pos=(pos)
  @ss.pointer = pos # StringScanner#pointer= is an alias for StringScanner#pos=
end

def prepare_string_scanner(*)

Stub implementation to prevent errors for not-overridden methods.

# Stub implementation to prevent errors for not-overridden methods.
#
# Accepts and ignores any positional arguments and always returns +nil+.
def prepare_string_scanner(*)
  nil
end

def scan_until(re)

See: HexaPDF::Tokenizer#scan_until

# Forwards to StringScanner#scan_until on the underlying scanner and returns its result.
#
# See: HexaPDF::Tokenizer#scan_until
def scan_until(re)
  scanner = @ss
  scanner.scan_until(re)
end

Namespace

HexaPDF::Content

Parent class

HexaPDF::Tokenizer

Instance Methods

Defined in

lib/hexapdf/content/parser.rb

Modules

Classes