class CodeRay::Scanners::Scanner

if you want.
You can also use map, any?, find and even sort_by,
OK, this is a very simple example :)
# prints: (*==)++;
end
puts text if kind == :operator
for text, kind in c_scanner
c_scanner = CodeRay::Scanners.new "if (*p == '{') nest++;"
require 'coderay'
Tokens:
It is also Enumerable, so you can use it like an Array of
makes it easy to access the scanning methods inside.
It is a subclass of Ruby’s great StringScanner, which
The base class for all Scanners.
= Scanner

def binary_string

will scan next.
To be used with #pos, which is the index of the byte the scanner

The string in binary encoding.
# Returns the scanned string as a sequence of bytes, memoized in
# @binary_string.
#
# When the string's byte length differs from its character length, a copy
# tagged with the 'binary' encoding is returned so that byte offsets (such
# as the scanner position) can be used as indexes. Otherwise the string
# itself is returned unchanged.
def binary_string
  return @binary_string if @binary_string

  source = string
  @binary_string =
    if source.respond_to?(:bytesize) && source.bytesize != source.size
      #:nocov:
      source.dup.force_encoding('binary')
      #:nocov:
    else
      source
    end
end

def column pos = self.pos

See also: #line.
The current column position of the scanner, starting with 1.
# Returns the 1-based column of byte position +pos+ (defaults to the
# current scanner position).
#
# The column is the distance from the last newline found before +pos+ in
# the binary string; without such a newline it is simply pos + 1.
# Non-positive positions always yield 1.
def column pos = self.pos
  return 1 unless pos > 0

  last_newline = binary_string.rindex(?\n, pos - 1)
  last_newline ? pos - last_newline : pos + 1
end

def each &block

Traverse the tokens.
# Iterates over the scanner's tokens, passing each one to +block+.
# Returns whatever the token collection's own #each returns.
def each &block
  token_stream = tokens
  token_stream.each(&block)
end

def encode_with_encoding code, target_encoding

# Converts +code+ to +target_encoding+ with UNIX newlines.
#
# * If +code+ is already tagged with the target encoding and is valid, only
#   the newlines are converted (via to_unix).
# * If it is tagged with the target encoding but contains invalid bytes, the
#   real source encoding is guessed with guess_encoding.
# * Otherwise the string's own encoding is taken as the source encoding.
#
# In the last two cases the string is transcoded, replacing invalid and
# undefined characters and normalizing newlines.
def encode_with_encoding code, target_encoding
  already_target = code.encoding == target_encoding
  return to_unix(code) if already_target && code.valid_encoding?

  source_encoding = already_target ? guess_encoding(code) : code.encoding
  code.encode target_encoding, source_encoding, :universal_newline => true, :undef => :replace, :invalid => :replace
end

def encoding name = 'UTF-8'

The encoding used internally by this scanner.
# Returns the encoding used internally by this scanner.
#
# The first call looks up +name+ with Encoding.find and memoizes the result
# in @encoding; once a value is memoized, +name+ is ignored. When
# Encoding.find is not available the result is nil.
def encoding name = 'UTF-8'
  return @encoding if @encoding

  @encoding = (Encoding.find(name) if defined?(Encoding.find))
end

def file_extension extension = lang

The typical filename suffix for this scanner's language.
# Returns the typical filename suffix for this scanner's language.
#
# The first call stores +extension+ (converted to a String; defaults to
# +lang+) in @file_extension; subsequent calls return the stored value and
# ignore the argument.
def file_extension extension = lang
  return @file_extension if @file_extension

  @file_extension = extension.to_s
end

def file_extension

the default file extension for this scanner
# Returns the default file extension for this scanner by delegating to the
# class-level +file_extension+.
def file_extension
  scanner_class = self.class
  scanner_class.file_extension
end

def guess_encoding s

# Guesses the encoding of +s+ by piping its first 1024 characters into the
# external command <tt>file -b --mime -</tt> and reading the reported
# charset.
#
# Returns the matching Encoding, or Encoding::BINARY when the command
# prints nothing, reports no charset, or reports a charset name that Ruby
# does not know.
def guess_encoding s
  #:nocov:
  IO.popen("file -b --mime -", "w+") do |file|
    file.write s[0, 1024]
    file.close_write
    # gets returns nil when the command produced no output; to_s keeps the
    # charset lookup from failing on nil.
    charset = file.gets.to_s[/charset=([-\w]+)/, 1]
    begin
      charset ? Encoding.find(charset) : Encoding::BINARY
    rescue ArgumentError
      # Raised by Encoding.find for unknown names such as "unknown-8bit".
      Encoding::BINARY
    end
  end
  #:nocov:
end

def initialize code = '', options = {}

Else, a Tokens object is used.

overwrite default options here.)
It is merged with the default options of the class (you can
* +options+ is a Hash with Symbols as keys.
StringScanner.
* +code+ is the input String and is handled by the superclass

Create a new Scanner.
# Creates a new scanner.
#
# * +code+ is the input; it is normalized by the class-level +normalize+
#   and then handed to the superclass constructor.
# * +options+ is merged over the class's DEFAULT_OPTIONS and stored in
#   @options. If it contains :tokens, that object collects the tokens;
#   otherwise a new Tokens object is created.
#
# Raises NotImplementedError when called on the base Scanner class itself.
# Finishes by calling the +setup+ hook.
def initialize code = '', options = {}
  scanner_class = self.class
  if scanner_class == Scanner
    raise NotImplementedError, "I am only the basic Scanner class. I can't scan anything. :( Use my subclasses."
  end

  @options = scanner_class::DEFAULT_OPTIONS.merge options

  super scanner_class.normalize(code)

  @tokens = options[:tokens] || Tokens.new
  # Let the token collection know which scanner produced it, if it cares.
  @tokens.scanner = self if @tokens.respond_to? :scanner=

  setup
end

def lang

The lang of this Scanner class, which is equal to its Plugin ID.
# Returns the language of this scanner class, i.e. the value stored in
# @plugin_id (nil if none has been set).
def lang
  @plugin_id
end

def lang

the Plugin ID for this scanner
# Returns the Plugin ID for this scanner by delegating to the class-level
# +lang+.
def lang
  scanner_class = self.class
  scanner_class.lang
end

def line pos = self.pos

for debugging only.
Beware, this is implemented inefficiently. It should be used

See also: #column.
The current line position of the scanner, starting with 1.
# Returns the 1-based line number of byte position +pos+ (defaults to the
# current scanner position).
#
# Counts the newlines in the binary string before +pos+ on every call, so
# the cost grows with +pos+. Non-positive positions always yield 1.
def line pos = self.pos
  return 1 unless pos > 0

  preceding = binary_string[0, pos]
  1 + preceding.count("\n")
end

def normalize code

replaced by placeholders. Always returns a new object.
scanner's internal encoding, with invalid and undefined characters
Normalizes the given code into a string with UNIX newlines, in the
# Normalizes +code+ into a String with UNIX newlines in the scanner's
# internal encoding.
#
# Non-String input is converted with to_s first. An empty string is
# returned as is. Strings that know about encodings go through
# encode_with_encoding; anything else only gets its newlines converted by
# to_unix.
def normalize code
  code = code.to_s unless code.is_a? ::String
  return code if code.empty?
  return to_unix(code) unless code.respond_to? :encoding

  encode_with_encoding code, self.encoding
end

def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller

Scanner error with additional status information
# Raises a ScanError carrying additional status information about the
# scanner.
#
# * +msg+ is the error description.
# * +tokens+ is the token collection scanned so far; its size and its last
#   ten entries are reported when it responds to #size / #last.
# * +state+ is reported as the scanner state (defaults to self.state, or a
#   placeholder text when that is nil/false).
# * +ambit+ is the number of bytes of code shown before and after the
#   current position.
# * +backtrace+ is attached to the raised error (defaults to +caller+).
def raise_inspect msg, tokens, state = self.state || 'No state given!', ambit = 30, backtrace = caller
  # Clamp the start of the "before" window at 0: a negative start index
  # would be counted from the end of the string (or yield nil) instead of
  # showing the code preceding the current position.
  window_start = [pos - ambit, 0].max
  raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d  column: %d  pos: %d
matched: %p  state: %p
bol? = %p,  eos? = %p

surrounding code:
%p  ~~  %p


***ERROR***

  EOE
    File.basename(caller[0]),
    msg,
    tokens.respond_to?(:size) ? tokens.size : 0,
    tokens.respond_to?(:last) ? tokens.last(10).map { |t| t.inspect }.join("\n") : '',
    line, column, pos,
    matched, state, bol?, eos?,
    binary_string[window_start, pos - window_start],
    binary_string[pos, ambit],
  ], backtrace
end

def reset

method instead of this one.
Sets back the scanner. Subclasses should redefine the reset_instance
# Sets the scanner back: first lets the superclass reset itself, then
# calls reset_instance. Subclasses should override reset_instance rather
# than this method.
def reset
  super()
  reset_instance
end

def reset_instance

Resets the scanner.
# Resets the per-scan state of the scanner.
#
# Clears the token collection (when it supports #clear and the
# :keep_tokens option is not set), drops the cached tokenize result and
# invalidates the memoized binary string.
def reset_instance
  if @tokens.respond_to?(:clear)
    @tokens.clear unless @options[:keep_tokens]
  end
  @cached_tokens = nil
  @binary_string = nil if defined? @binary_string
end

def scan_rest

This method also avoids a JRuby 1.9 mode bug.
Shorthand for scan_until(/\z/).
# Returns everything that has not been scanned yet and moves the scanner
# to the end of the string.
def scan_rest
  remaining = self.rest
  terminate
  remaining
end

def scan_tokens tokens, options # :doc:

:doc:
and must only use Tokens#<< for storing scanned tokens!
Subclasses must implement this method; it must return +tokens+

subclass implements.
This is the central method, and commonly the only one a
# The central scanning method, to be implemented by subclasses.
#
# Implementations receive the +tokens+ collection and the merged
# +options+. This base version only raises NotImplementedError.
def scan_tokens tokens, options  # :doc:
  message = "#{self.class}#scan_tokens not implemented."
  raise NotImplementedError, message
end

def setup # :doc:

:doc:
scan.
Use reset for initialization that has to be done once per

that has to be done once per instance.
Can be implemented by subclasses to do some initialization
# Hook for subclasses: one-time, per-instance initialization. It is
# intentionally empty here and returns nil.
def setup  # :doc:
end

def string= code

Set a new string to be scanned.
# Sets a new string to be scanned.
#
# The input is normalized by the class-level +normalize+, passed on to the
# superclass setter, and the per-scan state is then cleared with
# reset_instance.
def string= code
  normalized = self.class.normalize(code)
  super normalized
  reset_instance
end

def to_unix code

# Converts Windows (\r\n) and old Mac (\r) line endings in +code+ to UNIX
# newlines. Returns +code+ itself when it contains no carriage return.
def to_unix code
  return code unless code.index(?\r)

  code.gsub(/\r\n?/, "\n")
end

def tokenize source = nil, options = {}

Scan the code and returns all tokens in a Tokens object.
# Scans the code and returns all tokens.
#
# * +source+ may be nil (rescan the current string after a reset), an
#   Array of parts (joined for scanning; the result is split back into
#   parts of the original sizes), or anything else accepted by the
#   class-level +normalize+.
# * +options+ is merged over the instance options; :tokens selects the
#   collection to fill.
#
# Errors raised by scan_tokens are re-raised through raise_inspect with
# the original message and backtrace. The resulting tokens are cached in
# @cached_tokens.
def tokenize source = nil, options = {}
  options = @options.merge(options)
  @tokens = options[:tokens] || @tokens || Tokens.new
  @tokens.scanner = self if @tokens.respond_to? :scanner=

  multipart = source.is_a? Array
  if multipart
    self.string = self.class.normalize(source.join)
  elsif source.nil?
    reset
  else
    self.string = self.class.normalize(source)
  end

  begin
    scan_tokens @tokens, options
  rescue => e
    message = "Error in %s#scan_tokens, initial state was: %p" % [self.class, defined?(state) && state]
    raise_inspect e.message, @tokens, message, 30, e.backtrace
  end

  @cached_tokens = @tokens
  return @tokens unless multipart

  part_sizes = source.map { |part| part.size }
  @tokens.split_into_parts(*part_sizes)
end

def tokens

Cache the result of tokenize.
# Returns the scanned tokens, running tokenize on first use and caching
# its result in @cached_tokens.
def tokens
  return @cached_tokens if @cached_tokens

  @cached_tokens = tokenize
end