lib/rouge/lexers/yaml.rb



# -*- coding: utf-8 -*- #

module Rouge
  module Lexers
    class YAML < RegexLexer
      # Descriptive metadata declared through the lexer DSL: display title,
      # one-line description, MIME type, primary tag and an alternate alias.
      title "YAML"
      desc "Yaml Ain't Markup Language (yaml.org)"
      mimetypes 'text/x-yaml'
      tag 'yaml'
      aliases 'yml'

      # Content sniffing: a document whose first non-blank content is a
      # %YAML directive is certainly YAML.
      #
      # Returns 1 when the directive is found, nil otherwise.
      def self.analyze_text(text)
        directive_at = text =~ /\A\s*%YAML/m
        directive_at ? 1 : nil
      end

      # Glob patterns for file names associated with this lexer.
      filenames '*.yaml', '*.yml'
      # NB: Tabs are forbidden as indentation in YAML, which is why the rules
      # in this lexer match spaces explicitly with patterns like /[ ]+/.

      # Put the indentation bookkeeping back into its initial state: a stack
      # holding only column 0, no pending indent, and no block-scalar indent.
      def reset_indent
        puts "    yaml: reset_indent" if @debug
        @indent_stack, @next_indent = [0], 0
        @block_scalar_indent = nil
      end

      # The current indentation level, i.e. the top of the indent stack.
      #
      # Raises a RuntimeError if the stack has been emptied.
      def indent
        return @indent_stack.last unless @indent_stack.empty?

        raise 'empty indent stack!'
      end

      # True when +level+ lies to the left of the current indentation.
      def dedent?(level)
        self.indent > level
      end

      # True when +level+ lies to the right of the current indentation.
      def indent?(level)
        self.indent < level
      end

      # Record the width of +match+ (a run of leading spaces) as the candidate
      # indentation, unwinding the indent stack if the line has dedented.
      #
      # Returns a two-element array [valid, excess]: the part of +match+ up to
      # the indentation level that remains after unwinding, and whatever
      # extends past it. The excess is empty when no dedent happened.
      def save_indent(match)
        @next_indent = match.size
        puts "    yaml: indent: #{self.indent}/#@next_indent" if @debug
        puts "    yaml: popping indent stack - before: #@indent_stack" if @debug

        # same or deeper indentation: nothing to unwind
        return [match, ''] unless dedent?(@next_indent)

        @indent_stack.pop while dedent?(@next_indent)
        puts "    yaml: popping indent stack - after: #@indent_stack" if @debug
        puts "    yaml: indent: #{self.indent}/#@next_indent" if @debug

        # dedenting to a level that was never indented to is an error, so
        # the spaces beyond the surviving level are handed back separately
        surviving = self.indent
        [match[0...surviving], match[surviving..-1]]
      end

      # Widen the pending indentation by the length of +match+ and return
      # the new value.
      def continue_indent(match)
        puts "    yaml: continue_indent" if @debug
        @next_indent = @next_indent + match.size
      end

      # Commit the pending indentation: if it is deeper than the current
      # level it is pushed onto the indent stack.
      #
      # Unless opts[:implicit] is set, the pending indentation is then
      # advanced past +match+ (the indicator that was just consumed).
      def set_indent(match, opts={})
        @indent_stack.push(@next_indent) if @next_indent > indent
        return if opts[:implicit]

        @next_indent += match.size
      end

      # Matches a single character that is neither whitespace nor one of the
      # indicator characters listed in the class. It is interpolated into the
      # lookaheads that detect where a plain (unquoted) scalar begins.
      plain_scalar_start = /[^ \t\n\r\f\v?:,\[\]{}#&*!\|>'"%@`]/

      # Hook that resets the indentation bookkeeping; presumably invoked by
      # the lexer framework when lexing starts -- confirm against RegexLexer.
      start { reset_indent }

      # Rules mixed into several states: a `#` comment runs to end of line.
      state(:basic) { rule %r/#.*$/, Comment::Single }

      # Line-level dispatcher: decides what kind of line is starting.
      state :root do
        mixin :basic

        # runs of line breaks
        rule %r/\n+/, Text

        # spaces that lead only into a comment or the end of the line
        rule %r/[ ]+(?=#|$)/, Text

        # %YAML directive: indentation tracking starts over
        rule(%r/^%YAML\b/) do
          token Name::Tag
          reset_indent
          push :yaml_directive
        end

        # %TAG directive: indentation tracking starts over
        rule(%r/^%TAG\b/) do
          token Name::Tag
          reset_indent
          push :tag_directive
        end

        # document start (`---`) and document end (`...`) markers
        rule(%r/^(?:---|\.\.\.)(?= |$)/) do
          token Name::Namespace
          reset_indent
          push :block_line
        end

        # leading spaces of a content line; any spaces past the level that
        # survives a dedent are emitted as an error
        rule(%r/[ ]*(?!\s|$)/) do |m|
          valid, excess = save_indent(m[0])
          token Text, valid
          token Error, excess
          push :block_line
          push :indentation
        end
      end

      # Consumes block collection indicators (`-`, `?`, `:`) at the start of
      # a line, accumulating the indentation they contribute.
      state :indentation do
        # nothing but whitespace up to the line break: drop this state and
        # the one beneath it
        rule %r/\s*?\n/ do
          token Text
          pop! 2
        end

        # spaces in front of a block collection indicator
        rule %r/[ ]+(?=[-:?](?:[ ]|$))/ do |m|
          token Text
          continue_indent(m[0])
        end

        # the block collection indicator itself
        rule %r/[?:-](?=[ ]|$)/ do |m|
          token Punctuation::Indicator
          set_indent m[0]
        end

        # remaining spaces: the block line proper starts after them
        rule %r/[ ]*/ do |m|
          token Text
          continue_indent(m[0])
          pop!
        end
      end

      # The content of a line in block context, after its indentation.
      state :block_line do
        # end of line (possibly followed by a comment)
        rule %r/[ ]*(?=#|$)/, Text, :pop!
        rule %r/[ ]+/, Text

        # tags, anchors, and aliases
        mixin :descriptors
        # block collections and scalars
        mixin :block_nodes
        # flow collections and quoted scalars
        mixin :flow_nodes

        # anything that can begin a plain scalar; the lookahead consumes
        # nothing, the scalar state does the actual matching
        rule(%r/(?=#{plain_scalar_start}|[?:-][^ \t\n\r\f\v])/) do
          token Name::Variable
          push :plain_scalar_in_block_context
        end
      end

      # Node properties: tags, anchors and aliases.
      state :descriptors do
        # verbatim tag: !<...>
        rule %r/!<[0-9A-Za-z;\/?:@&=+$,_.!~*'()\[\]%-]+>/, Keyword::Type

        # shorthand tag: '!', '!suffix' or '!handle!suffix'
        rule %r(
          (?:![\w-]+)? # handle
          !(?:[\w;/?:@&=+$,.!~*\'()\[\]%-]*) # suffix
        )x, Keyword::Type

        # anchor definition: &name
        rule %r/&[\w-]+/, Name::Label

        # alias reference: *name
        rule %r/\*[\w-]+/, Name::Variable
      end

      # Block-style constructs that can appear inside a block line.
      state :block_nodes do
        # the `:` of an implicit key; it commits the pending indentation
        # without advancing past the indicator
        rule(%r/:(?=\s|$)/) do |m|
          token Punctuation::Indicator
          set_indent(m[0], :implicit => true)
        end

        # `|` (literal) or `>` (folded) scalar: the header is parsed first,
        # then the content
        rule(%r/[\|>]/) do
          token Punctuation::Indicator
          push :block_scalar_content
          push :block_scalar_header
        end
      end

      # Openers for flow collections and quoted scalars; each one enters the
      # state that consumes the matching construct.
      state :flow_nodes do
        rule %r/\[/, Punctuation::Indicator, :flow_sequence
        rule %r/\{/, Punctuation::Indicator, :flow_mapping
        rule %r/'/, Str::Single, :single_quoted_scalar
        rule %r/"/, Str::Double, :double_quoted_scalar
      end

      # Rules common to the inside of `[...]` and `{...}`.
      state :flow_collection do
        # whitespace, including line breaks
        rule %r/\s+/m, Text
        mixin :basic
        # entry separators and key/value indicators
        rule %r/[?:,]/, Punctuation::Indicator
        mixin :descriptors
        mixin :flow_nodes

        # start of a plain scalar; nothing is consumed by the lookahead
        rule(%r/(?=#{plain_scalar_start})/) { push :plain_scalar_in_flow_context }
      end

      # Inside `[...]`: the closing bracket leaves the state.
      state :flow_sequence do
        rule %r/\]/, Punctuation::Indicator, :pop!
        mixin :flow_collection
      end

      # Inside `{...}`: the closing brace leaves the state.
      state :flow_mapping do
        rule %r/\}/, Punctuation::Indicator, :pop!
        mixin :flow_collection
      end

      # Body of a literal (`|`) or folded (`>`) scalar, entered after the
      # header line has been consumed.
      state :block_scalar_content do
        rule %r/\n+/, Text

        # whitespace-only lines never dedent, but spaces past the scalar's
        # indentation may belong to the scalar itself
        rule %r/^[ ]+$/ do |m|
          spaces = m[0]
          split_at = @block_scalar_indent || spaces.size

          token Text, spaces[0...split_at]
          token Name::Constant, spaces[split_at..-1]
        end

        # TODO: ^ doesn't actually seem to affect the match at all.
        # Find a way to work around this limitation.
        rule %r/^[ ]*/ do |m|
          token Text

          width = m[0].size

          # compare against the scalar's own indentation once it is known,
          # otherwise against the enclosing indentation; the first line
          # seen fixes the scalar's indentation when none was given
          threshold = @block_scalar_indent || self.indent
          @block_scalar_indent ||= width

          # a dedent ends the scalar: leave this state and the one below it
          pop! 2 if width < threshold
        end

        # scalar text up to the end of the line
        rule %r/[^\n\r\f\v]+/, Name::Constant
      end

      # Parses what follows a `|` or `>` indicator: an optional indentation
      # indicator (a digit 1-9) and an optional chomping flag (`+` or `-`),
      # in either order. The rest of the header line is then skipped via
      # :ignored_line.
      state :block_scalar_header do
        # The wrapper group is non-capturing so that the only captures are
        # the two digit positions: m[1] (digit first) and m[2] (flag first).
        rule %r(
          (?:
            ([1-9])[+-]? | [+-]?([1-9])?
          )(?=[ ]|$)
        )x do |m|
          # nil means "not given explicitly"; :block_scalar_content then
          # takes the indentation from the first line it sees
          @block_scalar_indent = nil
          goto :ignored_line
          # the pattern also matches the empty string (a bare `|` or `>`)
          next if m[0].empty?

          # Only a captured digit is an indentation indicator. Reading the
          # whole header text here would turn `-2` into a negative offset
          # and a lone `+`/`-` into an explicit offset of zero.
          increment = m[1] || m[2]
          @block_scalar_indent = indent + increment.to_i if increment

          token Punctuation::Indicator
        end
      end

      # Remainder of a line that carries no further content: comments and
      # spaces are consumed, and the line break leaves the state.
      state :ignored_line do
        mixin :basic
        rule %r/[ ]+/, Text
        rule %r/\n/, Text, :pop!
      end

      # Whitespace handling shared by single- and double-quoted scalars.
      state :quoted_scalar_whitespaces do
        # spaces at the very start or end of a line are not content
        rule %r/^[ ]+/, Text
        rule %r/[ ]+$/, Text

        # line breaks
        rule %r/\n+/m, Text

        # spaces in the middle of a line
        rule %r/[ ]+/, Name::Variable
      end

      # Inside a '...' scalar.
      state :single_quoted_scalar do
        mixin :quoted_scalar_whitespaces
        # The only escape in a single-quoted scalar is a doubled quote ('').
        # A backslash has no special meaning here, so `'a\'` must end at its
        # second quote; backslashes are picked up by the catch-all rule below.
        rule %r/''/, Str::Escape
        # closing quote
        rule %r/'/, Str, :pop!
        # any other run of non-whitespace characters
        rule %r/[^\s']+/, Str
      end

      # Inside a "..." scalar.
      state :double_quoted_scalar do
        # closing quote
        rule %r/"/, Str, :pop!
        mixin :quoted_scalar_whitespaces
        # single-character escapes, including an escaped tab, space or line
        # break; `\/` is the escaped slash that YAML 1.2 added
        rule %r/\\[0abt\tn\nvfre "\/\\N_LP]/, Str::Escape
        # numeric escapes: \xHH, \uHHHH and \UHHHHHHHH
        rule %r/\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
          Str::Escape
        # any other run of characters that are not whitespace, a quote or a
        # backslash
        rule %r/[^ \t\n\r\f\v"\\]+/, Str
      end

      # Entered after a line break inside a block-context plain scalar, to
      # decide whether the scalar continues on the new line.
      state :plain_scalar_in_block_context_new_line do
        # whitespace-only lines and further line breaks
        rule %r/^[ ]+\n/, Text
        rule %r/\n+/m, Text

        # a document marker ends the scalar; drop this state and the two
        # beneath it without consuming anything
        rule(%r/^(?=---|\.\.\.)/) { pop! 3 }

        # dedent detection
        rule %r/^[ ]*/ do |m|
          token Text
          pop!

          leading = m[0]

          # indented past the current level: the scalar simply continues
          next if leading.size > self.indent

          # otherwise the scalar is over: leave it and treat the spaces as
          # the indentation of a fresh line
          pop!
          save_indent(leading)
          push :indentation
        end
      end

      # An unquoted scalar in block context.
      state :plain_scalar_in_block_context do
        # a `:` followed by a space or the end of the line ends the scalar
        rule %r/[ ]*(?=:[ \n]|:$)/, Text, :pop!
        # any other `:` is part of the scalar
        rule %r/[ ]*:/, Str
        # spaces in front of a comment end the scalar
        rule %r/[ ]+(?=#)/, Text, :pop!
        # trailing spaces
        rule %r/[ ]+$/, Text

        # at a line break, check for a new document or a dedent
        rule(%r/\n+/) do
          token Text
          push :plain_scalar_in_block_context_new_line
        end

        # spaces inside the scalar
        rule %r/[ ]+/, Str
        # ordinary non-whitespace characters
        rule %r/[^\s:]+/, Str
      end

      # An unquoted scalar inside a flow collection.
      state :plain_scalar_in_flow_context do
        # a flow indicator ends the scalar
        rule %r/[ ]*(?=[,:?\[\]{}])/, Text, :pop!
        # spaces in front of a comment end the scalar
        rule %r/[ ]+(?=#)/, Text, :pop!
        # spaces at the start or end of a line, and line breaks
        rule %r/^[ ]+/, Text
        rule %r/[ ]+$/, Text
        rule %r/\n+/, Text
        # spaces inside the scalar
        rule %r/[ ]+/, Name::Variable
        # ordinary non-whitespace, non-indicator characters
        rule %r/[^\s,:?\[\]{}]+/, Name::Variable
      end

      # Argument of a %YAML directive: spaces followed by a `major.minor`
      # version number; the rest of the line is then ignored.
      state :yaml_directive do
        rule(%r/([ ]+)(\d+\.\d+)/) do
          groups Text, Num
          goto :ignored_line
        end
      end

      # Arguments of a %TAG directive: two space-separated fields, both
      # highlighted as types; the rest of the line is then ignored.
      #
      # NOTE(review): the inline labels inside the pattern look offset. The
      # first field has the shape of a tag handle (`!`, `!!`, `!name!`) and
      # the second that of its prefix -- confirm before relying on them.
      state :tag_directive do
        rule(%r(
          ([ ]+)(!|![\w-]*!) # prefix
          ([ ]+)(!|!?[\w;/?:@&=+$,.!~*'()\[\]%-]+) # tag handle
        )x) do
          groups Text, Keyword::Type, Text, Keyword::Type
          goto :ignored_line
        end
      end
    end
  end
end