lib/rouge/lexers/yaml.rb



# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    class YAML < RegexLexer
      title "YAML"
      desc "Yaml Ain't Markup Language (yaml.org)"
      mimetypes 'text/x-yaml'
      tag 'yaml'
      aliases 'yml'
      filenames '*.yaml', '*.yml'

      # Documentation: https://yaml.org/spec/1.2/spec.html

      def self.detect?(text)
        # look for the %YAML directive
        return true if text =~ /\A\s*%YAML/m
      end

      # NB: Tabs are forbidden in YAML, which is why you see things
      # like /[ ]+/.

      # reset the indentation levels
      def reset_indent
        puts "    yaml: reset_indent" if @debug
        @indent_stack = [0]
        @next_indent = 0
        @block_scalar_indent = nil
      end

      def indent
        raise 'empty indent stack!' if @indent_stack.empty?
        @indent_stack.last
      end

      def dedent?(level)
        level < self.indent
      end

      def indent?(level)
        level > self.indent
      end

      # Save a possible indentation level
      def save_indent(match)
        @next_indent = match.size
        puts "    yaml: indent: #{self.indent}/#@next_indent" if @debug
        puts "    yaml: popping indent stack - before: #@indent_stack" if @debug
        if dedent?(@next_indent)
          @indent_stack.pop while dedent?(@next_indent)
          puts "    yaml: popping indent stack - after: #@indent_stack" if @debug
          puts "    yaml: indent: #{self.indent}/#@next_indent" if @debug

          # dedenting to a state not previously indented to is an error
          [match[0...self.indent], match[self.indent..-1]]
        else
          [match, '']
        end
      end

      def continue_indent(match)
        puts "    yaml: continue_indent" if @debug
        @next_indent += match.size
      end

      def set_indent(match, opts={})
        if indent < @next_indent
          puts "    yaml: indenting #{indent}/#{@next_indent}" if @debug
          @indent_stack << @next_indent
        end

        @next_indent += match.size unless opts[:implicit]
      end

      plain_scalar_start = /[^ \t\n\r\f\v?:,\[\]{}#&*!\|>'"%@`]/

      start { reset_indent }

      state :basic do
        rule %r/#.*$/, Comment::Single
      end

      state :root do
        mixin :basic

        rule %r/\n+/, Text

        # trailing or pre-comment whitespace
        rule %r/[ ]+(?=#|$)/, Text

        rule %r/^%YAML\b/ do
          token Name::Tag
          reset_indent
          push :yaml_directive
        end

        rule %r/^%TAG\b/ do
          token Name::Tag
          reset_indent
          push :tag_directive
        end

        # doc-start and doc-end indicators
        rule %r/^(?:---|\.\.\.)(?= |$)/ do
          token Name::Namespace
          reset_indent
          push :block_line
        end

        # indentation spaces
        rule %r/[ ]*(?!\s|$)/ do |m|
          text, err = save_indent(m[0])
          token Text, text
          token Error, err
          push :block_line; push :indentation
        end
      end

      state :indentation do
        rule(/\s*?\n/) { token Text; pop! 2 }
        # whitespace preceding block collection indicators
        rule %r/[ ]+(?=[-:?](?:[ ]|$))/ do |m|
          token Text
          continue_indent(m[0])
        end

        # block collection indicators
        rule(/[?:-](?=[ ]|$)/) do |m|
          set_indent m[0]
          token Punctuation::Indicator
        end

        # the beginning of a block line
        rule(/[ ]*/) { |m| token Text; continue_indent(m[0]); pop! }
      end

      # indented line in the block context
      state :block_line do
        # line end
        rule %r/[ ]*(?=#|$)/, Text, :pop!
        rule %r/[ ]+/, Text
        # tags, anchors, and aliases
        mixin :descriptors
        # block collections and scalars
        mixin :block_nodes
        # flow collections and quoed scalars
        mixin :flow_nodes

        # a plain scalar
        rule %r/(?=#{plain_scalar_start}|[?:-][^ \t\n\r\f\v])/ do
          token Name::Variable
          push :plain_scalar_in_block_context
        end
      end

      state :descriptors do
        # a full-form tag
        rule %r/!<[0-9A-Za-z;\/?:@&=+$,_.!~*'()\[\]%-]+>/, Keyword::Type

        # a tag in the form '!', '!suffix' or '!handle!suffix'
        rule %r(
          (?:![\w-]+)? # handle
          !(?:[\w;/?:@&=+$,.!~*\'()\[\]%-]*) # suffix
        )x, Keyword::Type

        # an anchor
        rule %r/&[\p{L}\p{Nl}\p{Nd}_-]+/, Name::Label

        # an alias
        rule %r/\*[\p{L}\p{Nl}\p{Nd}_-]+/, Name::Variable
      end

      state :block_nodes do
        # implicit key
        rule %r/([^#,?\[\]{}"'\n]+)(:)(?=\s|$)/ do |m|
          groups Name::Attribute, Punctuation::Indicator
          set_indent m[0], :implicit => true
        end

        # literal and folded scalars
        rule %r/[\|>][+-]?/ do
          token Punctuation::Indicator
          push :block_scalar_content
          push :block_scalar_header
        end
      end

      state :flow_nodes do
        rule %r/\[/, Punctuation::Indicator, :flow_sequence
        rule %r/\{/, Punctuation::Indicator, :flow_mapping
        rule %r/'/, Str::Single, :single_quoted_scalar
        rule %r/"/, Str::Double, :double_quoted_scalar
      end

      state :flow_collection do
        rule %r/\s+/m, Text
        mixin :basic
        rule %r/[?:,]/, Punctuation::Indicator
        mixin :descriptors
        mixin :flow_nodes

        rule %r/(?=#{plain_scalar_start})/ do
          push :plain_scalar_in_flow_context
        end
      end

      state :flow_sequence do
        rule %r/\]/, Punctuation::Indicator, :pop!
        mixin :flow_collection
      end

      state :flow_mapping do
        rule %r/\}/, Punctuation::Indicator, :pop!
        mixin :flow_collection
      end

      state :block_scalar_content do
        rule %r/\n+/, Text

        # empty lines never dedent, but they might be part of the scalar.
        rule %r/^[ ]+$/ do |m|
          text = m[0]
          indent_size = text.size

          indent_mark = @block_scalar_indent || indent_size

          token Text, text[0...indent_mark]
          token Name::Constant, text[indent_mark..-1]
        end

        # TODO: ^ doesn't actually seem to affect the match at all.
        # Find a way to work around this limitation.
        rule %r/^[ ]*/ do |m|
          token Text

          indent_size = m[0].size

          dedent_level = @block_scalar_indent || self.indent
          @block_scalar_indent ||= indent_size

          if indent_size < dedent_level
            save_indent m[0]
            pop!
            push :indentation
          end
        end

        rule %r/[^\n\r\f\v]+/, Str
      end

      state :block_scalar_header do
        # optional indentation indicator and chomping flag, in either order
        rule %r(
          (
            ([1-9])[+-]? | [+-]?([1-9])?
          )(?=[ ]|$)
        )x do |m|
          @block_scalar_indent = nil
          goto :ignored_line
          next if m[0].empty?

          increment = m[1] || m[2]
          if increment
            @block_scalar_indent = indent + increment.to_i
          end

          token Punctuation::Indicator
        end
      end

      state :ignored_line do
        mixin :basic
        rule %r/[ ]+/, Text
        rule %r/\n/, Text, :pop!
      end

      state :quoted_scalar_whitespaces do
        # leading and trailing whitespace is ignored
        rule %r/^[ ]+/, Text
        rule %r/[ ]+$/, Text

        rule %r/\n+/m, Text

        rule %r/[ ]+/, Name::Variable
      end

      state :single_quoted_scalar do
        mixin :quoted_scalar_whitespaces
        rule %r/\\'/, Str::Escape
        rule %r/'/, Str, :pop!
        rule %r/[^\s']+/, Str
      end

      state :double_quoted_scalar do
        rule %r/"/, Str, :pop!
        mixin :quoted_scalar_whitespaces
        # escapes
        rule %r/\\[0abt\tn\nvfre "\\N_LP]/, Str::Escape
        rule %r/\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
          Str::Escape
        rule %r/[^ \t\n\r\f\v"\\]+/, Str
      end

      state :plain_scalar_in_block_context_new_line do
        rule %r/^[ ]+\n/, Text
        rule %r/\n+/m, Text
        rule %r/^(?=---|\.\.\.)/ do
          pop! 3
        end

        # dedent detection
        rule %r/^[ ]*/ do |m|
          token Text
          pop!

          indent_size = m[0].size

          # dedent = end of scalar
          if indent_size <= self.indent
            pop!
            save_indent(m[0])
            push :indentation
          end
        end
      end

      state :plain_scalar_in_block_context do
        # the : indicator ends a scalar
        rule %r/[ ]*(?=:[ \n]|:$)/, Text, :pop!
        rule %r/[ ]*:\S+/, Str
        rule %r/[ ]+(?=#)/, Text, :pop!
        rule %r/[ ]+$/, Text
        # check for new documents or dedents at the new line
        rule %r/\n+/ do
          token Text
          push :plain_scalar_in_block_context_new_line
        end

        rule %r/[ ]+/, Str
        rule %r((true|false|null)\b), Keyword::Constant
        rule %r/\d+(?:\.\d+)?(?=(\r?\n)| +#)/, Literal::Number, :pop!

        # regular non-whitespace characters
        rule %r/[^\s:]+/, Str
      end

      state :plain_scalar_in_flow_context do
        rule %r/[ ]*(?=[,:?\[\]{}])/, Text, :pop!
        rule %r/[ ]+(?=#)/, Text, :pop!
        rule %r/^[ ]+/, Text
        rule %r/[ ]+$/, Text
        rule %r/\n+/, Text
        rule %r/[ ]+/, Name::Variable
        rule %r/[^\s,:?\[\]{}]+/, Name::Variable
      end

      state :yaml_directive do
        rule %r/([ ]+)(\d+\.\d+)/ do
          groups Text, Num
          goto :ignored_line
        end
      end

      state :tag_directive do
        rule %r(
          ([ ]+)(!|![\w-]*!) # prefix
          ([ ]+)(!|!?[\w;/?:@&=+$,.!~*'()\[\]%-]+) # tag handle
        )x do
          groups Text, Keyword::Type, Text, Keyword::Type
          goto :ignored_line
        end
      end
    end
  end
end