# -*- coding: utf-8 -*- #
module Rouge
module Lexers
class YAML < RegexLexer
# Lexer metadata: human-readable description, MIME type, primary tag,
# and an alternate alias.
desc "Yaml Ain't Markup Language (yaml.org)"
mimetypes 'text/x-yaml'
tag 'yaml'
aliases 'yml'
# Content sniffing: evaluates to 1 when the text begins (after optional
# whitespace) with a %YAML directive, and to nil otherwise.
def self.analyze_text(text)
  1 if text =~ /\A\s*%YAML/m
end
# Filename globs associated with this lexer.
filenames '*.yaml', '*.yml'
# NB: Tabs are forbidden in YAML, which is why you see things
# like /[ ]+/.
# Put the indentation bookkeeping back into its initial state:
# a stack holding only level 0, no pending indent, and no
# block-scalar indent.
def reset_indent
  puts " yaml: reset_indent" if @debug

  @next_indent = 0
  @indent_stack = [0]
  @block_scalar_indent = nil
end
# The current indentation level, i.e. the top of the indent stack.
# Raises if the stack is empty.
def indent
  return @indent_stack.last unless @indent_stack.empty?

  raise 'empty indent stack!'
end
# True when +level+ is shallower than the current indentation level.
def dedent?(level)
  indent > level
end
# True when +level+ is deeper than the current indentation level.
def indent?(level)
  indent < level
end
# Record the width of +match+ (a run of leading spaces) as the candidate
# indentation level, popping every stacked level deeper than it.
#
# Returns a two-element array: the part of +match+ up to the level that
# remains current, and whatever is left over. The leftover is non-empty
# only after a dedent to a level that was never indented to.
def save_indent(match)
  @next_indent = match.size
  if @debug
    puts " yaml: indent: #{self.indent}/#@next_indent"
    puts " yaml: popping indent stack - before: #@indent_stack"
  end

  # nothing to unwind: the whole match is plain indentation
  return [match, ''] unless dedent?(@next_indent)

  @indent_stack.pop while dedent?(@next_indent)
  if @debug
    puts " yaml: popping indent stack - after: #@indent_stack"
    puts " yaml: indent: #{self.indent}/#@next_indent"
  end

  # dedenting to a state not previously indented to is an error
  level = indent
  [match[0...level], match[level..-1]]
end
# Extend the candidate indentation level by the width of +match+.
# Returns the new candidate level.
def continue_indent(match)
  puts " yaml: continue_indent" if @debug
  @next_indent = @next_indent + match.length
end
# Commit the candidate indentation level to the stack when it is deeper
# than the current one. Unless opts[:implicit] is set, the candidate
# level is then advanced past +match+.
#
# Returns the new candidate level, or nil when opts[:implicit] is set.
def set_indent(match, opts={})
  @indent_stack.push(@next_indent) if @next_indent > indent
  return if opts[:implicit]

  @next_indent += match.size
end
# Matches one character that is neither whitespace nor any of
#   ? : , [ ] { } # & * ! | > ' " % @ `
# It is interpolated into the plain-scalar lookahead rules of the
# :block_line and :flow_collection states.
plain_scalar_start = /[^ \t\n\r\f\v?:,\[\]{}#&*!\|>'"%@`]/
# Reset the indentation bookkeeping from the `start` hook
# (presumably run when lexing begins -- confirm against RegexLexer).
start { reset_indent }
# Rules mixed into several other states.
state :basic do
  # a comment runs from '#' to the end of the line
  rule(/#.*$/, Comment::Single)
end
state :root do
  mixin :basic
  rule(/\n+/, Text)

  # spaces that trail a line or lead up to a comment
  rule(/[ ]+(?=#|$)/, Text)

  # %YAML directive
  rule(/^%YAML\b/) do
    token Name::Tag
    reset_indent
    push :yaml_directive
  end

  # %TAG directive
  rule(/^%TAG\b/) do
    token Name::Tag
    reset_indent
    push :tag_directive
  end

  # document start (---) and document end (...) markers
  rule(/^(?:---|\.\.\.)(?= |$)/) do
    token Name::Namespace
    reset_indent
    push :block_line
  end

  # leading spaces of a line that carries content; anything
  # save_indent hands back as leftover is flagged as an error
  rule(/[ ]*(?!\s|$)/) do |m|
    accepted, leftover = save_indent(m[0])
    token Text, accepted
    token Error, leftover
    push :block_line
    push :indentation
  end
end
state :indentation do
  # nothing but whitespace up to the newline: pop two states
  rule(/\s*?\n/) do
    token Text
    pop! 2
  end

  # spaces in front of a block collection indicator
  rule(/[ ]+(?=[-:?](?:[ ]|$))/) do |m|
    token Text
    continue_indent(m[0])
  end

  # a block collection indicator (? : -) followed by a space or line end
  rule(/[?:-](?=[ ]|$)/) do |m|
    token Punctuation::Indicator
    set_indent(m[0])
  end

  # remaining spaces: the block line proper starts here
  rule(/[ ]*/) do |m|
    token Text
    continue_indent(m[0])
    pop!
  end
end
# A line inside the block context, after its indentation.
state :block_line do
  # end of the line (optionally before a comment)
  rule(/[ ]*(?=#|$)/, Text, :pop!)
  rule(/[ ]+/, Text)

  # tags, anchors, and aliases
  mixin :descriptors
  # block collections and scalars
  mixin :block_nodes
  # flow collections and quoted scalars
  mixin :flow_nodes

  # zero-width lookahead for the start of a plain scalar
  rule(/(?=#{plain_scalar_start}|[?:-][^ \t\n\r\f\v])/) do
    token Name::Variable
    push :plain_scalar_in_block_context
  end
end
# Node properties: tags, anchors, and aliases.
state :descriptors do
  # verbatim tag of the form !<...>
  rule(/!<[0-9A-Za-z;\/?:@&=+$,_.!~*'()\[\]%-]+>/, Keyword::Type)

  # shorthand tag: '!', '!suffix' or '!handle!suffix'
  rule(%r(
    (?:![\w-]+)? # handle
    !(?:[\w;/?:@&=+$,.!~*\'()\[\]%-]*) # suffix
  )x, Keyword::Type)

  # anchor definition
  rule(/&[\w-]+/, Name::Label)

  # alias reference
  rule(/\*[\w-]+/, Name::Variable)
end
state :block_nodes do
  # ':' ending an implicit key; the candidate indent is not advanced
  rule(/:(?=\s|$)/) do |m|
    token Punctuation::Indicator
    set_indent(m[0], :implicit => true)
  end

  # '|' or '>' opens a literal or folded scalar: the header is lexed
  # first (pushed last), then the content
  rule(/[\|>]/) do
    token Punctuation::Indicator
    push :block_scalar_content
    push :block_scalar_header
  end
end
# Openers for flow collections and quoted scalars.
state :flow_nodes do
  rule(/\[/, Punctuation::Indicator, :flow_sequence)
  rule(/\{/, Punctuation::Indicator, :flow_mapping)
  rule(/'/, Str::Single, :single_quoted_scalar)
  rule(/"/, Str::Double, :double_quoted_scalar)
end
# Rules common to flow sequences and flow mappings.
state :flow_collection do
  rule(/\s+/m, Text)
  mixin :basic
  rule(/[?:,]/, Punctuation::Indicator)
  mixin :descriptors
  mixin :flow_nodes

  # zero-width lookahead for the start of a plain scalar
  rule(/(?=#{plain_scalar_start})/) { push :plain_scalar_in_flow_context }
end
# Inside [ ... ]; the closing bracket leaves the state.
state :flow_sequence do
  rule(/\]/, Punctuation::Indicator, :pop!)
  mixin :flow_collection
end
# Inside { ... }; the closing brace leaves the state.
state :flow_mapping do
  rule(/\}/, Punctuation::Indicator, :pop!)
  mixin :flow_collection
end
state :block_scalar_content do
  rule(/\n+/, Text)

  # empty lines never dedent, but they might be part of the scalar.
  # Spaces up to the scalar's indent are plain text; any beyond it
  # are scalar content.
  rule(/^[ ]+$/) do |m|
    spaces = m[0]
    split_at = @block_scalar_indent || spaces.size
    token Text, spaces[0...split_at]
    token Name::Constant, spaces[split_at..-1]
  end

  # TODO: ^ doesn't actually seem to affect the match at all.
  # Find a way to work around this limitation.
  rule(/^[ ]*/) do |m|
    token Text
    width = m[0].size
    # the threshold is read before the scalar indent is latched below
    threshold = @block_scalar_indent || indent
    @block_scalar_indent ||= width
    pop! 2 if width < threshold
  end

  # the rest of the line is scalar content
  rule(/[^\n\r\f\v]+/, Name::Constant)
end
# Header that follows a '|' or '>' block scalar indicator.
state :block_scalar_header do
  # optional indentation indicator and chomping flag, in either order
  #
  # Capture groups: 1 is the whole header, 2 is the digit of the
  # "digit first" alternative, 3 is the digit of the "flag first"
  # alternative.
  rule %r(
    (
      ([1-9])[+-]? | [+-]?([1-9])?
    )(?=[ ]|$)
  )x do |m|
    @block_scalar_indent = nil
    goto :ignored_line
    # an empty header produces no token and leaves the indent unset
    next if m[0].empty?

    # Only the digit is the indentation indicator. Group 1 is set
    # whenever the rule matches, so reading it would turn "-2" into a
    # negative increment and a bare "+" or "-" into an explicit
    # increment of 0.
    increment = m[2] || m[3]
    @block_scalar_indent = indent + increment.to_i if increment

    token Punctuation::Indicator
  end
end
# Remainder of a line where only spaces and a comment are expected;
# the newline leaves the state.
state :ignored_line do
  mixin :basic
  rule(/[ ]+/, Text)
  rule(/\n/, Text, :pop!)
end
# Whitespace handling shared by the quoted scalar states.
state :quoted_scalar_whitespaces do
  # spaces at the start or end of a line are ignored
  rule(/^[ ]+/, Text)
  rule(/[ ]+$/, Text)
  rule(/\n+/m, Text)
  # spaces anywhere else
  rule(/[ ]+/, Name::Variable)
end
# Body of a '...' scalar.
state :single_quoted_scalar do
  mixin :quoted_scalar_whitespaces
  # In a single-quoted scalar the only escape is a doubled quote ('');
  # a backslash is an ordinary character, so 'C:\' is a complete
  # scalar. This rule must stay ahead of the closing-quote rule.
  rule(/''/, Str::Escape)
  # closing quote
  rule(/'/, Str, :pop!)
  # everything else, backslashes included
  rule(/[^\s']+/, Str)
end
# Body of a "..." scalar.
state :double_quoted_scalar do
  # closing quote
  rule(/"/, Str, :pop!)
  mixin :quoted_scalar_whitespaces
  # single-character escapes, including \/ which YAML 1.2 allows for
  # JSON compatibility
  rule(/\\[0abt\tn\nvfre "\/\\N_LP]/, Str::Escape)
  # numeric escapes: \xXX, \uXXXX, \UXXXXXXXX
  rule(/\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
       Str::Escape)
  # ordinary content
  rule(/[^ \t\n\r\f\v"\\]+/, Str)
end
# Entered after a newline inside a block-context plain scalar, to decide
# whether the scalar continues on the next line.
state :plain_scalar_in_block_context_new_line do
  rule(/^[ ]+\n/, Text)
  rule(/\n+/m, Text)

  # a document marker is coming up: pop three states
  rule(/^(?=---|\.\.\.)/) { pop! 3 }

  # dedent detection
  rule(/^[ ]*/) do |m|
    token Text
    pop!
    leading = m[0]
    # dedent = end of scalar
    if leading.size <= indent
      pop!
      save_indent(leading)
      push :indentation
    end
  end
end
state :plain_scalar_in_block_context do
  # a ':' followed by a space, newline or line end terminates the scalar
  rule(/[ ]*(?=:[ \n]|:$)/, Text, :pop!)
  # any other ':' belongs to the scalar
  rule(/[ ]*:/, Str)
  # spaces before a comment terminate the scalar
  rule(/[ ]+(?=#)/, Text, :pop!)
  rule(/[ ]+$/, Text)

  # at a newline, check for new documents or dedents
  rule(/\n+/) do
    token Text
    push :plain_scalar_in_block_context_new_line
  end

  # interior spaces
  rule(/[ ]+/, Str)
  # regular non-whitespace characters
  rule(/[^\s:]+/, Str)
end
state :plain_scalar_in_flow_context do
  # an upcoming flow indicator terminates the scalar
  rule(/[ ]*(?=[,:?\[\]{}])/, Text, :pop!)
  # spaces before a comment terminate the scalar
  rule(/[ ]+(?=#)/, Text, :pop!)
  # whitespace at line boundaries and newlines
  rule(/^[ ]+/, Text)
  rule(/[ ]+$/, Text)
  rule(/\n+/, Text)
  # scalar content: interior spaces and non-indicator characters
  rule(/[ ]+/, Name::Variable)
  rule(/[^\s,:?\[\]{}]+/, Name::Variable)
end
# Arguments of a %YAML directive: spaces, then a major.minor version.
state :yaml_directive do
  rule(/([ ]+)(\d+\.\d+)/) do
    groups Text, Num
    goto :ignored_line
  end
end
# Arguments of a %TAG directive: two space-separated parts, both
# highlighted as Keyword::Type.
state :tag_directive do
  rule(%r(
    ([ ]+)(!|![\w-]*!) # prefix
    ([ ]+)(!|!?[\w;/?:@&=+$,.!~*'()\[\]%-]+) # tag handle
  )x) do
    groups Text, Keyword::Type, Text, Keyword::Type
    goto :ignored_line
  end
end
end
end
end