require 'strscan'
class Mustache
# The Parser is responsible for taking a string template and
# converting it into an array of tokens and, really, expressions. It
# raises SyntaxError if there is anything it doesn't understand and
# knows which sigil corresponds to which tag type.
#
# For example, given this template:
#
# Hi {{thing}}!
#
# Run through the Parser we'll get these tokens:
#
# [:multi,
# [:static, "Hi "],
# [:mustache, :etag, "thing"],
# [:static, "!\n"]]
#
# You can see the array of tokens for any template with the
# mustache(1) command line tool:
#
# $ mustache --tokens test.mustache
# [:multi, [:static, "Hi "], [:mustache, :etag, "thing"], [:static, "!\n"]]
class Parser
# A SyntaxError is raised when the Parser comes across unclosed
# tags, sections, illegal content in tags, or anything of that
# sort.
class SyntaxError < StandardError
# Builds the error from a message and the parser position it refers to.
#
# message  - String naming the problem.
# position - Array whose first three elements are the line number, the
#            column and the text of the offending line. Any further
#            elements are ignored.
#
# The line is kept with its surrounding whitespace stripped, and the
# column is shifted left by the amount of leading whitespace removed so
# that the caret printed by #to_s still points at the same character.
def initialize(message, position)
  @message = message
  @lineno, @column, @line, = position
  @stripped_line = @line.strip
  leading_whitespace = @line.size - @line.lstrip.size
  # If the reported column lies inside the leading whitespace, the shifted
  # column would be negative and `' ' * @stripped_column` in #to_s would
  # raise ArgumentError. Clamp it so the caret sits under the first
  # visible character instead.
  @stripped_column = [@column - leading_whitespace, 0].max
end
# Renders the error as four newline-terminated lines: the message, the
# line number, the stripped source line, and a caret placed
# @stripped_column spaces in so it points into that source line.
def to_s
  caret_line = "#{' ' * @stripped_column}^"
  "#{@message}\nLine #{@lineno}\n#{@stripped_line}\n#{caret_line}\n"
end
end
# Sigils that may directly follow an opening delimiter to select a tag
# type. The elements are frozen but the array is not, because
# Parser.add_type appends newly registered sigils to it.
VALID_TYPES = %w[# ^ / = ! < > & {].map(&:freeze)

# Returns a Regexp matching any single sigil in VALID_TYPES. It is built on
# first use and cached in @valid_types until Parser.add_type resets the
# cache.
def self.valid_types
  @valid_types ||= begin
    escaped_sigils = VALID_TYPES.map { |sigil| Regexp.escape(sigil) }
    Regexp.new(escaped_sigils.join('|'))
  end
end
# Add a supported sigil type (with optional aliases) to the Parser.
#
# Requires a block, which will be sent the following parameters:
#
# * content - The raw content of the tag
# * fetch- A mustache context fetch expression for the content
# * padding - Indentation whitespace from the currently-parsed line
# * pre_match_position - Location of the scanner before a match was made
#
# The provided block will be evaluated against the current instance of
# Parser, and may append to the Parser's @result as needed.
def self.add_type(*types, &block)
types = types.map(&:to_s)
type, *aliases = types
method_name = "scan_tag_#{type}".to_sym
define_method(method_name, &block)
aliases.each { |a| alias_method "scan_tag_#{a}", method_name }
types.each { |t| VALID_TYPES << t unless VALID_TYPES.include?(t) }
@valid_types = nil
end
# Tag types that can be "standalone": when such a tag begins a line,
# scan_tags also consumes the line break that follows it.
SKIP_WHITESPACE = %w[# ^ / < > = !].map(&:freeze)

# The content allowed in a tag name: word characters plus `?`, `!`, `/`,
# `.` and `-`. The pattern also matches the empty string.
ALLOWED_CONTENT = /(\w|[?!\/.-])*/

# Tag types whose content may be anything up to the closing delimiter;
# all other tags only allow ALLOWED_CONTENT.
ANY_CONTENT = %w[! =].map(&:freeze)
# Readers for the opening and closing delimiters currently in effect. The
# writers are defined by hand below because they also rebuild the regular
# expressions derived from each delimiter.
attr_reader :otag, :ctag
# Creates a parser using the default `{{` / `}}` delimiters unless
# delimiters are already set.
#
# options - Hash of settings, kept in @options. Keys read here:
#           :inline_partials_at_compile_time - when truthy, partial tags
#             are resolved and compiled in place while parsing.
#           :partial_resolver - object responding to #call; only read (and
#             required) when the option above is truthy.
#
# Raises ArgumentError if inlining is requested without a callable
# partial resolver.
def initialize(options = {})
  @options = options
  @option_inline_partials_at_compile_time = options[:inline_partials_at_compile_time]

  if @option_inline_partials_at_compile_time
    resolver = options[:partial_resolver]
    @partial_resolver = resolver
    unless resolver.respond_to?(:call)
      raise ArgumentError.new("Missing or invalid partial_resolver")
    end
  end

  # Go through the writers so the delimiter regexps are built as well.
  self.otag = '{{' unless otag
  self.ctag = '}}' unless ctag
end
# Sets the opening tag delimiter. This may be changed at runtime.
#
# Besides storing the delimiter, two patterns are rebuilt from it:
#
# * @otag_regex     - the delimiter optionally preceded by spaces/tabs,
#                     which are captured in group 1 (used by scan_tags).
# * @otag_not_regex - the delimiter, with group 1 capturing the
#                     spaces/tabs before it only when they start at the
#                     beginning of a line (used by scan_text).
def otag=(value)
  delimiter = regexp(value)
  @otag_not_regex = /(^[ \t]*)?#{delimiter}/
  @otag_regex = /([ \t]*)?#{delimiter}/
  @otag = value
end
# Sets the closing tag delimiter. This too may be changed at runtime.
#
# The escaped pattern used to find the end of a tag is cached in
# @ctag_regex alongside the raw delimiter.
def ctag=(value)
  closing_pattern = regexp(value)
  @ctag_regex = closing_pattern
  @ctag = value
end
# Given a string template, returns an array of tokens.
#
# The template itself is not modified: a binary-encoded copy is scanned,
# and the original encoding is remembered in @encoding so scan_text can
# restore it on the static text it emits.
#
# Raises SyntaxError (through #error) if a section is still open when the
# end of the template is reached.
def compile(template)
  @encoding = template.respond_to?(:encoding) ? template.encoding : nil
  template = template.dup.force_encoding("BINARY") if @encoding

  # Fresh parse state: the stack of opened sections, the token tree being
  # built, and the scanner over the template.
  @sections = []
  @result = [:multi]
  @scanner = StringScanner.new(template)

  # Alternate between tags and static text until the input is exhausted.
  scan_tags || scan_text until @scanner.eos?

  unless @sections.empty?
    # The whole template was parsed but a section was never closed.
    name, opened_at, = @sections.pop
    error "Unclosed section #{name.inspect}", opened_at
  end

  @result
end
private
# Reads the content of the tag currently being scanned.
#
# type               - The tag's sigil, or nil for a plain variable tag.
# current_ctag_regex - Regexp for the closing delimiter of this tag.
#
# For ANY_CONTENT types everything up to (but excluding) optional
# whitespace, an optional balancing sigil and the closing delimiter is
# returned, or nil when no such terminator exists. For every other type the
# longest run of ALLOWED_CONTENT at the scan pointer is returned, which may
# be the empty string.
def content_tags type, current_ctag_regex
  return @scanner.scan(ALLOWED_CONTENT) unless ANY_CONTENT.include?(type)

  terminator = /\s*#{regexp(type)}?#{current_ctag_regex}/
  scan_until_exclusive(terminator)
end
# Invokes the handler for a tag. The handler is the method named
# `scan_tag_` followed by the sigil (just `scan_tag_` when type is nil),
# and it receives the remaining arguments unchanged.
def dispatch_based_on_type type, content, fetch, padding, pre_match_position
  handler = "scan_tag_#{type}"
  __send__(handler, content, fetch, padding, pre_match_position)
end
# Consumes the closing delimiter at the scanner's current position.
#
# Raises SyntaxError (through #error) with "Unclosed tag" when the
# delimiter is not found there.
def find_closing_tag scanner, current_ctag_regex
  return if scanner.scan(current_ctag_regex)

  error "Unclosed tag"
end
# Tries to parse a single {{mustache}} tag at the scan pointer and passes
# it to the matching scan_tag_* handler, which appends to @result.
#
# Returns nil in every case: either no opening delimiter starts here, or a
# tag was consumed. Because of that, the `scan_tags || scan_text` loop in
# #compile always runs #scan_text after this method.
#
# Raises SyntaxError (through #error) when the tag is empty, contains
# illegal content, or is never closed.
def scan_tags
  # Capture the state before the scanner moves.
  start_of_line = @scanner.beginning_of_line?
  pre_match_position = @scanner.pos
  last_index = @result.length

  return unless @scanner.scan @otag_regex
  padding = @scanner[1] || ''

  # Don't touch the preceding whitespace unless we're matching the start
  # of a new line.
  unless start_of_line
    @result << [:static, padding] unless padding.empty?
    pre_match_position += padding.length
    padding = ''
  end

  # Since {{= rewrites ctag, we store the ctag which should be used
  # when parsing this specific tag.
  current_ctag_regex = @ctag_regex

  type = @scanner.scan(self.class.valid_types)
  @scanner.skip(/\s*/)

  # ANY_CONTENT tags allow any character inside of them, while
  # other tags (such as variables) are more strict.
  content = content_tags(type, current_ctag_regex)

  # ANY_CONTENT tags are read up to their closing delimiter; if there is
  # none, content_tags returns nil. Report that as a syntax error rather
  # than failing on `nil.empty?`.
  error "Unclosed tag" if content.nil?

  # We found {{ but we can't figure out what's going on inside.
  error "Illegal content in tag" if content.empty?

  fetch = [:mustache, :fetch, content.split('.')]
  # The handler may replace @result (sections do), so remember the token
  # list this tag started in; padding is re-inserted there.
  prev = @result

  dispatch_based_on_type(type, content, fetch, padding, pre_match_position)

  # The closing } in unescaped tags is just a hack for
  # aesthetics.
  type = "}" if type == "{"

  # Skip whitespace and any balancing sigils after the content
  # inside this tag.
  @scanner.skip(/\s+/)
  @scanner.skip(regexp(type)) if type

  find_closing_tag(@scanner, current_ctag_regex)

  if start_of_line
    # A tag is standalone when its type allows it and nothing but a line
    # break (or the end of the template) follows. StringScanner#skip only
    # matches at the scan pointer, so a line break further along no longer
    # counts, and the line break is consumed in the same step.
    standalone = SKIP_WHITESPACE.include?(type) &&
                 (@scanner.eos? || @scanner.skip(/\r?\n/))

    # Otherwise the indentation we have been holding on to is real output:
    # put it back as static text in front of this tag.
    prev.insert(last_index, [:static, padding]) unless standalone || padding.empty?
  end

  # Store off the current scanner position now that we've closed the tag
  # and consumed any irrelevant whitespace.
  @sections.last[1] << @scanner.pos unless @sections.empty?

  # Always nil, so the caller's `scan_tags || scan_text` continues with
  # scan_text.
  nil
end
# Collects static text, e.g. raw HTML with no {{mustaches}}, up to the next
# opening delimiter and appends it to @result as a [:static, text] token.
#
# When no further opening delimiter exists, the remainder of the template
# is taken and the scanner is moved to the end. The text is tagged with the
# template's original encoding when one was recorded, and empty text
# produces no token.
def scan_text
  chunk = scan_until_exclusive(@otag_not_regex)

  unless chunk
    # No opening delimiter left: everything remaining is static text.
    chunk = @scanner.rest
    @scanner.terminate
  end

  chunk.force_encoding(@encoding) if @encoding
  return if chunk.empty?

  @result << [:static, chunk]
end
# Scans forward until the pattern matches and returns the text between the
# starting position and the *beginning* of the match, leaving the scan
# pointer at that beginning. Returns nil, without moving, when the pattern
# does not match.
def scan_until_exclusive(regexp)
  start = @scanner.pos
  return unless @scanner.scan_until(regexp)

  # scan_until stops after the match; step back over it.
  @scanner.pos -= @scanner.matched.size
  @scanner.pre_match[start..-1]
end
# Returns the first two elements of #position, i.e. [lineno, column].
def offset
  position.first(2)
end
# Returns [lineno, column, line] for the scanner's current position.
#
# lineno is the number of lines in the text consumed so far, column is the
# zero-based index of the last consumed character on the current line, and
# line is the full text of the current line without its line break.
def position
  # What we have parsed so far
  consumed = @scanner.string[0...@scanner.pos]
  # The rest of the current line
  remainder = @scanner.check_until(/\n|\Z/).to_s.chomp

  consumed_lines = consumed.split("\n")
  current = consumed_lines.last
  [consumed_lines.size, current.size - 1, current + remainder]
end
# Converts a string into a Regexp that matches it literally, ready for use
# with the string scanner. Returns nil when given nil or false.
def regexp(thing)
  return unless thing

  Regexp.new(Regexp.escape(thing))
end
# Raises a SyntaxError. The message should be the name of the error; line
# number, column and the offending line are taken from pos, which defaults
# to the scanner's current #position.
def error(message, pos = position)
  failure = SyntaxError.new(message, pos)
  raise failure
end
#
# Scan tags
#
# These methods are called from `scan_tags`, which dispatches to the method
# named `scan_tag_<sigil>`. Because a sigil does not make a readable method
# name, each handler is defined under a descriptive name and then aliased
# to its sigil-based name.
#
# Handles tags that have no sigil, e.g. {{name}}: appends an :etag token
# carrying the fetch expression and the current [lineno, column] offset.
def scan_tag_ content, fetch, padding, pre_match_position
  @result.push([:mustache, :etag, fetch, offset])
end
# Opens a section ({{#name}}).
#
# Appends a :section token whose last element is a fresh [:multi] list,
# records [content, position, enclosing token list] on the @sections stack
# and makes the fresh list the new @result, so the tokens that follow are
# collected inside the section until it is closed.
def scan_tag_block content, fetch, padding, pre_match_position
  children = [:multi]
  enclosing = @result

  enclosing.push([:mustache, :section, fetch, offset, children])
  @sections.push([content, position, enclosing])
  @result = children
end
# Tags are dispatched to `scan_tag_<sigil>`, so expose the section handler
# under its `#` sigil.
alias_method :'scan_tag_#', :scan_tag_block
# Opens an inverted section ({{^name}}).
#
# Appends an :inverted_section token whose last element is a fresh [:multi]
# list, records [content, position, enclosing token list] on the @sections
# stack and makes the fresh list the new @result, so the tokens that follow
# are collected inside the section until it is closed.
def scan_tag_inverted content, fetch, padding, pre_match_position
  children = [:multi]
  enclosing = @result

  enclosing.push([:mustache, :inverted_section, fetch, offset, children])
  @sections.push([content, position, enclosing])
  @result = children
end
# Tags are dispatched to `scan_tag_<sigil>`, so expose the inverted-section
# handler under its `^` sigil.
alias_method :'scan_tag_^', :scan_tag_inverted
def scan_tag_close content, fetch, padding, pre_match_position
section, pos, result = @sections.pop
if section.nil?
error "Closing unopened #{content.inspect}"
end
raw = @scanner.pre_match[pos[3]...pre_match_position] + padding
(@result = result).last << raw << [self.otag, self.ctag]
if section != content
error "Unclosed section #{section.inspect}", pos
end
end
# Tags are dispatched to `scan_tag_<sigil>`, so expose the closing handler
# under its `/` sigil.
alias_method :'scan_tag_/', :scan_tag_close
# Handler for comment tags ({{! ... }}). The body is empty: nothing is
# appended to @result, so a comment contributes no tokens.
def scan_tag_comment content, fetch, padding, pre_match_position
end
# Tags are dispatched to `scan_tag_<sigil>`, so expose the comment handler
# under its `!` sigil.
alias_method :'scan_tag_!', :scan_tag_comment
# Handles a set-delimiter tag ({{=<open> <close>=}}) by switching the
# delimiters used for the tags that follow.
#
# content - String holding the new opening and closing delimiters,
#           separated by whitespace. Everything after the first run of
#           whitespace becomes the closing delimiter.
#
# The remaining parameters are part of the common handler signature and
# are not used here.
#
# Raises SyntaxError (through #error) when content does not provide both
# delimiters.
def scan_tag_delimiter content, fetch, padding, pre_match_position
  new_otag, new_ctag = content.split(' ', 2)

  # With a single token the closing delimiter would be set to nil, which
  # leaves @ctag_regex nil; the next tag would then hand a nil pattern to
  # the scanner and fail with a TypeError instead of a syntax error.
  error "Invalid delimiters #{content.inspect}" if new_otag.nil? || new_ctag.nil?

  self.otag = new_otag
  self.ctag = new_ctag
end
# Tags are dispatched to `scan_tag_<sigil>`, so expose the delimiter
# handler under its `=` sigil.
alias_method :'scan_tag_=', :scan_tag_delimiter
# Handles partial tags ({{> name}} / {{< name}}).
#
# content - Name of the partial, passed to the partial resolver.
# padding - Indentation found in front of the tag.
#
# When partials are inlined at compile time, the partial's source is
# obtained from @partial_resolver, every line is prefixed with padding, and
# the source is compiled by a new parser created with the same options;
# the resulting token tree is appended to @result. Otherwise a :partial
# token carrying the name, the current offset and the padding is appended.
def scan_tag_open_partial content, fetch, padding, pre_match_position
  @result << if @option_inline_partials_at_compile_time
    partial = @partial_resolver.call content
    # Indent a copy (gsub, not gsub!): the String belongs to the resolver,
    # which may cache or freeze it. Modifying it in place would indent a
    # cached partial again on every use, or raise on a frozen String.
    partial = partial.gsub(/^/, padding) unless padding.empty?
    self.class.new(@options).compile partial
  else
    [:mustache, :partial, content, offset, padding]
  end
end
# Tags are dispatched to `scan_tag_<sigil>`; both the `<` and `>` sigils
# map to the partial handler.
alias_method :'scan_tag_<', :scan_tag_open_partial
alias_method :'scan_tag_>', :scan_tag_open_partial
# Handles unescaped variable tags ({{{name}}} and {{& name}}): appends a
# :utag token carrying the fetch expression and the current
# [lineno, column] offset.
def scan_tag_unescaped content, fetch, padding, pre_match_position
  @result.push([:mustache, :utag, fetch, offset])
end
# Tags are dispatched to `scan_tag_<sigil>`; both the `{` and `&` sigils
# map to the unescaped-variable handler.
alias_method :'scan_tag_{', :'scan_tag_unescaped'
alias_method :'scan_tag_&', :'scan_tag_unescaped'
end
end