module Parser

  # Tracks the state of a single string-like literal (string, symbol,
  # word or symbol array, regexp, heredoc or xstring) while it is lexed.
  class Lexer::Literal
    DELIMITERS = { '(' => ')', '[' => ']', '{' => '}', '<' => '>' }
    MONOLITHIC = { :tSTRING_BEG => :tSTRING }

    TYPES = {
    # type        start token       interpolate?
      "'"   => [ :tSTRING_BEG,   false ],
      '%q'  => [ :tSTRING_BEG,   false ],
      '"'   => [ :tSTRING_BEG,   true  ],
      '%'   => [ :tSTRING_BEG,   true  ],
      '%Q'  => [ :tSTRING_BEG,   true  ],

      '%w'  => [ :tQWORDS_BEG,   false ],
      '%W'  => [ :tWORDS_BEG,    true  ],

      '%i'  => [ :tQSYMBOLS_BEG, false ],
      '%I'  => [ :tSYMBOLS_BEG,  true  ],

      ":'"  => [ :tSYMBEG,       false ],
      '%s'  => [ :tSYMBEG,       false ],
      ':"'  => [ :tSYMBEG,       true  ],

      '/'   => [ :tREGEXP_BEG,   true  ],
      '%r'  => [ :tREGEXP_BEG,   true  ],

      '%x'  => [ :tXSTRING_BEG,  true  ],
      '`'   => [ :tXSTRING_BEG,  true  ],
    }

    attr_reader   :heredoc_e, :str_s
    attr_accessor :saved_herebody_s

    def initialize(lexer, str_type, delimiter, str_s, heredoc_e = nil, indent = false)
      @lexer   = lexer
      @nesting = 1

      unless TYPES.include?(str_type)
        message = ERRORS[:unexpected_percent_str] % { :type => str_type }
        lexer.send(:diagnostic, :error, message, @lexer.send(:range, str_s, str_s + 2))
      end

      # String type. For :'foo', it is :'
      @str_type = str_type
      # Start of the string type specifier.
      @str_s    = str_s

      # Data buffer.
      @buffer   = ''
      # Start of the current chunk in data buffer.
      @buffer_s = nil

      @start_tok, @interpolate = TYPES[str_type]

      @start_delim = DELIMITERS.include?(delimiter) ? delimiter : nil
      @end_delim   = DELIMITERS.fetch(delimiter, delimiter)

      @heredoc_e = heredoc_e
      @indent    = indent

      @interp_braces = 0
      @space_emitted = true

      # Monolithic strings are glued into a single token, e.g.
      # tSTRING_BEG tSTRING_CONTENT tSTRING_END -> tSTRING.
      @monolithic = (@start_tok == :tSTRING_BEG &&
                     %w(' ").include?(str_type) &&
                     !heredoc?)

      # Capture opening delimiter in percent-literals.
      unless @heredoc_e || @str_type.end_with?(delimiter)
        @str_type << delimiter
      end

      emit_start_tok unless @monolithic
    end

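    # Returns true if the literal may contain #{...} interpolation.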
    def interpolate?
      @interpolate
    end

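    # Returns true for word and symbol array literals (%w, %W, %i, %I).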
    def words?
      type == :tWORDS_BEG || type == :tQWORDS_BEG ||
        type == :tSYMBOLS_BEG || type == :tQSYMBOLS_BEG
    end

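    # Returns true for regexp literals (/.../ and %r).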
    def regexp?
      type == :tREGEXP_BEG
    end

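    # Returns true if this literal is a heredoc.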
    def heredoc?
      !!@heredoc_e
    end

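    # The token type this literal was opened with, e.g. :tSTRING_BEG.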
    def type
      @start_tok
    end

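    # Returns true if a backslash escape of +character+ should be reduced
    # to the bare character: whitespace inside word arrays, and the
    # backslash and delimiter characters in any literal.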
    def munge_escape?(character)
      if words? && character =~ /[ \t\v\r\f\n]/
        true
      else
        ['\\', @start_delim, @end_delim].include?(character)
      end
    end

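    # Returns true if +delimiter+ terminates this literal. When @indent is
    # set (indented heredocs), leading whitespace in the candidate is ignored.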
    def delimiter?(delimiter)
      if @indent
        @end_delim == delimiter.lstrip
      else
        @end_delim == delimiter
      end
    end

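    # Tracks nesting of paired delimiters and, once the outermost closing
    # delimiter is seen, finalizes the literal by emitting either a single
    # monolithic token or the buffered content followed by tSTRING_END.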
    def nest_and_try_closing(delimiter, ts, te)
      if @start_delim && @start_delim == delimiter
        @nesting += 1
      elsif delimiter?(delimiter)
        @nesting -= 1
      end

      # Finalize if last matching delimiter is closed.
      if @nesting == 0
        if words?
          extend_space(ts, ts)
        end

        # Emit the string as a single token if it's applicable.
        if @monolithic
          emit(MONOLITHIC[@start_tok], @buffer, @str_s, te)
        else
          # If this is a heredoc, @buffer contains the sentinel now.
          # Just throw it out. Lexer flushes the heredoc after each
          # non-heredoc-terminating \n anyway, so no data will be lost.
          flush_string unless heredoc?

          emit(:tSTRING_END, @end_delim, ts, te)
        end
      end
    end

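    # Registers an opening brace seen while lexing a #{} interpolation.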
    def start_interp_brace
      @interp_braces += 1
    end

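    # Registers a closing brace; returns true once the brace that opened
    # the #{} interpolation has been closed.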
    def end_interp_brace_and_try_closing
      @interp_braces -= 1

      (@interp_braces == 0)
    end

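    # Appends a chunk of source text to the data buffer, tracking the
    # source range it covers.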
    def extend_string(string, ts, te)
      if @buffer_s.nil?
        @buffer_s = ts
      end

      @buffer_e = te
      @buffer << string
    end

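    # Emits the buffered contents as tSTRING_CONTENT and resets the buffer.
    # A monolithic literal is downgraded first: its deferred start token is
    # emitted and @monolithic is cleared.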
    def flush_string
      if @monolithic
        emit_start_tok
        @monolithic = false
      end

      unless @buffer.empty?
        emit(:tSTRING_CONTENT, @buffer, @buffer_s, @buffer_e)

        @buffer   = ''
        @buffer_s = nil
        @buffer_e = nil

        extend_content
      end
    end

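    # Marks that content has been seen since the last tSPACE token; used by
    # extend_space to avoid emitting duplicate separators.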
    def extend_content
      @space_emitted = false
    end

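    # Flushes any buffered content and emits a tSPACE word separator,
    # unless one has already been emitted for this gap.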
    def extend_space(ts, te)
      flush_string

      unless @space_emitted
        emit(:tSPACE, nil, ts, te)
        @space_emitted = true
      end
    end

    protected

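    # Emits the opening token of the literal, spanning the type specifier
    # (or, for heredocs, everything up to @heredoc_e).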
    def emit_start_tok
      str_e = @heredoc_e || @str_s + @str_type.length

      emit(@start_tok, @str_type, @str_s, str_e)
    end

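    # All tokens are emitted through the owning lexer.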
    def emit(token, type, s, e)
      @lexer.send(:emit, token, type, s, e)
    end
  end

end