# Returns the next parse event from @source as an array whose first element
# is a symbol naming the event. Events built directly in this method are:
# :end_element, :end_document, :comment, :start_doctype, :end_doctype,
# :elementdecl, :entitydecl, :attlistdecl, :notationdecl, :externalentity,
# :cdata, :start_element and :text. Whatever process_instruction returns is
# passed through unchanged for "<?...".
#
# The method is a state machine driven by @document_status, which this
# method sets to :in_doctype, :after_doctype or :in_element (nil means none
# of those has been reached yet).
#
# Raises REXML::ParseException for malformed input and
# UndefinedNamespaceException when an element uses a prefix that is not a
# key of @namespaces. Any other StandardError raised while parsing content
# is wrapped in a REXML::ParseException.
def pull_event
# A previous call saw a self-closing tag and stored its name in @closed;
# emit the matching :end_element now and clear the marker.
if @closed
x, @closed = @closed, nil
return [ :end_element, x ]
end
# NOTE(review): empty? is defined outside this block; presumably it means
# there is no more input to read -- confirm.
if empty?
# Input ended while still inside a DOCTYPE internal subset.
if @document_status == :in_doctype
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
end
# Input ended with elements still open; report the full open path.
unless @tags.empty?
path = "/" + @tags.join("/")
raise ParseException.new("Missing end tag for '#{path}'", @source)
end
return [ :end_document ]
end
# Deliver events queued by an earlier call (e.g. the :end_doctype pushed
# below) before reading more input.
return @stack.shift if @stack.size > 0
#STDERR.puts @source.encoding
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
@source.ensure_buffer
# --- Prolog: no DOCTYPE and no element has been seen yet ---------------
if @document_status == nil
# Remembered so the position can be rewound before raising a DOCTYPE
# name error below.
start_position = @source.position
if @source.match?("<?", true)
return process_instruction
elsif @source.match?("<!", true)
if @source.match?("--", true)
# Comment: take everything up to the first "-->".
md = @source.match(/(.*?)-->/um, true)
if md.nil?
raise REXML::ParseException.new("Unclosed comment", @source)
end
# The comment body must not contain "--" nor end with "-".
if /--|-\z/.match?(md[1])
raise REXML::ParseException.new("Malformed comment", @source)
end
return [ :comment, md[1] ]
elsif @source.match?("DOCTYPE", true)
base_error_message = "Malformed DOCTYPE"
# Whitespace is required between "DOCTYPE" and the name. The ">" check
# is a non-consuming peek used only to pick the error message.
unless @source.match?(/\s+/um, true)
if @source.match?(">")
message = "#{base_error_message}: name is missing"
else
message = "#{base_error_message}: invalid name"
end
@source.position = start_position
raise REXML::ParseException.new(message, @source)
end
name = parse_name(base_error_message)
@source.match?(/\s*/um, true) # skip spaces
if @source.match?("[", true)
# "<!DOCTYPE name [" -- internal subset follows, no external ID.
id = [nil, nil, nil]
@document_status = :in_doctype
elsif @source.match?(">", true)
# "<!DOCTYPE name>" -- no external ID and no internal subset.
id = [nil, nil, nil]
@document_status = :after_doctype
@source.ensure_buffer
else
# Anything else must be an external ID, optionally followed by an
# internal subset.
id = parse_id(base_error_message,
accept_external_id: true,
accept_public_id: false)
if id[0] == "SYSTEM"
# For backward compatibility
id[1], id[2] = id[2], nil
end
@source.match?(/\s*/um, true) # skip spaces
if @source.match?("[", true)
@document_status = :in_doctype
elsif @source.match?(">", true)
@document_status = :after_doctype
@source.ensure_buffer
else
message = "#{base_error_message}: garbage after external ID"
raise REXML::ParseException.new(message, @source)
end
end
args = [:start_doctype, name, *id]
# The DOCTYPE was closed without an internal subset: skip trailing
# whitespace and queue :end_doctype so the next call returns it.
if @document_status == :after_doctype
@source.match?(/\s*/um, true)
@stack << [ :end_doctype ]
end
return args
else
# "<!" followed by neither "--" nor "DOCTYPE".
message = "Invalid XML"
raise REXML::ParseException.new(message, @source)
end
end
# Neither "<?" nor "<!" matched: fall through to content parsing below.
end
# --- Inside the DOCTYPE internal subset ----------------------------------
if @document_status == :in_doctype
@source.match?(/\s*/um, true) # skip spaces
# Remembered so the position can be rewound before raising a NOTATION
# name error below.
start_position = @source.position
if @source.match?("<!", true)
if @source.match?("ELEMENT", true)
# The declaration text up to the first ">" is returned verbatim
# (without the closing ">").
md = @source.match(/(.*?)>/um, true)
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
return [ :elementdecl, "<!ELEMENT" + md[1] ]
elsif @source.match?("ENTITY", true)
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
unless match_data
raise REXML::ParseException.new("Malformed entity declaration", @source)
end
# Build the event from the non-nil captures of the pattern.
match = [:entitydecl, *match_data.captures.compact]
# A leading '%' capture is removed here and re-appended at the end of
# the event (see `match << '%' if ref` below).
ref = false
if match[1] == '%'
ref = true
match.delete_at 1
end
# Now we have to sort out what kind of entity reference this is
if match[2] == 'SYSTEM'
# External reference
# [1..-2] drops the first and last character of the captured literal
# (presumably its surrounding quotes -- confirm against
# ENTITYDECL_PATTERN).
match[3] = match[3][1..-2] # PUBID
match.delete_at(4) if match.size > 4 # Chop out NDATA decl
# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
elsif match[2] == 'PUBLIC'
# External reference
match[3] = match[3][1..-2] # PUBID
match[4] = match[4][1..-2] # HREF
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
else
# Internal entity: drop the first and last character of the value.
match[2] = match[2][1..-2]
match.pop if match.size == 4
# match is [ :entity, name, value ]
end
match << '%' if ref
return match
elsif @source.match?("ATTLIST", true)
md = @source.match(Private::ATTLISTDECL_END, true)
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
element = md[1]
contents = "<!ATTLIST" + md[0]
# pairs maps attdef[0] (presumably the attribute name -- confirm
# against ATTDEF_RE) to its declared default value.
pairs = {}
values = md[0].strip.scan( ATTDEF_RE )
values.each do |attdef|
# Definitions whose fourth capture is "#IMPLIED" are skipped.
unless attdef[3] == "#IMPLIED"
attdef.compact!
val = attdef[3]
# NOTE(review): the comparison string is "#FIXED " with a trailing
# space; whether that matches what ATTDEF_RE captures cannot be
# verified from this block -- confirm.
val = attdef[4] if val == "#FIXED "
pairs[attdef[0]] = val
# A default for an "xmlns:prefix" attribute is recorded in
# @namespaces under that prefix.
if attdef[0] =~ /^xmlns:(.*)/
@namespaces[$1] = val
end
end
end
return [ :attlistdecl, element, pairs, contents ]
elsif @source.match?("NOTATION", true)
base_error_message = "Malformed notation declaration"
# Same whitespace/name checks as for DOCTYPE above.
unless @source.match?(/\s+/um, true)
if @source.match?(">")
message = "#{base_error_message}: name is missing"
else
message = "#{base_error_message}: invalid name"
end
@source.position = start_position
raise REXML::ParseException.new(message, @source)
end
name = parse_name(base_error_message)
id = parse_id(base_error_message,
accept_external_id: true,
accept_public_id: true)
@source.match?(/\s*/um, true) # skip spaces
unless @source.match?(">", true)
message = "#{base_error_message}: garbage before end >"
raise REXML::ParseException.new(message, @source)
end
return [:notationdecl, name, *id]
elsif md = @source.match(/--(.*?)-->/um, true)
# Comment inside the internal subset; same "--" / trailing "-" rule
# as for prolog comments.
case md[1]
when /--/, /-\z/
raise REXML::ParseException.new("Malformed comment", @source)
end
# md is always truthy in this branch, so the `if md` modifier is
# redundant.
return [ :comment, md[1] ] if md
end
# "<!" followed by none of the keywords above falls through to the
# "invalid declaration" error below.
elsif match = @source.match(/(%.*?;)\s*/um, true)
# "%...;" reference; trailing whitespace is consumed but not returned.
return [ :externalentity, match[1] ]
elsif @source.match?(/\]\s*>/um, true)
# "]" optional whitespace ">" closes the internal subset.
@document_status = :after_doctype
return [ :end_doctype ]
end
# Every successful branch above has returned, so reaching this point
# with the status still :in_doctype means nothing valid was recognised.
if @document_status == :in_doctype
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
end
end
# Skip whitespace that follows the DOCTYPE.
if @document_status == :after_doctype
@source.match?(/\s*/um, true)
end
# --- Document content: tags, comments, CDATA, PIs and text ---------------
begin
# Remembered so the position can be rewound before some errors below.
start_position = @source.position
if @source.match?("<", true)
# :text's read_until may remain only "<" in buffer. In the
# case, buffer is empty here. So we need to fill buffer
# here explicitly.
@source.ensure_buffer
if @source.match?("/", true)
# End tag: pop the namespace-restore entry and the innermost open tag
# first, then check that the closing name matches the popped tag.
@namespaces_restore_stack.pop
last_tag = @tags.pop
md = @source.match(Private::CLOSE_PATTERN, true)
# A well-formed end tag was read but no element is open.
if md and !last_tag
message = "Unexpected top-level end tag (got '#{md[1]}')"
raise REXML::ParseException.new(message, @source)
end
if md.nil? or last_tag != md[1]
message = "Missing end tag for '#{last_tag}'"
message += " (got '#{md[1]}')" if md
# Rewind only when the close pattern did not match at all.
@source.position = start_position if md.nil?
raise REXML::ParseException.new(message, @source)
end
return [ :end_element, last_tag ]
elsif @source.match?("!", true)
# This match is called without the second argument used by the
# consuming matches elsewhere in this method (presumably a
# non-consuming peek -- confirm); only the first character of the
# matched text is inspected.
md = @source.match(/([^>]*>)/um)
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
raise REXML::ParseException.new("Malformed node", @source) unless md
if md[0][0] == ?-
# Comment in content; same "--" / trailing "-" rule as above.
md = @source.match(/--(.*?)-->/um, true)
if md.nil? || /--|-\z/.match?(md[1])
raise REXML::ParseException.new("Malformed comment", @source)
end
return [ :comment, md[1] ]
else
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
return [ :cdata, md[1] ] if md
end
# "<!" that is neither a comment nor a CDATA section.
raise REXML::ParseException.new( "Declarations can only occur "+
"in the doctype declaration.", @source)
elsif @source.match?("?", true)
return process_instruction
else
# Get the next tag
md = @source.match(Private::TAG_PATTERN, true)
unless md
@source.position = start_position
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
end
tag = md[1]
@document_status = :in_element
# @prefixes is reset for this tag and seeded with md[2] when present
# (presumably the tag's own namespace prefix -- confirm against
# TAG_PATTERN); it is then handed to parse_attributes.
@prefixes.clear
@prefixes << md[2] if md[2]
push_namespaces_restore
attributes, closed = parse_attributes(@prefixes)
# Verify that all of the prefixes have been defined
for prefix in @prefixes
unless @namespaces.key?(prefix)
raise UndefinedNamespaceException.new(prefix,@source,self)
end
end
if closed
# Self-closing tag: remember the name so the next call emits
# :end_element, and undo the namespace push made above.
@closed = tag
pop_namespaces_restore
else
# A non-self-closing tag opened at top level after a root element
# has already been seen is rejected.
if @tags.empty? and @have_root
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
end
@tags.push( tag )
end
@have_root = true
return [ :start_element, tag, attributes ]
end
else
# Text node: read up to the next "<". If the text ends with "<", strip
# it and step the position back by its byte size.
text = @source.read_until("<")
if text.chomp!("<")
@source.position -= "<".bytesize
end
# No element is open: only whitespace is allowed here.
if @tags.empty?
unless /\A\s*\z/.match?(text)
if @have_root
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
else
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
end
end
# Whitespace after the root element is dropped by moving on to the
# next event; whitespace before the root is returned as :text below.
return pull_event if @have_root
end
return [ :text, text ]
end
# Parser exceptions propagate unchanged; any other StandardError is
# wrapped in a ParseException.
rescue REXML::UndefinedNamespaceException
raise
rescue REXML::ParseException
raise
rescue => error
# `error` is always set by `rescue => error`, so the `$!` alternative is
# never taken.
raise REXML::ParseException.new( "Exception parsing",
@source, self, (error ? error : $!) )
end
# NOTE(review): every branch of the begin block above appears to return or
# raise, so this fallback looks unreachable -- confirm before relying on it.
return [ :dummy ]
end