# lib/trenni/parser.rb



# Copyright, 2012, by Samuel G. D. Williams. <http://www.codeotaku.com>
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

require_relative 'scanner'

module Trenni
	# This parser processes general markup into a sequence of events which are passed to a delegate.
	#
	# The delegate must respond to:
	#   .begin_parse(parser)
	#   .text(escaped_data)
	#   .cdata(unescaped_data)
	#   .attribute(name, value_or_true)
	#   .begin_tag(name, :opened or :closed)
	#   .finish_tag(begin_tag_type, :opened or :closed)
	#   .doctype(doctype_text)
	#   .comment(comment_text)
	#   .instruction(instruction_text)
	#
	# NOTE(review): parse_error! and raise_if_stuck are not defined in this file; they are presumably
	# provided by the StringScanner superclass loaded from 'scanner' - confirm against that file.
	class Parser < StringScanner
		OPENED_TAG = :opened
		CLOSED_TAG = :closed
		
		# @param buffer [String] the markup to parse.
		# @param delegate [Object] receiver of the parse events listed in the class documentation.
		def initialize(buffer, delegate)
			super(buffer)
			
			@delegate = delegate
		end

		# Parse the whole buffer, emitting events to the delegate until the end of input.
		def parse!
			@delegate.begin_parse(self)

			until eos?
				start_pos = pos

				scan_text
				scan_tag

				# The starting position is handed over so an iteration which consumed nothing can be detected.
				raise_if_stuck(start_pos)
			end
		end

		protected

		# Emit a text event for any run of character data up to (not including) the next "<".
		def scan_text
			@delegate.text(matched) if scan(/[^<]+/m)
		end
		
		# Consume a "<" and dispatch on what follows it. Does nothing if the next character is not "<".
		def scan_tag
			return unless scan(/</)

			if scan(/\//)
				scan_tag_normal(CLOSED_TAG)
			elsif scan(/!\[CDATA\[/)
				scan_tag_cdata
			elsif scan(/!--/)
				scan_tag_comment
			elsif scan(/!DOCTYPE/)
				scan_doctype
			elsif scan(/\?/)
				scan_tag_instruction
			else
				scan_tag_normal
			end
		end

		# Parse zero or more attributes in the form of key="value", key='value' or key.
		# A key without a quoted value is reported with the value true.
		def scan_attributes
			while scan(/\s*([^\s=\/>]+)/um)
				name = self[1].freeze

				if scan(/=((['"])(.*?)\2)/um)
					@delegate.attribute(name, self[3].freeze)
				else
					@delegate.attribute(name, true)
				end
			end
		end
		
		# Parse the remainder of an element tag, after "<" or "</" has been consumed.
		#
		# @param begin_tag_type [Symbol] OPENED_TAG for "<name", CLOSED_TAG for "</name".
		def scan_tag_normal(begin_tag_type = OPENED_TAG)
			unless scan(/[^\s\/>]+/)
				parse_error!("Invalid tag!")
				return
			end

			@delegate.begin_tag(matched.freeze, begin_tag_type)
			
			scan(/\s*/)
			scan_attributes
			scan(/\s*/)
			
			if scan(/\/>/)
				if begin_tag_type == CLOSED_TAG
					# Something like "</name/>" is closed at both the start and the end.
					parse_error!("Tag cannot be closed at both ends!")
				else
					@delegate.finish_tag(begin_tag_type, CLOSED_TAG)
				end
			elsif scan(/>/)
				@delegate.finish_tag(begin_tag_type, OPENED_TAG)
			else
				parse_error!("Invalid characters in tag!")
			end
		end
		
		# Parse the body of a doctype, after "<!DOCTYPE" has been consumed. The delegate receives the
		# text up to the first ">" with surrounding whitespace stripped.
		def scan_doctype
			# Multi-line mode is required: otherwise "." cannot cross a newline and scan_until skips
			# ahead to the last line, dropping the earlier lines of the declaration.
			if scan_until(/(.*?)>/m)
				@delegate.doctype(self[1].strip.freeze)
			else
				parse_error!("DOCTYPE is not closed!")
			end
		end
		
		# Parse a CDATA section, after "<![CDATA[" has been consumed, up to the first "]]>".
		def scan_tag_cdata
			if scan_until(/(.*?)\]\]>/m)
				@delegate.cdata(self[1].freeze)
			else
				parse_error!("CDATA segment is not closed!")
			end
		end
		
		# Parse a comment, after "<!--" has been consumed, up to the first "-->".
		def scan_tag_comment
			if scan_until(/(.*?)-->/m)
				@delegate.comment(self[1].freeze)
			else
				parse_error!("Comment is not closed!")
			end
		end
		
		# Parse a processing instruction, after "<?" has been consumed, up to the first "?>".
		def scan_tag_instruction
			# Non-greedy so that a later "?>" on the same line is not swallowed, and multi-line so the
			# instruction text may contain newlines.
			if scan_until(/(.*?)\?>/m)
				@delegate.instruction(self[1].freeze)
			else
				# Report an unterminated instruction the same way as the other unterminated constructs.
				parse_error!("Instruction is not closed!")
			end
		end
	end
end