# Copyright, 2012, by Samuel G. D. Williams. <http://www.codeotaku.com>
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

require 'strscan'

module Trenni
  # This parser processes general markup into a sequence of events which are passed to a delegate.
  class Parser
    OPENED_TAG = :opened
    CLOSED_TAG = :closed

    # Locate the line of +input+ that contains the character offset +input_offset+.
    #
    # Lines are measured with String#size, so +input_offset+ is counted in
    # characters, not bytes. An offset that sits exactly at the end of a line
    # (just past its terminator) is attributed to that line.
    #
    # Returns a Hash describing the line, or nil when no line reaches
    # +input_offset+ (for example when +input+ is empty).
    def self.line_at_offset(input, input_offset)
      line_number = 1
      line_offset = offset = 0

      input.each_line do |line|
        line_offset = offset
        offset += line.size

        if offset >= input_offset
          return {
            # The line that contains the input_offset:
            :line_number => line_number,
            # The offset to the start of that line:
            :line_offset => line_offset,
            # The number of characters from the start of the line to the input_offset:
            :character_offset => input_offset - line_offset,
            # The line of text itself:
            :text => line.chomp
          }
        end

        line_number += 1
      end

      return nil
    end

    # Raised when the parser encounters markup it cannot process. The error
    # records where the scanner was when the problem was detected.
    class ParseError < StandardError
      # +message+ describes the problem; +scanner+ is the StringScanner at the
      # point of failure.
      def initialize(message, scanner)
        super(message)

        @message = message

        # Byte position of the scanner, used by the fallback message.
        @position = scanner.pos

        # Parser.line_at_offset counts characters, so the lookup must use a
        # character offset rather than the scanner's byte position.
        @offset = character_position(scanner)
        @line = Parser.line_at_offset(scanner.string, @offset)
      end

      # The character offset into the input at which the error was detected.
      attr_reader :offset

      # Formats the error as "message @ [line:column]: text" when the line
      # could be located, otherwise falls back to the raw scanner position.
      def to_s
        if @line
          "Parse Error: #{@message} @ [#{@line[:line_number]}:#{@line[:character_offset]}]: #{@line[:text]}"
        else
          "Parse Error [#{@position}]: #{@message}"
        end
      end

      private

      # Character position of the scanner; falls back to the byte position on
      # StringScanner implementations that do not provide #charpos.
      def character_position(scanner)
        scanner.respond_to?(:charpos) ? scanner.charpos : scanner.pos
      end
    end

    def initialize(delegate)
      @delegate = delegate
      # The delegate must respond to:
      # .begin_parse(scanner)
      # .text(escaped-data)
      # .cdata(unescaped-data)
      # .attribute(name, value-or-true)
      # .begin_tag(name, :opened or :closed)
      # .finish_tag(begin_tag_type, :opened or :closed)
      # .comment(comment-text)
      # .instruction(instruction-text)
    end

    # Parse +string+, reporting each piece of markup to the delegate in order.
    #
    # Raises ParseError on malformed markup, or if an iteration fails to
    # consume any input (which would otherwise loop forever).
    def parse(string)
      scanner = StringScanner.new(string)

      @delegate.begin_parse(scanner)

      until scanner.eos?
        start_pos = scanner.pos

        scan_text(scanner)
        scan_tag(scanner)

        if start_pos == scanner.pos
          raise ParseError.new("Scanner didn't move", scanner)
        end
      end
    end

    protected

    # Report a run of character data, if there is one at the current position.
    def scan_text(scanner)
      # Match any character data except the open tag character.
      if scanner.scan(/[^<]+/m)
        @delegate.text(scanner.matched)
      end
    end

    # Consume a "<" and dispatch on what follows it: closing tag, CDATA,
    # comment/declaration, processing instruction or ordinary tag.
    def scan_tag(scanner)
      if scanner.scan(/</)
        if scanner.scan(/\//)
          scan_tag_normal(scanner, CLOSED_TAG)
        elsif scanner.scan(/!\[CDATA\[/)
          # Must be tested before the bare "!" case below.
          scan_tag_cdata(scanner)
        elsif scanner.scan(/!/)
          scan_tag_comment(scanner)
        elsif scanner.scan(/\?/)
          scan_tag_instruction(scanner)
        else
          scan_tag_normal(scanner)
        end
      end
    end

    # Report every attribute at the current position to the delegate.
    def scan_attributes(scanner)
      # Parse an attribute in the form of key="value" or key.
      while scanner.scan(/\s*([^\s=\/>]+)/um)
        name = scanner[1]

        if scanner.scan(/=((['"])(.*?)\2)/um)
          # Group 3 is the value without its surrounding quotes.
          @delegate.attribute(name, scanner[3])
        else
          # A bare attribute name is reported with the value true.
          @delegate.attribute(name, true)
        end
      end
    end

    # Scan the remainder of an element tag after "<" (or "</"), reporting the
    # tag name, its attributes and how the tag was terminated.
    #
    # Raises ParseError if the name is missing, the tag is not terminated by
    # ">" or "/>", or a closing tag is also self-closed.
    def scan_tag_normal(scanner, begin_tag_type = OPENED_TAG)
      if scanner.scan(/[^\s\/>]+/)
        @delegate.begin_tag(scanner.matched, begin_tag_type)

        scanner.scan(/\s*/)

        scan_attributes(scanner)

        scanner.scan(/\s*/)

        if scanner.scan(/\/>/)
          if begin_tag_type == CLOSED_TAG
            raise ParseError.new("Tag cannot be closed at both ends!", scanner)
          else
            @delegate.finish_tag(begin_tag_type, CLOSED_TAG)
          end
        elsif scanner.scan(/>/)
          @delegate.finish_tag(begin_tag_type, OPENED_TAG)
        else
          raise ParseError.new("Invalid characters in tag!", scanner)
        end
      else
        raise ParseError.new("Invalid tag!", scanner)
      end
    end

    # Scan the body of a CDATA section up to "]]>" and report it.
    # Raises ParseError if the terminator is missing.
    def scan_tag_cdata(scanner)
      if scanner.scan_until(/(.*?)\]\]>/m)
        @delegate.cdata(scanner[1])
      else
        raise ParseError.new("CDATA segment is not closed!", scanner)
      end
    end

    # Scan what follows "<!": either a "-- ... --" comment or another
    # declaration terminated by ">". Both are reported via delegate.comment;
    # a "--" comment is reported with its "--" delimiters included.
    # Raises ParseError if the terminator is missing.
    def scan_tag_comment(scanner)
      if scanner.scan(/--/)
        if scanner.scan_until(/(.*?)-->/m)
          @delegate.comment("--" + scanner[1] + "--")
        else
          raise ParseError.new("Comment is not closed!", scanner)
        end
      else
        if scanner.scan_until(/(.*?)>/)
          @delegate.comment(scanner[1])
        else
          raise ParseError.new("Comment is not closed!", scanner)
        end
      end
    end

    # Scan the body of a processing instruction up to the first "?>" and
    # report it. Raises ParseError if the terminator is missing.
    def scan_tag_instruction(scanner)
      # Non-greedy so that adjacent instructions are not merged into one, and
      # multi-line so an instruction may span several lines.
      if scanner.scan_until(/(.*?)\?>/m)
        @delegate.instruction(scanner[1])
      else
        raise ParseError.new("Instruction is not closed!", scanner)
      end
    end
  end
end