lib/maruku/input/charsource.rb
require 'strscan' module MaRuKu::In::Markdown::SpanLevelParser # a string scanner coded by me class CharSourceManual; end # a wrapper around StringScanner class CharSourceStrscan; end # A debug scanner that checks the correctness of both # by comparing their output class CharSourceDebug; end # Choose! CharSource = CharSourceManual # faster! 58ms vs. 65ms #CharSource = CharSourceStrscan # Faster on LONG documents. But StringScanner is buggy in Rubinius #CharSource = CharSourceDebug class CharSourceManual def initialize(s, parent=nil) raise "Passed #{s.class}" if not s.kind_of? String @buffer = s @buffer_index = 0 @parent = parent end # Return current char as a String (or nil). def cur_char cur_chars(1) end # Return the next n chars as a String. def cur_chars(n) return nil if @buffer_index >= @buffer.size @buffer[@buffer_index, n] end # Return the char after current char as a String (or nil). def next_char return nil if @buffer_index + 1 >= @buffer.size @buffer[@buffer_index + 1, 1] end def shift_char c = cur_char @buffer_index += 1 c end def ignore_char @buffer_index += 1 end def ignore_chars(n) @buffer_index += n end def current_remaining_buffer @buffer[@buffer_index, @buffer.size - @buffer_index] end def cur_chars_are(string) cur_chars(string.size) == string end def next_matches(r) r2 = /^.{#{@buffer_index}}#{r}/m r2.match @buffer end def read_regexp(r) r2 = /^#{r}/ rest = current_remaining_buffer m = r2.match(rest) if m @buffer_index += m.to_s.size end m end def consume_whitespace while c = cur_char break unless (c == ' ' || c == "\t") ignore_char end end def describe s = describe_pos(@buffer, @buffer_index) if @parent s += "\n\n" + @parent.describe end s end def describe_pos(buffer, buffer_index) len = 75 num_before = [len/2, buffer_index].min num_after = [len/2, buffer.size - buffer_index].min num_before_max = buffer_index num_after_max = buffer.size - buffer_index num_before = [num_before_max, len - num_after].min num_after = [num_after_max, len - num_before].min index_start = [buffer_index - num_before, 0].max index_end = [buffer_index + num_after, buffer.size].min size = index_end - index_start str = buffer[index_start, size] str.gsub!("\n", 'N') str.gsub!("\t", 'T') if index_end == buffer.size str += "EOF" end pre_s = buffer_index - index_start pre_s = [pre_s, 0].max pre_s2 = [len - pre_s, 0].max pre = " " * pre_s "-" * len + "\n" + str + "\n" + "-" * pre_s + "|" + "-" * pre_s2 + "\n" + pre + "+--- Byte #{buffer_index}\n"+ "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+ buffer.gsub(/^/, ">") end end class CharSourceStrscan def initialize(s, parent=nil) @scanner = StringScanner.new(s) @size = s.size end # Return current char as a String (or nil). def cur_char @scanner.peek(1)[0] end # Return the next n chars as a String. def cur_chars(n) @scanner.peek(n) end # Return the char after current char as a String (or nil). def next_char @scanner.peek(2)[1] end # Return a character as a String, advancing the pointer. def shift_char @scanner.getch[0] end # Advance the pointer def ignore_char @scanner.getch end # Advance the pointer by n def ignore_chars(n) n.times { @scanner.getch } end # Return the rest of the string def current_remaining_buffer @scanner.rest end # Returns true if string matches what we're pointing to def cur_chars_are(string) @scanner.peek(string.size) == string end # Returns true if Regexp r matches what we're pointing to def next_matches(r) @scanner.check(r) end def read_regexp(r) r.match(@scanner.scan(r)) end def consume_whitespace @scanner.skip(/\s+/) end def describe len = 75 num_before = [len/2, @scanner.pos].min num_after = [len/2, @scanner.rest_size].min num_before_max = @scanner.pos num_after_max = @scanner.rest_size num_before = [num_before_max, len - num_after].min num_after = [num_after_max, len - num_before].min index_start = [@scanner.pos - num_before, 0].max index_end = [@scanner.pos + num_after, @size].min size = index_end - index_start str = @scanner.string[index_start, size] str.gsub!("\n", 'N') str.gsub!("\t", 'T') if index_end == @size str += "EOF" end pre_s = @scanner.pos - index_start pre_s = [pre_s, 0].max pre_s2 = [len-pre_s, 0].max pre = " " * pre_s "-" * len + "\n" + str + "\n" + "-" * pre_s + "|" + "-" * pre_s2 + "\n" + pre + "+--- Byte #{@scanner.pos}\n" + "Shown bytes [#{index_start} to #{size}] of #{@size}:\n" + @scanner.string.gsub(/^/, ">") end end class CharSourceDebug def initialize(s, parent) @a = CharSourceManual.new(s, parent) @b = CharSourceStrscan.new(s, parent) end def method_missing(methodname, *args) a_bef = @a.describe b_bef = @b.describe a = @a.send(methodname, *args) b = @b.send(methodname, *args) if a.kind_of? MatchData if a.to_a != b.to_a puts "called: #{methodname}(#{args})" puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}" puts "AFTER: " + @a.describe puts "AFTER: " + @b.describe puts "BEFORE: " + a_bef puts "BEFORE: " + b_bef puts caller.join("\n") exit end else if a != b puts "called: #{methodname}(#{args})" puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}" puts "" + @a.describe puts "" + @b.describe puts caller.join("\n") exit end end if @a.cur_char != @b.cur_char puts "Fuori sincronia dopo #{methodname}(#{args})" puts "" + @a.describe puts "" + @b.describe exit end return a end end end