module Regexp::Scanner
def self.append_literal(data, ts, te)
Appends one or more characters to the literal buffer, to be emitted later by a call to emit_literal.
# Queues one piece of literal text for later emission.
#
# The piece is whatever text(data, ts, te) returns; it is pushed onto the
# @literal buffer, which is created on first use. emit_literal later reads
# each buffered entry as [text, start, end].
#
# data - the scanner input handed through to text
# ts   - start offset of the piece
# te   - end offset of the piece
#
# Returns the @literal buffer.
def self.append_literal(data, ts, te)
  (@literal ||= []).push(text(data, ts, te))
end
def self.copy(data, range)
# Builds a string from a slice of the input data.
#
# data  - an Array of integer character codes
# range - the Range (or other index accepted by Array#[]) selecting the slice
#
# Returns the selected codes packed with the 'c*' directive.
def self.copy(data, range)
  selected = data[range]
  selected.pack('c*')
end
def self.emit(type, token, text, ts, te)
# Records a single token and reports it to the optional block.
#
# Any pending literal run is flushed first (via emit_literal), so buffered
# literal text is always recorded ahead of the token passed in here.
#
# type  - the token type
# token - the token identifier
# text  - the source text of the token
# ts    - start offset of the token
# te    - end offset of the token
#
# Returns the @tokens array.
def self.emit(type, token, text, ts, te)
  # Flush buffered literal text before recording the new token.
  emit_literal if @literal

  entry = [type, token, text, ts, te]
  @block.call(*entry) if @block

  @tokens << entry
end
def self.emit_literal
Emits the literal run collected by calls to the append_literal method.
# Flushes the buffered literal pieces as one :literal token.
#
# The start offset comes from element [1] of the first buffered entry, the
# end offset from element [2] of the last one, and the text is the
# concatenation of element [0] of every entry. When the joined string
# supports force_encoding, it is tagged as 'utf-8'.
#
# Returns whatever emit returns.
def self.emit_literal
  ts = @literal.first[1]
  te = @literal.last[2]

  joined = @literal.map { |piece| piece[0] }.join
  if joined.respond_to?(:force_encoding)
    joined.force_encoding('utf-8')
  end

  # Clear the buffer before emitting: emit itself flushes @literal when it
  # is set, so leaving it in place would re-enter this method.
  @literal = nil
  emit(:literal, :literal, joined, ts, te)
end
def self.emit_options(text, ts, te)
# Emits a [:group, :options] token and updates the free-spacing state.
#
# The option text is matched against the "(?on-off:" form. An 'x' among the
# switched-on flags turns @free_spacing on; an 'x' among the switched-off
# flags turns it off. The resulting state is pushed onto @spacing_stack
# together with the current @group_depth.
#
# text - the source text of the options group
# ts   - start offset of the token
# te   - end offset of the token
#
# Returns whatever emit returns.
def self.emit_options(text, ts, te)
  if text =~ /\(\?([mixdau]+)?-?([mix]+)?:/
    enabled  = Regexp.last_match(1)
    disabled = Regexp.last_match(2)

    @free_spacing = true if enabled && enabled.include?('x')

    # If the x appears in both, treat it like ruby does, the second cancels
    # the first.
    @free_spacing = false if disabled && disabled.include?('x')
  end

  @in_options = true
  @spacing_stack.push([@free_spacing, @group_depth])

  emit(:group, :options, text, ts, te)
end
def self.empty_backref_error(type, what)
# Reports a back-reference whose identifier is empty.
#
# Delegates to validation_error, always with :backref as the error type.
#
# type - accepted but not used by this method
# what - passed through to validation_error as its second argument
def self.empty_backref_error(type, what)
  message = 'ref ID is empty'
  validation_error(:backref, what, message)
end
def self.empty_name_error(type, what)
# Reports a named construct whose name is empty.
#
# Delegates to validation_error with the given type.
#
# type - passed through to validation_error as its first argument
# what - passed through to validation_error as its second argument
def self.empty_name_error(type, what)
  message = 'name is empty'
  validation_error(type, what, message)
end
def self.scan(input_object, &block)
Scans the given regular expression text, or Regexp object, and collects the
emitted tokens into an array that gets returned at the end. If a block is
given, it gets called for each emitted token.
This method may raise errors if a syntax error is encountered.
def self.scan(input_object, &block) top, stack = 0, [] if input_object.is_a?(Regexp) input = input_object.source @free_spacing = (input_object.options & Regexp::EXTENDED != 0) else input = input_object @free_spacing = false end data = input.unpack("c*") if input.is_a?(String) eof = data.length @tokens = [] @block = block_given? ? block : nil @in_group, @group_depth = false, 0 @in_options, @spacing_stack = false, [[@free_spacing, 0]] in_set, set_depth, set_type = false, 0, :set in_conditional, conditional_depth, conditional_stack = false, 0, [] line 1634 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb" gin ||= 0 e ||= data.length s = re_scanner_start op = 0 s = nil e = nil ct = 0 d line 860 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" line 1647 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb" gin estEof = false slen, _trans, _keys, _inds, _acts, _nacts = nil goto_level = 0 resume = 10 eof_trans = 15 again = 20 test_eof = 30 out = 40 hile true f _goto_level <= 0 f p == pe _goto_level = _test_eof next nd f cs == 0 _goto_level = _out next nd nd f _goto_level <= _resume ase _re_scanner_from_state_actions[cs] hen 39 then line 1 "NONE" begin = p end line 1675 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb" nd keys = cs << 1 inds = _re_scanner_index_offsets[cs] slen = _re_scanner_key_spans[cs] wide = data[p].ord trans = if ( _slen > 0 && _re_scanner_trans_keys[_keys] <= _wide && _wide <= _re_scanner_trans_keys[_keys + 1] ) then _re_scanner_indicies[ _inds + _wide - _re_scanner_trans_keys[_keys] ] else _re_scanner_indicies[ _inds + _slen ] end nd f _goto_level <= _eof_trans s = _re_scanner_trans_targs[_trans] f _re_scanner_trans_actions[_trans] != 0 ase _re_scanner_trans_actions[_trans] hen 14 then line 133 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin text = ts ? 
copy(data, ts-1..-1) : data.pack('c*') raise PrematureEndError.new( text ) end hen 7 then line 146 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin group_depth -= 1; @in_group = @group_depth > 0 ? true : false end hen 18 then line 1 "NONE" begin = p+1 end hen 16 then line 61 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/property.rl" begin = p+1 egin text = text(data, ts, te, 1).first if in_set type = :set else type = text[1,1] == 'p' ? :property : :nonproperty end name = data[ts+2..te-2].pack('c*').gsub(/[\s_]/,'').downcase if name[0].chr == '^' name = name[1..-1] type = :nonproperty end case name # Named when 'alnum' self.emit(type, :alnum, text, ts-1, te) when 'alpha' self.emit(type, :alpha, text, ts-1, te) when 'ascii' self.emit(type, :ascii, text, ts-1, te) when 'blank' self.emit(type, :blank, text, ts-1, te) when 'cntrl' self.emit(type, :cntrl, text, ts-1, te) when 'digit' self.emit(type, :digit, text, ts-1, te) when 'graph' self.emit(type, :graph, text, ts-1, te) when 'lower' self.emit(type, :lower, text, ts-1, te) when 'print' self.emit(type, :print, text, ts-1, te) when 'punct' self.emit(type, :punct, text, ts-1, te) when 'space' self.emit(type, :space, text, ts-1, te) when 'upper' self.emit(type, :upper, text, ts-1, te) when 'xdigit' self.emit(type, :xdigit, text, ts-1, te) when 'any' self.emit(type, :any, text, ts-1, te) when 'assigned' self.emit(type, :assigned, text, ts-1, te) when 'newline' self.emit(type, :newline, text, ts-1, te) when 'word' self.emit(type, :word, text, ts-1, te) # Letters when 'l', 'letter' self.emit(type, :letter_any, text, ts-1, te) when 'lu', 'uppercaseletter' self.emit(type, :letter_uppercase, text, ts-1, te) when 'll', 'lowercaseletter' self.emit(type, :letter_lowercase, text, ts-1, te) when 'lt', 'titlecaseletter' self.emit(type, :letter_titlecase, text, ts-1, te) when 'lm', 'modifierletter' self.emit(type, :letter_modifier, text, ts-1, te) when 'lo', 
'otherletter' self.emit(type, :letter_other, text, ts-1, te) # Marks when 'm', 'mark' self.emit(type, :mark_any, text, ts-1, te) when 'mn', 'nonspacingmark' self.emit(type, :mark_nonspacing, text, ts-1, te) when 'mc', 'spacingmark' self.emit(type, :mark_spacing, text, ts-1, te) when 'me', 'enclosingmark' self.emit(type, :mark_enclosing, text, ts-1, te) # Numbers when 'n', 'number' self.emit(type, :number_any, text, ts-1, te) when 'nd', 'decimalnumber' self.emit(type, :number_decimal, text, ts-1, te) when 'nl', 'letternumber' self.emit(type, :number_letter, text, ts-1, te) when 'no', 'othernumber' self.emit(type, :number_other, text, ts-1, te) # Punctuation when 'p', 'punctuation' self.emit(type, :punct_any, text, ts-1, te) when 'pc', 'connectorpunctuation' self.emit(type, :punct_connector, text, ts-1, te) when 'pd', 'dashpunctuation' self.emit(type, :punct_dash, text, ts-1, te) when 'ps', 'openpunctuation' self.emit(type, :punct_open, text, ts-1, te) when 'pe', 'closepunctuation' self.emit(type, :punct_close, text, ts-1, te) when 'pi', 'initialpunctuation' self.emit(type, :punct_initial, text, ts-1, te) when 'pf', 'finalpunctuation' self.emit(type, :punct_final, text, ts-1, te) when 'po', 'otherpunctuation' self.emit(type, :punct_other, text, ts-1, te) # Symbols when 's', 'symbol' self.emit(type, :symbol_any, text, ts-1, te) when 'sm', 'mathsymbol' self.emit(type, :symbol_math, text, ts-1, te) when 'sc', 'currencysymbol' self.emit(type, :symbol_currency, text, ts-1, te) when 'sk', 'modifiersymbol' self.emit(type, :symbol_modifier, text, ts-1, te) when 'so', 'othersymbol' self.emit(type, :symbol_other, text, ts-1, te) # Separators when 'z', 'separator' self.emit(type, :separator_any, text, ts-1, te) when 'zs', 'spaceseparator' self.emit(type, :separator_space, text, ts-1, te) when 'zl', 'lineseparator' self.emit(type, :separator_line, text, ts-1, te) when 'zp', 'paragraphseparator' self.emit(type, :separator_para, text, ts-1, te) # Codepoints when 'c', 'other' 
self.emit(type, :other, text, ts-1, te) when 'cc', 'control' self.emit(type, :control, text, ts-1, te) when 'cf', 'format' self.emit(type, :format, text, ts-1, te) when 'cs', 'surrogate' self.emit(type, :surrogate, text, ts-1, te) when 'co', 'privateuse' self.emit(type, :private_use, text, ts-1, te) when 'cn', 'unassigned' self.emit(type, :unassigned, text, ts-1, te) # Age when 'age=1.1' self.emit(type, :age_1_1, text, ts-1, te) when 'age=2.0' self.emit(type, :age_2_0, text, ts-1, te) when 'age=2.1' self.emit(type, :age_2_1, text, ts-1, te) when 'age=3.0' self.emit(type, :age_3_0, text, ts-1, te) when 'age=3.1' self.emit(type, :age_3_1, text, ts-1, te) when 'age=3.2' self.emit(type, :age_3_2, text, ts-1, te) when 'age=4.0' self.emit(type, :age_4_0, text, ts-1, te) when 'age=4.1' self.emit(type, :age_4_1, text, ts-1, te) when 'age=5.0' self.emit(type, :age_5_0, text, ts-1, te) when 'age=5.1' self.emit(type, :age_5_1, text, ts-1, te) when 'age=5.2' self.emit(type, :age_5_2, text, ts-1, te) when 'age=6.0' self.emit(type, :age_6_0, text, ts-1, te) when 'age=6.1' self.emit(type, :age_6_1, text, ts-1, te) when 'age=6.2' self.emit(type, :age_6_2, text, ts-1, te) when 'age=6.3' self.emit(type, :age_6_3, text, ts-1, te) when 'age=7.0' self.emit(type, :age_7_0, text, ts-1, te) # Derived Properties when 'ahex', 'asciihexdigit' self.emit(type, :ascii_hex, text, ts-1, te) when 'alphabetic' self.emit(type, :alphabetic, text, ts-1, te) when 'cased' self.emit(type, :cased, text, ts-1, te) when 'cwcf', 'changeswhencasefolded' self.emit(type, :changes_when_casefolded, text, ts-1, te) when 'cwcm', 'changeswhencasemapped' self.emit(type, :changes_when_casemapped, text, ts-1, te) when 'cwl', 'changeswhenlowercased' self.emit(type, :changes_when_lowercased, text, ts-1, te) when 'cwt', 'changeswhentitlecased' self.emit(type, :changes_when_titlecased, text, ts-1, te) when 'cwu', 'changeswhenuppercased' self.emit(type, :changes_when_uppercased, text, ts-1, te) when 'ci', 'caseignorable' 
self.emit(type, :case_ignorable, text, ts-1, te) when 'bidic', 'bidicontrol' self.emit(type, :bidi_control, text, ts-1, te) when 'dash' self.emit(type, :dash, text, ts-1, te) when 'dep', 'deprecated' self.emit(type, :deprecated, text, ts-1, te) when 'di', 'defaultignorablecodepoint' self.emit(type, :default_ignorable_cp, text, ts-1, te) when 'dia', 'diacritic' self.emit(type, :diacritic, text, ts-1, te) when 'ext', 'extender' self.emit(type, :extender, text, ts-1, te) when 'grbase', 'graphemebase' self.emit(type, :grapheme_base, text, ts-1, te) when 'grext', 'graphemeextend' self.emit(type, :grapheme_extend, text, ts-1, te) when 'grlink', 'graphemelink' # NOTE: deprecated as of Unicode 5.0 self.emit(type, :grapheme_link, text, ts-1, te) when 'hex', 'hexdigit' self.emit(type, :hex_digit, text, ts-1, te) when 'hyphen' # NOTE: deprecated as of Unicode 6.0 self.emit(type, :hyphen, text, ts-1, te) when 'idc', 'idcontinue' self.emit(type, :id_continue, text, ts-1, te) when 'ideo', 'ideographic' self.emit(type, :ideographic, text, ts-1, te) when 'ids', 'idstart' self.emit(type, :id_start, text, ts-1, te) when 'idsb', 'idsbinaryoperator' self.emit(type, :ids_binary_op, text, ts-1, te) when 'idst', 'idstrinaryoperator' self.emit(type, :ids_trinary_op, text, ts-1, te) when 'joinc', 'joincontrol' self.emit(type, :join_control, text, ts-1, te) when 'loe', 'logicalorderexception' self.emit(type, :logical_order_exception, text, ts-1, te) when 'lowercase' self.emit(type, :lowercase, text, ts-1, te) when 'math' self.emit(type, :math, text, ts-1, te) when 'nchar', 'noncharactercodepoint' self.emit(type, :non_character_cp, text, ts-1, te) when 'oalpha', 'otheralphabetic' self.emit(type, :other_alphabetic, text, ts-1, te) when 'odi', 'otherdefaultignorablecodepoint' self.emit(type, :other_default_ignorable_cp, text, ts-1, te) when 'ogrext', 'othergraphemeextend' self.emit(type, :other_grapheme_extended, text, ts-1, te) when 'oidc', 'otheridcontinue' self.emit(type, 
:other_id_continue, text, ts-1, te) when 'oids', 'otheridstart' self.emit(type, :other_id_start, text, ts-1, te) when 'olower', 'otherlowercase' self.emit(type, :other_lowercase, text, ts-1, te) when 'omath', 'othermath' self.emit(type, :other_math, text, ts-1, te) when 'oupper', 'otheruppercase' self.emit(type, :other_uppercase, text, ts-1, te) when 'patsyn', 'patternsyntax' self.emit(type, :pattern_syntax, text, ts-1, te) when 'patws', 'patternwhitespace' self.emit(type, :pattern_whitespace, text, ts-1, te) when 'qmark', 'quotationmark' self.emit(type, :quotation_mark, text, ts-1, te) when 'radical' self.emit(type, :radical, text, ts-1, te) when 'sd', 'softdotted' self.emit(type, :soft_dotted, text, ts-1, te) when 'sterm' self.emit(type, :sentence_terminal, text, ts-1, te) when 'term', 'terminalpunctuation' self.emit(type, :terminal_punctuation, text, ts-1, te) when 'uideo', 'unifiedideograph' self.emit(type, :unified_ideograph, text, ts-1, te) when 'uppercase' self.emit(type, :uppercase, text, ts-1, te) when 'vs', 'variationselector' self.emit(type, :variation_selector, text, ts-1, te) when 'wspace', 'whitespace' self.emit(type, :whitespace, text, ts-1, te) when 'xids', 'xidstart' self.emit(type, :xid_start, text, ts-1, te) when 'xidc', 'xidcontinue' self.emit(type, :xid_continue, text, ts-1, te) # Scripts when 'aghb', 'caucasianalbanian' self.emit(type, :script_caucasian_albanian, text, ts-1, te) when 'arab', 'arabic' self.emit(type, :script_arabic, text, ts-1, te) when 'armi', 'imperialaramaic' self.emit(type, :script_imperial_aramaic, text, ts-1, te) when 'armn', 'armenian' self.emit(type, :script_armenian, text, ts-1, te) when 'avst', 'avestan' self.emit(type, :script_avestan, text, ts-1, te) when 'bali', 'balinese' self.emit(type, :script_balinese, text, ts-1, te) when 'bamu', 'bamum' self.emit(type, :script_bamum, text, ts-1, te) when 'bass', 'bassavah' self.emit(type, :script_bassa_vah, text, ts-1, te) when 'batk', 'batak' self.emit(type, :script_batak, 
text, ts-1, te) when 'beng', 'bengali' self.emit(type, :script_bengali, text, ts-1, te) when 'bopo', 'bopomofo' self.emit(type, :script_bopomofo, text, ts-1, te) when 'brah', 'brahmi' self.emit(type, :script_brahmi, text, ts-1, te) when 'brai', 'braille' self.emit(type, :script_braille, text, ts-1, te) when 'bugi', 'buginese' self.emit(type, :script_buginese, text, ts-1, te) when 'buhd', 'buhid' self.emit(type, :script_buhid, text, ts-1, te) when 'cans', 'canadianaboriginal' self.emit(type, :script_canadian_aboriginal, text, ts-1, te) when 'cari', 'carian' self.emit(type, :script_carian, text, ts-1, te) when 'cham' self.emit(type, :script_cham, text, ts-1, te) when 'cher', 'cherokee' self.emit(type, :script_cherokee, text, ts-1, te) when 'copt', 'coptic', 'qaac' self.emit(type, :script_coptic, text, ts-1, te) when 'cprt', 'cypriot' self.emit(type, :script_cypriot, text, ts-1, te) when 'cyrl', 'cyrillic' self.emit(type, :script_cyrillic, text, ts-1, te) when 'deva', 'devanagari' self.emit(type, :script_devanagari, text, ts-1, te) when 'dsrt', 'deseret' self.emit(type, :script_deseret, text, ts-1, te) when 'dupl', 'duployan' self.emit(type, :script_duployan, text, ts-1, te) when 'egyp', 'egyptianhieroglyphs' self.emit(type, :script_egyptian_hieroglyphs, text, ts-1, te) when 'elba', 'elbasan' self.emit(type, :script_elbasan, text, ts-1, te) when 'ethi', 'ethiopic' self.emit(type, :script_ethiopic, text, ts-1, te) when 'geor', 'georgian' self.emit(type, :script_georgian, text, ts-1, te) when 'glag', 'glagolitic' self.emit(type, :script_glagolitic, text, ts-1, te) when 'goth', 'gothic' self.emit(type, :script_gothic, text, ts-1, te) when 'gran', 'grantha' self.emit(type, :script_grantha, text, ts-1, te) when 'grek', 'greek' self.emit(type, :script_greek, text, ts-1, te) when 'gujr', 'gujarati' self.emit(type, :script_gujarati, text, ts-1, te) when 'guru', 'gurmukhi' self.emit(type, :script_gurmukhi, text, ts-1, te) when 'hang', 'hangul' self.emit(type, :script_hangul, 
text, ts-1, te) when 'hani', 'han' self.emit(type, :script_han, text, ts-1, te) when 'hano', 'hanunoo' self.emit(type, :script_hanunoo, text, ts-1, te) when 'hebr', 'hebrew' self.emit(type, :script_hebrew, text, ts-1, te) when 'hira', 'hiragana' self.emit(type, :script_hiragana, text, ts-1, te) when 'hmng', 'pahawhhmong' self.emit(type, :script_pahawh_hmong, text, ts-1, te) when 'hrkt', 'katakanaorhiragana' self.emit(type, :script_katakana_or_hiragana, text, ts-1, te) when 'ital', 'olditalic' self.emit(type, :script_old_italic, text, ts-1, te) when 'java', 'javanese' self.emit(type, :script_javanese, text, ts-1, te) when 'kali', 'kayahli' self.emit(type, :script_kayah_li, text, ts-1, te) when 'kana', 'katakana' self.emit(type, :script_katakana, text, ts-1, te) when 'khar', 'kharoshthi' self.emit(type, :script_kharoshthi, text, ts-1, te) when 'khmr', 'khmer' self.emit(type, :script_khmer, text, ts-1, te) when 'khoj', 'khojki' self.emit(type, :script_khojki, text, ts-1, te) when 'knda', 'kannada' self.emit(type, :script_kannada, text, ts-1, te) when 'kthi', 'kaithi' self.emit(type, :script_kaithi, text, ts-1, te) when 'lana', 'taitham' self.emit(type, :script_tai_tham, text, ts-1, te) when 'laoo', 'lao' self.emit(type, :script_lao, text, ts-1, te) when 'latn', 'latin' self.emit(type, :script_latin, text, ts-1, te) when 'lepc', 'lepcha' self.emit(type, :script_lepcha, text, ts-1, te) when 'limb', 'limbu' self.emit(type, :script_limbu, text, ts-1, te) when 'lina', 'lineara' self.emit(type, :script_linear_a, text, ts-1, te) when 'linb', 'linearb' self.emit(type, :script_linear_b, text, ts-1, te) when 'lisu' self.emit(type, :script_lisu, text, ts-1, te) when 'lyci', 'lycian' self.emit(type, :script_lycian, text, ts-1, te) when 'lydi', 'lydian' self.emit(type, :script_lydian, text, ts-1, te) when 'mlym', 'malayalam' self.emit(type, :script_malayalam, text, ts-1, te) when 'mahj', 'mahajani' self.emit(type, :script_mahajani, text, ts-1, te) when 'mand', 'mandaic' 
self.emit(type, :script_mandaic, text, ts-1, te) when 'mani', 'manichaean' self.emit(type, :script_manichaean, text, ts-1, te) when 'mend', 'mendekikakui' self.emit(type, :script_mende_kikakui, text, ts-1, te) when 'modi' self.emit(type, :script_modi, text, ts-1, te) when 'mong', 'mongolian' self.emit(type, :script_mongolian, text, ts-1, te) when 'mroo', 'mro' self.emit(type, :script_mro, text, ts-1, te) when 'mtei', 'meeteimayek' self.emit(type, :script_meetei_mayek, text, ts-1, te) when 'mymr', 'myanmar' self.emit(type, :script_myanmar, text, ts-1, te) when 'narb', 'oldnortharabian' self.emit(type, :script_old_north_arabian, text, ts-1, te) when 'nbat', 'nabataean' self.emit(type, :script_nabataean, text, ts-1, te) when 'nkoo', 'nko' self.emit(type, :script_nko, text, ts-1, te) when 'ogam', 'ogham' self.emit(type, :script_ogham, text, ts-1, te) when 'olck', 'olchiki' self.emit(type, :script_ol_chiki, text, ts-1, te) when 'orkh', 'oldturkic' self.emit(type, :script_old_turkic, text, ts-1, te) when 'orya', 'oriya' self.emit(type, :script_oriya, text, ts-1, te) when 'osma', 'osmanya' self.emit(type, :script_osmanya, text, ts-1, te) when 'palm', 'palmyrene' self.emit(type, :script_palmyrene, text, ts-1, te) when 'pauc', 'paucinhau' self.emit(type, :script_pau_cin_hau, text, ts-1, te) when 'perm', 'oldpermic' self.emit(type, :script_old_permic, text, ts-1, te) when 'phag', 'phagspa' self.emit(type, :script_phags_pa, text, ts-1, te) when 'phli', 'inscriptionalpahlavi' self.emit(type, :script_inscriptional_pahlavi, text, ts-1, te) when 'phlp', 'psalterpahlavi' self.emit(type, :script_psalter_pahlavi, text, ts-1, te) when 'phnx', 'phoenician' self.emit(type, :script_phoenician, text, ts-1, te) when 'prti', 'inscriptionalparthian' self.emit(type, :script_inscriptional_parthian, text, ts-1, te) when 'rjng', 'rejang' self.emit(type, :script_rejang, text, ts-1, te) when 'runr', 'runic' self.emit(type, :script_runic, text, ts-1, te) when 'samr', 'samaritan' self.emit(type, 
:script_samaritan, text, ts-1, te) when 'sarb', 'oldsoutharabian' self.emit(type, :script_old_south_arabian, text, ts-1, te) when 'saur', 'saurashtra' self.emit(type, :script_saurashtra, text, ts-1, te) when 'shaw', 'shavian' self.emit(type, :script_shavian, text, ts-1, te) when 'sidd', 'siddham' self.emit(type, :script_siddham, text, ts-1, te) when 'sind', 'khudawadi' self.emit(type, :script_khudawadi, text, ts-1, te) when 'sinh', 'sinhala' self.emit(type, :script_sinhala, text, ts-1, te) when 'sund', 'sundanese' self.emit(type, :script_sundanese, text, ts-1, te) when 'sylo', 'sylotinagri' self.emit(type, :script_syloti_nagri, text, ts-1, te) when 'syrc', 'syriac' self.emit(type, :script_syriac, text, ts-1, te) when 'tagb', 'tagbanwa' self.emit(type, :script_tagbanwa, text, ts-1, te) when 'tale', 'taile' self.emit(type, :script_tai_le, text, ts-1, te) when 'talu', 'newtailue' self.emit(type, :script_new_tai_lue, text, ts-1, te) when 'taml', 'tamil' self.emit(type, :script_tamil, text, ts-1, te) when 'tavt', 'taiviet' self.emit(type, :script_tai_viet, text, ts-1, te) when 'telu', 'telugu' self.emit(type, :script_telugu, text, ts-1, te) when 'tfng', 'tifinagh' self.emit(type, :script_tifinagh, text, ts-1, te) when 'tglg', 'tagalog' self.emit(type, :script_tagalog, text, ts-1, te) when 'thaa', 'thaana' self.emit(type, :script_thaana, text, ts-1, te) when 'thai' self.emit(type, :script_thai, text, ts-1, te) when 'tibt', 'tibetan' self.emit(type, :script_tibetan, text, ts-1, te) when 'tirh', 'tirhuta' self.emit(type, :script_tirhuta, text, ts-1, te) when 'ugar', 'ugaritic' self.emit(type, :script_ugaritic, text, ts-1, te) when 'vaii', 'vai' self.emit(type, :script_vai, text, ts-1, te) when 'wara', 'warangciti' self.emit(type, :script_warang_citi, text, ts-1, te) when 'xpeo', 'oldpersian' self.emit(type, :script_old_persian, text, ts-1, te) when 'xsux', 'cuneiform' self.emit(type, :script_cuneiform, text, ts-1, te) when 'yiii', 'yi' self.emit(type, :script_yi, text, 
ts-1, te) when 'zinh', 'inherited', 'qaai' self.emit(type, :script_inherited, text, ts-1, te) when 'zyyy', 'common' self.emit(type, :script_common, text, ts-1, te) when 'zzzz', 'unknown' self.emit(type, :script_unknown, text, ts-1, te) else # Should this really be an error? Or would emitting # an :unknown for the property be better? # # self.emit(type, :unknown, text, ts-1, te) raise UnknownUnicodePropertyError.new(name) end begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 67 then line 152 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin set_type = set_depth > 1 ? :subset : :set set_depth -= 1; in_set = set_depth > 0 ? true : false emit(set_type, :close, *text(data, ts, te)) if set_depth == 0 begin cs = 139 _goto_level = _again next nd else begin top -= 1 cs = stack[top] _goto_level = _again next nd end end end hen 71 then line 165 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin # special case, emits two tokens set_type = set_depth > 1 ? :subset : :set set_depth -= 1; in_set = set_depth > 0 ? 
true : false emit(set_type, :member, copy(data, ts..te-2), ts, te) emit(set_type, :close, copy(data, ts+1..te-1), ts, te) if set_depth == 0 begin cs = 139 _goto_level = _again next nd else begin top -= 1 cs = stack[top] _goto_level = _again next nd end end end hen 68 then line 179 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin text = text(data, ts, te).first if @tokens.last[1] == :open emit(set_type, :negate, text, ts, te) else emit(set_type, :member, text, ts, te) end end end hen 19 then line 188 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(set_type, :range, *text(data, ts, te)) end end hen 70 then line 192 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(set_type, :intersection, *text(data, ts, te)) end end hen 66 then line 196 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin begin stack[top] = cs top+= 1 cs = 167 _goto_level = _again next nd end end hen 23 then line 208 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin text = text(data, ts, te).first class_name = text[2..-3] if class_name[0].chr == '^' class_name = "non#{class_name[1..-1]}" end token_sym = "class_#{class_name}".to_sym emit(set_type, token_sym, text, ts, te) end end hen 22 then line 220 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(set_type, :collation, *text(data, ts, te)) end end hen 24 then line 224 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(set_type, :equivalent, *text(data, ts, te)) end end hen 65 then line 230 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(set_type, :member, *text(data, ts, te)) end end hen 64 then line 238 
"/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(set_type, :member, *text(data, ts, te)) end end hen 73 then line 200 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin set_depth += 1; in_set = true set_type = set_depth > 1 ? :subset : :set emit(set_type, :open, *text(data, ts, te)) begin stack[top] = cs top+= 1 cs = 155 _goto_level = _again next nd end end hen 69 then line 238 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin emit(set_type, :member, *text(data, ts, te)) end end hen 21 then line 200 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin set_depth += 1; in_set = true set_type = set_depth > 1 ? :subset : :set emit(set_type, :open, *text(data, ts, te)) begin stack[top] = cs top+= 1 cs = 155 _goto_level = _again next nd end end hen 17 then line 238 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin emit(set_type, :member, *text(data, ts, te)) end end hen 78 then line 246 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(set_type, :backspace, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 76 then line 251 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin case text = text(data, ts, te, 1).first when '\d'; emit(set_type, :type_digit, text, ts-1, te) when '\D'; emit(set_type, :type_nondigit, text, ts-1, te) when '\h'; emit(set_type, :type_hex, text, ts-1, te) when '\H'; emit(set_type, :type_nonhex, text, ts-1, te) when '\s'; emit(set_type, :type_space, text, ts-1, te) when '\S'; emit(set_type, :type_nonspace, text, ts-1, te) when '\w'; emit(set_type, :type_word, text, ts-1, te) when '\W'; emit(set_type, 
:type_nonword, text, ts-1, te) end begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 82 then line 265 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(set_type, :range_hex, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 74 then line 275 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(set_type, :escape, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 77 then line 280 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin p = p - 1; cs = 155; begin stack[top] = cs top+= 1 cs = 154 _goto_level = _again next nd begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 81 then line 265 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin emit(set_type, :range_hex, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 80 then line 270 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin emit(set_type, :member_hex, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 79 then line 292 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin emit(set_type, :escape, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 27 then line 270 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin emit(set_type, :member_hex, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 25 then line 1 "NONE" begin ase act hen 0 then egin begin cs = 0 _goto_level = _again next nd d hen 18 then egin begin p = 
((te))-1; end emit(set_type, :escape, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end hen 20 then egin begin p = ((te))-1; end emit(set_type, :escape, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end d end hen 87 then line 302 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin text = text(data, ts, te, 1).first emit(:backref, :number, text, ts-1, te) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 92 then line 308 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(:escape, :octal, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 84 then line 313 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin case text = text(data, ts, te, 1).first when '\.'; emit(:escape, :dot, text, ts-1, te) when '\|'; emit(:escape, :alternation, text, ts-1, te) when '\^'; emit(:escape, :bol, text, ts-1, te) when '\$'; emit(:escape, :eol, text, ts-1, te) when '\?'; emit(:escape, :zero_or_one, text, ts-1, te) when '\*'; emit(:escape, :zero_or_more, text, ts-1, te) when '\+'; emit(:escape, :one_or_more, text, ts-1, te) when '\('; emit(:escape, :group_open, text, ts-1, te) when '\)'; emit(:escape, :group_close, text, ts-1, te) when '\{'; emit(:escape, :interval_open, text, ts-1, te) when '\}'; emit(:escape, :interval_close, text, ts-1, te) when '\['; emit(:escape, :set_open, text, ts-1, te) when '\]'; emit(:escape, :set_close, text, ts-1, te) when "\\\\"; emit(:escape, :backslash, text, ts-1, te) end begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 89 then line 334 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin # \b is emitted as backspace only when inside a character set, otherwise # it is a word boundary anchor. 
A syntax might "normalize" it if needed. case text = text(data, ts, te, 1).first when '\a'; emit(:escape, :bell, text, ts-1, te) when '\e'; emit(:escape, :escape, text, ts-1, te) when '\f'; emit(:escape, :form_feed, text, ts-1, te) when '\n'; emit(:escape, :newline, text, ts-1, te) when '\r'; emit(:escape, :carriage, text, ts-1, te) when '\s'; emit(:escape, :space, text, ts-1, te) when '\t'; emit(:escape, :tab, text, ts-1, te) when '\v'; emit(:escape, :vertical_tab, text, ts-1, te) end begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 32 then line 350 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin text = text(data, ts, te, 1).first if text[2].chr == '{' emit(:escape, :codepoint_list, text, ts-1, te) else emit(:escape, :codepoint, text, ts-1, te) end begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 97 then line 360 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(:escape, :hex, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 100 then line 365 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(:escape, :hex_wide, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 34 then line 374 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin raise InvalidSequenceError.new("wide hex sequence") begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 28 then line 379 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin if data[te] c = data[te].chr if c =~ /[\x00-\x7F]/ emit(:escape, :control, copy(data, ts-1..te), ts-1, te+1) p += 1 else raise InvalidSequenceError.new("control sequence") end else raise PrematureEndError.new("control sequence") end begin top -= 1 cs = 
stack[top] _goto_level = _again next nd end end hen 31 then line 394 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin if data[te] c = data[te].chr if c =~ /[\x00-\x7F]/ emit(:escape, :meta_sequence, copy(data, ts-1..te), ts-1, te+1) p += 1 else raise InvalidSequenceError.new("meta sequence") end else raise PrematureEndError.new("meta sequence") end begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 88 then line 409 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin p = p - 1; cs = 139; begin stack[top] = cs top+= 1 cs = 154 _goto_level = _again next nd begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 83 then line 415 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(:escape, :literal, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 91 then line 308 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin emit(:escape, :octal, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 96 then line 360 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin emit(:escape, :hex, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 98 then line 374 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin raise InvalidSequenceError.new("wide hex sequence") begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 94 then line 394 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin if data[te] c = data[te].chr if c =~ /[\x00-\x7F]/ emit(:escape, :meta_sequence, copy(data, ts-1..te), ts-1, te+1) p += 1 else raise 
InvalidSequenceError.new("meta sequence") end else raise PrematureEndError.new("meta sequence") end begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 35 then line 374 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin raise InvalidSequenceError.new("wide hex sequence") begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 30 then line 394 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin if data[te] c = data[te].chr if c =~ /[\x00-\x7F]/ emit(:escape, :meta_sequence, copy(data, ts-1..te), ts-1, te+1) p += 1 else raise InvalidSequenceError.new("meta sequence") end else raise PrematureEndError.new("meta sequence") end begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 90 then line 1 "NONE" begin ase act hen 21 then egin begin p = ((te))-1; end text = text(data, ts, te, 1).first emit(:backref, :number, text, ts-1, te) begin top -= 1 cs = stack[top] _goto_level = _again next nd end hen 22 then egin begin p = ((te))-1; end emit(:escape, :octal, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end d end hen 37 then line 425 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin text = text(data, ts, te-1).first emit(:conditional, :condition, text, ts, te-1) emit(:conditional, :condition_close, ')', te-1, te) end end hen 101 then line 431 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin p = p - 1; begin stack[top] = cs top+= 1 cs = 139 _goto_level = _again next nd end end hen 102 then line 431 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin p = p - 1; begin stack[top] = cs top+= 1 cs = 139 _goto_level = _again next nd end end hen 36 then line 431 
"/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin p = p - 1; begin stack[top] = cs top+= 1 cs = 139 _goto_level = _again next nd end end hen 43 then line 444 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(:meta, :dot, *text(data, ts, te)) end end hen 46 then line 448 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin if in_conditional and conditional_stack.length > 0 and conditional_stack.last[1] == @group_depth emit(:conditional, :separator, *text(data, ts, te)) else emit(:meta, :alternation, *text(data, ts, te)) end end end hen 45 then line 459 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(:anchor, :bol, *text(data, ts, te)) end end hen 40 then line 463 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(:anchor, :eol, *text(data, ts, te)) end end hen 60 then line 467 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(:keep, :mark, *text(data, ts, te)) end end hen 58 then line 471 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin case text = text(data, ts, te).first when '\\A'; emit(:anchor, :bos, text, ts, te) when '\\z'; emit(:anchor, :eos, text, ts, te) when '\\Z'; emit(:anchor, :eos_ob_eol, text, ts, te) when '\\b'; emit(:anchor, :word_boundary, text, ts, te) when '\\B'; emit(:anchor, :nonword_boundary, text, ts, te) when '\\G'; emit(:anchor, :match_start, text, ts, te) else raise ScannerError.new( "Unexpected character in anchor at #{text} (char #{ts})") end end end hen 59 then line 491 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin case text = text(data, ts, te).first when '\\d'; emit(:type, :digit, text, ts, te) 
when '\\D'; emit(:type, :nondigit, text, ts, te) when '\\h'; emit(:type, :hex, text, ts, te) when '\\H'; emit(:type, :nonhex, text, ts, te) when '\\s'; emit(:type, :space, text, ts, te) when '\\S'; emit(:type, :nonspace, text, ts, te) when '\\w'; emit(:type, :word, text, ts, te) when '\\W'; emit(:type, :nonword, text, ts, te) else raise ScannerError.new( "Unexpected character in type at #{text} (char #{ts})") end end end hen 44 then line 510 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin set_depth += 1; in_set = true set_type = set_depth > 1 ? :subset : :set emit(set_type, :open, *text(data, ts, te)) begin stack[top] = cs top+= 1 cs = 155 _goto_level = _again next nd end end hen 8 then line 522 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin text = text(data, ts, te).first in_conditional = true unless in_conditional conditional_depth += 1 conditional_stack << [conditional_depth, @group_depth] emit(:conditional, :open, text[0..-2], ts, te-1) emit(:conditional, :condition_open, '(', te-1, te) begin stack[top] = cs top+= 1 cs = 207 _goto_level = _again next nd end end hen 9 then line 555 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin p = scan_options(p, data, ts, te) end end hen 6 then line 565 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin case text = text(data, ts, te).first when '(?='; emit(:assertion, :lookahead, text, ts, te) when '(?!'; emit(:assertion, :nlookahead, text, ts, te) when '(?<='; emit(:assertion, :lookbehind, text, ts, te) when '(?<!'; emit(:assertion, :nlookbehind, text, ts, te) end end end hen 10 then line 581 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin case text = text(data, ts, te).first when '(?:'; emit(:group, :passive, text, ts, te) when '(?>'; emit(:group, :atomic, text, ts, 
te) when /^\(\?<(\w*)>/ empty_name_error(:group, 'named group (ab)') if $1.empty? emit(:group, :named_ab, text, ts, te) when /^\(\?'(\w*)'/ empty_name_error(:group, 'named group (sq)') if $1.empty? emit(:group, :named_sq, text, ts, te) else raise ScannerError.new( "Unknown subexpression group format '#{text}'") end end end hen 13 then line 636 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin case text = text(data, ts, te).first when /^\\([gk])<>/ # angle brackets empty_backref_error("ref/call (ab)") when /^\\([gk])''/ # single quotes empty_backref_error("ref/call (sq)") when /^\\([gk])<[^\d-](\w+)?>/ # angle-brackets if $1 == 'k' emit(:backref, :name_ref_ab, text, ts, te) else emit(:backref, :name_call_ab, text, ts, te) end when /^\\([gk])'[^\d-](\w+)?'/ #single quotes if $1 == 'k' emit(:backref, :name_ref_sq, text, ts, te) else emit(:backref, :name_call_sq, text, ts, te) end when /^\\([gk])<\d+>/ # angle-brackets if $1 == 'k' emit(:backref, :number_ref_ab, text, ts, te) else emit(:backref, :number_call_ab, text, ts, te) end when /^\\([gk])'\d+'/ # single quotes if $1 == 'k' emit(:backref, :number_ref_sq, text, ts, te) else emit(:backref, :number_call_sq, text, ts, te) end when /^\\([gk])<-\d+>/ # angle-brackets if $1 == 'k' emit(:backref, :number_rel_ref_ab, text, ts, te) else emit(:backref, :number_rel_call_ab, text, ts, te) end when /^\\([gk])'-\d+'/ # single quotes if $1 == 'k' emit(:backref, :number_rel_ref_sq, text, ts, te) else emit(:backref, :number_rel_call_sq, text, ts, te) end when /^\\k<[^\d-](\w+)?[+\-]\d+>/ # angle-brackets emit(:backref, :name_nest_ref_ab, text, ts, te) when /^\\k'[^\d-](\w+)?[+\-]\d+'/ # single-quotes emit(:backref, :name_nest_ref_sq, text, ts, te) when /^\\([gk])<\d+[+\-]\d+>/ # angle-brackets emit(:backref, :number_nest_ref_ab, text, ts, te) when /^\\([gk])'\d+[+\-]\d+'/ # single-quotes emit(:backref, :number_nest_ref_sq, text, ts, te) else raise ScannerError.new( "Unknown 
backreference format '#{text}'") end end end hen 56 then line 707 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin case text = text(data, ts, te).first when '?' ; emit(:quantifier, :zero_or_one, text, ts, te) when '??'; emit(:quantifier, :zero_or_one_reluctant, text, ts, te) when '?+'; emit(:quantifier, :zero_or_one_possessive, text, ts, te) end end end hen 52 then line 715 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin case text = text(data, ts, te).first when '*' ; emit(:quantifier, :zero_or_more, text, ts, te) when '*?'; emit(:quantifier, :zero_or_more_reluctant, text, ts, te) when '*+'; emit(:quantifier, :zero_or_more_possessive, text, ts, te) end end end hen 54 then line 723 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin case text = text(data, ts, te).first when '+' ; emit(:quantifier, :one_or_more, text, ts, te) when '+?'; emit(:quantifier, :one_or_more_reluctant, text, ts, te) when '++'; emit(:quantifier, :one_or_more_possessive, text, ts, te) end end end hen 62 then line 731 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(:quantifier, :interval, *text(data, ts, te)) end end hen 4 then line 741 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin if @free_spacing emit(:free_space, :comment, *text(data, ts, te)) else append_literal(data, ts, te) end end end hen 49 then line 602 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin text = text(data, ts, te).first emit(:group, :capture, text, ts, te) end end hen 55 then line 707 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin case text = text(data, ts, te).first when '?' 
; emit(:quantifier, :zero_or_one, text, ts, te) when '??'; emit(:quantifier, :zero_or_one_reluctant, text, ts, te) when '?+'; emit(:quantifier, :zero_or_one_possessive, text, ts, te) end end end hen 51 then line 715 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin case text = text(data, ts, te).first when '*' ; emit(:quantifier, :zero_or_more, text, ts, te) when '*?'; emit(:quantifier, :zero_or_more_reluctant, text, ts, te) when '*+'; emit(:quantifier, :zero_or_more_possessive, text, ts, te) end end end hen 53 then line 723 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin case text = text(data, ts, te).first when '+' ; emit(:quantifier, :one_or_more, text, ts, te) when '+?'; emit(:quantifier, :one_or_more_reluctant, text, ts, te) when '++'; emit(:quantifier, :one_or_more_possessive, text, ts, te) end end end hen 61 then line 731 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin emit(:quantifier, :interval, *text(data, ts, te)) end end hen 57 then line 737 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin begin stack[top] = cs top+= 1 cs = 177 _goto_level = _again next nd end end hen 48 then line 749 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin if @free_spacing emit(:free_space, :whitespace, *text(data, ts, te)) else append_literal(data, ts, te) end end end hen 47 then line 764 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin append_literal(data, ts, te) end end hen 5 then line 602 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin text = text(data, ts, te).first emit(:group, :capture, text, ts, te) end end hen 12 then line 737 
"/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin begin stack[top] = cs top+= 1 cs = 177 _goto_level = _again next nd end end hen 3 then line 764 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin append_literal(data, ts, te) end end hen 1 then line 1 "NONE" begin ase act hen 0 then egin begin cs = 0 _goto_level = _again next nd d hen 59 then egin begin p = ((te))-1; end append_literal(data, ts, te) end d end hen 72 then line 133 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin text = ts ? copy(data, ts-1..-1) : data.pack('c*') raise PrematureEndError.new( text ) end line 200 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin set_depth += 1; in_set = true set_type = set_depth > 1 ? :subset : :set emit(set_type, :open, *text(data, ts, te)) begin stack[top] = cs top+= 1 cs = 155 _goto_level = _again next nd end end hen 20 then line 133 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin text = ts ? copy(data, ts-1..-1) : data.pack('c*') raise PrematureEndError.new( text ) end line 200 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin set_depth += 1; in_set = true set_type = set_depth > 1 ? :subset : :set emit(set_type, :open, *text(data, ts, te)) begin stack[top] = cs top+= 1 cs = 155 _goto_level = _again next nd end end hen 95 then line 133 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin text = ts ? 
copy(data, ts-1..-1) : data.pack('c*') raise PrematureEndError.new( text ) end line 360 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin emit(:escape, :hex, *text(data, ts, te, 1)) begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 99 then line 133 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin text = ts ? copy(data, ts-1..-1) : data.pack('c*') raise PrematureEndError.new( text ) end line 374 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin raise InvalidSequenceError.new("wide hex sequence") begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 93 then line 133 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin text = ts ? copy(data, ts-1..-1) : data.pack('c*') raise PrematureEndError.new( text ) end line 394 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p = p - 1; begin if data[te] c = data[te].chr if c =~ /[\x00-\x7F]/ emit(:escape, :meta_sequence, copy(data, ts-1..te), ts-1, te+1) p += 1 else raise InvalidSequenceError.new("meta sequence") end else raise PrematureEndError.new("meta sequence") end begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 29 then line 133 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin text = ts ? 
copy(data, ts-1..-1) : data.pack('c*') raise PrematureEndError.new( text ) end line 394 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin egin p = ((te))-1; end egin if data[te] c = data[te].chr if c =~ /[\x00-\x7F]/ emit(:escape, :meta_sequence, copy(data, ts-1..te), ts-1, te+1) p += 1 else raise InvalidSequenceError.new("meta sequence") end else raise PrematureEndError.new("meta sequence") end begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 33 then line 139 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin text = ts ? copy(data, ts-1..-1) : data.pack('c*') raise InvalidSequenceError.new('sequence', text) end line 370 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin begin top -= 1 cs = stack[top] _goto_level = _again next nd end end hen 50 then line 146 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin group_depth -= 1; @in_group = @group_depth > 0 ? true : false end line 145 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin group_depth += 1; @in_group = true end hen 11 then line 146 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin group_depth -= 1; @in_group = @group_depth > 0 ? true : false end line 540 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin emit(:group, :comment, *text(data, ts, te)) end end hen 42 then line 146 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin group_depth -= 1; @in_group = @group_depth > 0 ? 
true : false end line 607 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin = p+1 egin if in_conditional and conditional_stack.last and conditional_stack.last[1] == (@group_depth + 1) emit(:conditional, :close, *text(data, ts, te)) conditional_stack.pop if conditional_stack.length == 0 in_conditional = false end else if @spacing_stack.length > 1 and @spacing_stack.last[1] == (@group_depth + 1) @spacing_stack.pop @free_spacing = @spacing_stack.last[0] if @spacing_stack.length == 1 @in_options = false end end emit(:group, :close, *text(data, ts, te)) end end end hen 41 then line 1 "NONE" begin = p+1 end line 145 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin group_depth += 1; @in_group = true end hen 75 then line 1 "NONE" begin = p+1 end line 275 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin t = 18; end hen 26 then line 1 "NONE" begin = p+1 end line 292 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin t = 20; end hen 86 then line 1 "NONE" begin = p+1 end line 302 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin t = 21; end hen 85 then line 1 "NONE" begin = p+1 end line 308 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin t = 22; end hen 2 then line 1 "NONE" begin = p+1 end line 764 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin t = 59; end line 3795 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb" nd nd nd f _goto_level <= _again ase _re_scanner_to_state_actions[cs] hen 63 then line 1 "NONE" begin = nil; end hen 38 then line 1 "NONE" begin = nil; end line 1 "NONE" begin t = 0 end line 3813 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb" nd f cs == 0 _goto_level = _out next nd += 1 f p != pe _goto_level = _resume next nd nd f 
_goto_level <= _test_eof f p == eof f _re_scanner_eof_trans[cs] > 0 _trans = _re_scanner_eof_trans[cs] - 1; _goto_level = _eof_trans next; nd case _re_scanner_eof_actions[cs] hen 15 then line 53 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/property.rl" begin raise PrematureEndError.new('unicode property') end hen 14 then line 133 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" begin text = ts ? copy(data, ts-1..-1) : data.pack('c*') raise PrematureEndError.new( text ) end line 3847 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb" end nd nd f _goto_level <= _out break nd d nd line 861 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl" if cs == re_scanner_error text = ts ? copy(data, ts-1..-1) : data.pack('c*') raise ScannerError.new("Scan error at '#{text}'") end raise PrematureEndError.new("(missing group closing paranthesis) "+ "[#{@in_group}:#{@group_depth}]") if @in_group raise PrematureEndError.new("(missing set closing bracket) "+ "[#{in_set}:#{set_depth}]") if in_set # when the entire expression is a literal run emit_literal if @literal @tokens end
def self.scan_options(p, data, ts, te)
Ragel's regex-based scan of the group options introduced a lot of
ambiguity, so we just ask it to find the beginning of what looks like an options run and handle the rest in here.
# Consumes the rest of an inline options run, e.g. the `i-x:` that follows
# the `(?m` already matched by the state machine in `(?mi-x:abc)`, and emits
# the resulting options token through emit_options.
#
# p    - current data pointer of the state machine
# data - the expression as an array of bytes; scan builds it with
#        unpack("c*"), so bytes above 0x7F show up as negative integers
# ts   - offset where the token starts
# te   - offset just past the part of the token matched so far
#
# Returns the new value of the data pointer, advanced past every character
# that was appended to the options text.
#
# Raises InvalidGroupOption if d, a, or u appears after a '-'.
# Raises PrematureEndError if the data ends inside the options run.
# Raises ScannerError if the run is terminated by anything but ':' or ')'.
def self.scan_options(p, data, ts, te)
  options_text = text(data, ts, te).first

  options_length = 0
  negative_options = false
  c = nil

  # Copy while we have option characters. There is no maximum length,
  # as ruby allows things like '(?xxxxxxxxx-xxxxxxxxxxxxx:abc)'.
  loop do
    byte = data[te + options_length]
    raise PrematureEndError.new("expression options `#{options_text}'") unless byte

    # Mask to 0..255 first: Integer#chr raises RangeError on the negative
    # values that signed unpacking produces for non-ASCII bytes. Masked,
    # such a byte is simply "not an option character" and ends up in the
    # ScannerError branch below.
    c = (byte & 0xFF).chr
    break unless c =~ /[-mixdau]/

    negative_options = true if c == '-'

    # Everything after the '-' switches options off; d, a and u are not
    # accepted there.
    if negative_options && c =~ /[dau]/
      raise InvalidGroupOption.new(c, options_text)
    end

    options_text << c
    p += 1
    options_length += 1
  end

  # The loop only ends on an existing byte that is not an option character,
  # so c is that terminating character here.
  case c
  when ':'
    # Include the ':' in the options text
    options_text << c
    p += 1
    options_length += 1
    emit_options(options_text, ts, te + options_length)
  when ')'
    # Don't include the closing ')', let group_close handle it.
    emit_options(options_text, ts, te + options_length)
  else
    # Plain Regexp reports this as 'undefined group option'
    raise ScannerError.new(
      "Unexpected `#{c}' in options sequence, ':' or ')' expected")
  end

  p # return the new value of the data pointer
end
def self.text(data, ts, te, soff = 0)
Copy from ts to te from data as text, returning an array with the text and the offsets used to copy it.
# Extracts the token text lying between two offsets of the byte array.
#
# data - the expression as an array of bytes
# ts   - offset where the token starts
# te   - offset just past the end of the token
# soff - number of bytes before ts to include as well (defaults to 0)
#
# Returns a three element array: the copied text, the start offset that was
# actually used (ts - soff), and te.
def self.text(data, ts, te, soff = 0)
  first_offset = ts - soff
  last_offset  = te - 1

  [copy(data, first_offset..last_offset), first_offset, te]
end
def self.validation_error(type, what, reason)
Centralizes and unifies the handling of validation related errors.
# Single entry point for raising validation failures: picks the error class
# that corresponds to +type+, builds it and raises it.
#
# type   - :group, :backref or :sequence; any other value falls back to a
#          generic ValidationError built with the fixed text 'expression'
# what   - passed as the first argument to the selected error class
# reason - passed as the second argument to the selected error class
#
# This method always raises and never returns.
def self.validation_error(type, what, reason)
  failure =
    case type
    when :group    then InvalidGroupError.new(what, reason)
    when :backref  then InvalidBackrefError.new(what, reason)
    when :sequence then InvalidSequenceError.new(what, reason)
    else                ValidationError.new('expression')
    end

  # Raised unconditionally; a `validation_ignore` configuration switch was
  # sketched here at some point but is not active.
  raise failure
end