# lib/regexp_parser/syntax/tokens/unicode_property.rb



module Regexp::Syntax
  module Token

    # Symbol lists naming the Unicode property tokens. The lists are grouped
    # by kind first and then bundled into the per-version sets V190, V193,
    # V200 and V220, which together make up All.
    module UnicodeProperty
      # Character-type names.
      CharType = [
        :alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph,
        :lower, :print, :punct, :space, :upper, :word, :xdigit
      ]

      POSIX = [:any, :assigned, :newline]

      # Category names, one list per group, plus the union of all groups.
      module Category
        Letter = [
          :letter_any,
          :letter_uppercase,
          :letter_lowercase,
          :letter_titlecase,
          :letter_modifier,
          :letter_other
        ]

        Mark = [
          :mark_any,
          :mark_nonspacing,
          :mark_spacing,
          :mark_enclosing
        ]

        Number = [
          :number_any,
          :number_decimal,
          :number_letter,
          :number_other
        ]

        Punctuation = [
          :punct_any,
          :punct_connector,
          :punct_dash,
          :punct_open,
          :punct_close,
          :punct_initial,
          :punct_final,
          :punct_other
        ]

        # NOTE: inside this module the bare name Symbol now refers to this
        # list, not to the core ::Symbol class.
        Symbol = [
          :symbol_any,
          :symbol_math,
          :symbol_currency,
          :symbol_modifier,
          :symbol_other
        ]

        Separator = [
          :separator_any,
          :separator_space,
          :separator_line,
          :separator_para
        ]

        Codepoint = [
          :other,
          :control,
          :format,
          :surrogate,
          :private_use,
          :unassigned
        ]

        # Every category name, in the order the groups are declared above.
        All = [
          Letter, Mark, Number, Punctuation, Symbol, Separator, Codepoint
        ].flatten
      end

      # Age properties available as of ruby version 1.9.3.
      Age_V193 = [
        :age_1_1, :age_2_0, :age_2_1, :age_3_0,
        :age_3_1, :age_3_2, :age_4_0, :age_4_1,
        :age_5_0, :age_5_1, :age_5_2, :age_6_0
      ]

      Age_V200 = [:age_6_1]

      # Age properties merged (from Onigmo) in the branch for 2.2.0.
      Age_V220 = [
        :age_6_2,
        :age_6_3,
        :age_7_0
      ]

      # All age properties, oldest version bucket first.
      Age = [Age_V193, Age_V200, Age_V220].flatten

      Derived = [
        :ascii_hex, :alphabetic, :cased,
        :changes_when_casefolded, :changes_when_casemapped,
        :changes_when_lowercased, :changes_when_titlecased,
        :changes_when_uppercased, :case_ignorable, :bidi_control,
        :dash, :deprecated, :default_ignorable_cp, :diacritic, :extender,
        :grapheme_base, :grapheme_extend, :grapheme_link,
        :hex_digit, :hyphen, :id_continue, :ideographic, :id_start,
        :ids_binary_op, :ids_trinary_op, :join_control,
        :logical_order_exception, :lowercase, :math, :non_character_cp,
        :other_alphabetic, :other_default_ignorable_cp,
        :other_grapheme_extended, :other_id_continue, :other_id_start,
        :other_lowercase, :other_math, :other_uppercase,
        :pattern_syntax, :pattern_whitespace, :quotation_mark,
        :radical, :soft_dotted, :sentence_terminal,
        :terminal_punctuation, :unified_ideograph, :uppercase,
        :variation_selector, :whitespace, :xid_start, :xid_continue
      ]

      # Script names that belong to the V190 set.
      Script = [
        :script_arabic, :script_imperial_aramaic, :script_armenian,
        :script_avestan, :script_balinese, :script_bamum,
        :script_bengali, :script_bopomofo, :script_braille,
        :script_buginese, :script_buhid, :script_canadian_aboriginal,
        :script_carian, :script_cham, :script_cherokee,
        :script_coptic, :script_cypriot, :script_cyrillic,
        :script_devanagari, :script_deseret, :script_egyptian_hieroglyphs,
        :script_ethiopic, :script_georgian, :script_glagolitic,
        :script_gothic, :script_greek, :script_gujarati,
        :script_gurmukhi, :script_hangul, :script_han,
        :script_hanunoo, :script_hebrew, :script_hiragana,
        :script_katakana_or_hiragana, :script_old_italic, :script_javanese,
        :script_kayah_li, :script_katakana, :script_kharoshthi,
        :script_khmer, :script_kannada, :script_kaithi,
        :script_tai_tham, :script_lao, :script_latin,
        :script_lepcha, :script_limbu, :script_linear_b,
        :script_lisu, :script_lycian, :script_lydian,
        :script_malayalam, :script_mongolian, :script_meetei_mayek,
        :script_myanmar, :script_nko, :script_ogham,
        :script_ol_chiki, :script_old_turkic, :script_oriya,
        :script_osmanya, :script_phags_pa, :script_inscriptional_pahlavi,
        :script_phoenician, :script_inscriptional_parthian, :script_rejang,
        :script_runic, :script_samaritan, :script_old_south_arabian,
        :script_saurashtra, :script_shavian, :script_sinhala,
        :script_sundanese, :script_syloti_nagri, :script_syriac,
        :script_tagbanwa, :script_tai_le, :script_new_tai_lue,
        :script_tamil, :script_tai_viet, :script_telugu,
        :script_tifinagh, :script_tagalog, :script_thaana,
        :script_thai, :script_tibetan, :script_ugaritic,
        :script_vai, :script_old_persian, :script_cuneiform,
        :script_yi, :script_inherited, :script_common,
        :script_unknown
      ]

      # Script names that are bundled into V193 below.
      Script_6_0 = [
        :script_brahmi,
        :script_batak,
        :script_mandaic
      ]

      # Script names that are bundled into V220 below.
      Script_7_0 = [
        :script_caucasian_albanian, :script_bassa_vah, :script_duployan,
        :script_elbasan, :script_grantha, :script_pahawh_hmong,
        :script_khojki, :script_linear_a, :script_mahajani,
        :script_manichaean, :script_mende_kikakui, :script_modi,
        :script_mro, :script_old_north_arabian, :script_nabataean,
        :script_palmyrene, :script_pau_cin_hau, :script_old_permic,
        :script_psalter_pahlavi, :script_siddham, :script_khudawadi,
        :script_tirhuta, :script_warang_citi
      ]

      # Per-version bundles. Each one lists only the names assigned to that
      # version bucket; All concatenates them in version order.
      V190 = [CharType, POSIX, Category::All, Derived, Script].flatten
      V193 = Age_V193 + Script_6_0

      # Same array object as Age_V200 (an alias, not a copy).
      V200 = Age_V200

      V220 = Age_V220 + Script_7_0

      All = [V190, V193, V200, V220].flatten

      # Token type symbols under which the property names are registered.
      Type    = :property
      NonType = :nonproperty
    end

    # Register the complete property list under both token types
    # (:property first, then :nonproperty). Both keys share one array object.
    [UnicodeProperty::Type, UnicodeProperty::NonType].each do |token_type|
      Map[token_type] = UnicodeProperty::All
    end

  end
end