lib/regexp_parser/syntax/tokens/unicode_property.rb



module Regexp::Syntax
  module Token

    module UnicodeProperty
      # Character type property names, split by the version suffix they are
      # filed under.
      CharType_V1_9_0 = %w[
        alnum alpha ascii blank cntrl digit graph
        lower print punct space upper word xdigit
      ].map(&:to_sym)

      CharType_V2_5_0 = %w[xposixpunct].map(&:to_sym)

      # Property names grouped under the POSIX label.
      POSIX = %w[any assigned newline].map(&:to_sym)

      # Category property names, one list per kind of category, plus an
      # aggregate of all of them.
      module Category
        Letter = %w[
          letter uppercase_letter lowercase_letter
          titlecase_letter modifier_letter other_letter
        ].map(&:to_sym)

        Mark = %w[
          mark nonspacing_mark spacing_mark enclosing_mark
        ].map(&:to_sym)

        Number = %w[
          number decimal_number letter_number other_number
        ].map(&:to_sym)

        Punctuation = %w[
          punctuation connector_punctuation dash_punctuation
          open_punctuation close_punctuation initial_punctuation
          final_punctuation other_punctuation
        ].map(&:to_sym)

        # NOTE: inside this module, `Symbol` refers to this list rather than
        # to Ruby's core Symbol class.
        Symbol = %w[
          symbol math_symbol currency_symbol modifier_symbol other_symbol
        ].map(&:to_sym)

        Separator = %w[
          separator space_separator line_separator paragraph_separator
        ].map(&:to_sym)

        Codepoint = %w[
          other control format surrogate private_use unassigned
        ].map(&:to_sym)

        # Every category name, in the order the lists are declared above.
        All = [Letter, Mark, Number, Punctuation,
               Symbol, Separator, Codepoint].inject(:+)
      end

      # Age properties available as of Ruby 1.9.3.
      Age_V1_9_3 = %w[
        age=1.1 age=2.0 age=2.1 age=3.0 age=3.1 age=3.2
        age=4.0 age=4.1 age=5.0 age=5.1 age=5.2 age=6.0
      ].map(&:to_sym)

      Age_V2_0_0 = %w[age=6.1].map(&:to_sym)

      # Merged from Onigmo in the branch for 2.2.0.
      Age_V2_2_0 = %w[age=6.2 age=6.3 age=7.0].map(&:to_sym)

      Age_V2_3_0 = %w[age=8.0].map(&:to_sym)

      Age_V2_4_0 = %w[age=9.0].map(&:to_sym)

      Age_V2_5_0 = %w[age=10.0].map(&:to_sym)

      # All age properties, oldest version list first.
      Age = [Age_V1_9_3, Age_V2_0_0, Age_V2_2_0,
             Age_V2_3_0, Age_V2_4_0, Age_V2_5_0].inject(:+)

      # Derived property names, split by the version suffix they are filed
      # under. Order within each list is preserved from the original listing.
      Derived_V1_9_0 = %w[
        ascii_hex_digit alphabetic cased
        changes_when_casefolded changes_when_casemapped changes_when_lowercased
        changes_when_titlecased changes_when_uppercased case_ignorable
        bidi_control dash deprecated default_ignorable_code_point
        diacritic extender grapheme_base grapheme_extend grapheme_link
        hex_digit hyphen id_continue ideographic id_start
        ids_binary_operator ids_trinary_operator join_control
        logical_order_exception lowercase math noncharacter_code_point
        other_alphabetic other_default_ignorable_code_point
        other_grapheme_extend other_id_continue other_id_start
        other_lowercase other_math other_uppercase
        pattern_syntax pattern_white_space quotation_mark radical
        sentence_terminal soft_dotted terminal_punctuation
        unified_ideograph uppercase variation_selector white_space
        xid_start xid_continue
      ].map(&:to_sym)

      Derived_V2_0_0 = %w[cased_letter combining_mark].map(&:to_sym)

      Derived_V2_4_0 = %w[prepended_concatenation_mark].map(&:to_sym)

      Derived_V2_5_0 = %w[regional_indicator].map(&:to_sym)

      # All derived properties, oldest version list first.
      Derived = [Derived_V1_9_0, Derived_V2_0_0,
                 Derived_V2_4_0, Derived_V2_5_0].inject(:+)

      # Script property names, split by the version suffix they are filed
      # under. Order within each list is preserved from the original listing.
      Script_V1_9_0 = %w[
        arabic imperial_aramaic armenian avestan balinese bamum bengali
        bopomofo braille buginese buhid canadian_aboriginal carian cham
        cherokee coptic cypriot cyrillic devanagari deseret
        egyptian_hieroglyphs ethiopic georgian glagolitic gothic greek
        gujarati gurmukhi hangul han hanunoo hebrew hiragana
        old_italic javanese kayah_li katakana kharoshthi khmer kannada
        kaithi tai_tham lao latin lepcha limbu linear_b lisu
        lycian lydian malayalam mongolian meetei_mayek myanmar nko
        ogham ol_chiki old_turkic oriya osmanya phags_pa
        inscriptional_pahlavi phoenician inscriptional_parthian rejang
        runic samaritan old_south_arabian saurashtra shavian sinhala
        sundanese syloti_nagri syriac tagbanwa tai_le new_tai_lue
        tamil tai_viet telugu tifinagh tagalog thaana thai tibetan
        ugaritic vai old_persian cuneiform yi inherited common unknown
      ].map(&:to_sym)

      Script_V1_9_3 = %w[brahmi batak mandaic].map(&:to_sym)

      Script_V2_0_0 = %w[
        chakma meroitic_cursive meroitic_hieroglyphs miao
        sharada sora_sompeng takri
      ].map(&:to_sym)

      Script_V2_2_0 = %w[
        caucasian_albanian bassa_vah duployan elbasan grantha
        pahawh_hmong khojki linear_a mahajani manichaean
        mende_kikakui modi mro old_north_arabian nabataean
        palmyrene pau_cin_hau old_permic psalter_pahlavi siddham
        khudawadi tirhuta warang_citi
      ].map(&:to_sym)

      Script_V2_3_0 = %w[
        ahom anatolian_hieroglyphs hatran multani old_hungarian signwriting
      ].map(&:to_sym)

      Script_V2_4_0 = %w[
        adlam bhaiksuki marchen newa osage tangut
      ].map(&:to_sym)

      Script_V2_5_0 = %w[
        masaram_gondi nushu soyombo zanabazar_square
      ].map(&:to_sym)

      # All script properties, oldest version list first.
      Script = [Script_V1_9_0, Script_V1_9_3, Script_V2_0_0, Script_V2_2_0,
                Script_V2_3_0, Script_V2_4_0, Script_V2_5_0].inject(:+)

      # Unicode block property names (all prefixed with `in_`), split by the
      # version suffix they are filed under.
      #
      # Each block name belongs to exactly one versioned list, so the
      # aggregate UnicodeBlock list contains no duplicates.
      UnicodeBlock_V1_9_0 = [
        :in_alphabetic_presentation_forms,
        :in_arabic_presentation_forms_a,
        :in_arabic_presentation_forms_b,
        :in_arabic,
        :in_armenian,
        :in_arrows,
        :in_basic_latin,
        :in_bengali,
        :in_block_elements,
        :in_bopomofo_extended,
        :in_bopomofo,
        :in_box_drawing,
        :in_braille_patterns,
        :in_buhid,
        :in_cjk_compatibility_forms,
        :in_cjk_compatibility_ideographs,
        :in_cjk_compatibility,
        :in_cjk_radicals_supplement,
        :in_cjk_symbols_and_punctuation,
        :in_cjk_unified_ideographs_extension_a,
        :in_cjk_unified_ideographs,
        :in_cherokee,
        :in_combining_diacritical_marks_for_symbols,
        :in_combining_diacritical_marks,
        :in_combining_half_marks,
        :in_control_pictures,
        :in_currency_symbols,
        :in_cyrillic_supplement,
        :in_cyrillic,
        :in_devanagari,
        :in_dingbats,
        :in_enclosed_alphanumerics,
        :in_enclosed_cjk_letters_and_months,
        :in_ethiopic,
        :in_general_punctuation,
        :in_geometric_shapes,
        :in_georgian,
        :in_greek_extended,
        :in_greek_and_coptic,
        :in_gujarati,
        :in_gurmukhi,
        :in_halfwidth_and_fullwidth_forms,
        :in_hangul_compatibility_jamo,
        :in_hangul_jamo,
        :in_hangul_syllables,
        :in_hanunoo,
        :in_hebrew,
        :in_high_private_use_surrogates,
        :in_high_surrogates,
        :in_hiragana,
        :in_ipa_extensions,
        :in_ideographic_description_characters,
        :in_kanbun,
        :in_kangxi_radicals,
        :in_kannada,
        :in_katakana_phonetic_extensions,
        :in_katakana,
        :in_khmer_symbols,
        :in_khmer,
        :in_lao,
        :in_latin_1_supplement,
        :in_latin_extended_a,
        :in_latin_extended_b,
        :in_latin_extended_additional,
        :in_letterlike_symbols,
        :in_limbu,
        :in_low_surrogates,
        :in_malayalam,
        :in_mathematical_operators,
        :in_miscellaneous_mathematical_symbols_a,
        :in_miscellaneous_mathematical_symbols_b,
        :in_miscellaneous_symbols_and_arrows,
        :in_miscellaneous_symbols,
        :in_miscellaneous_technical,
        :in_mongolian,
        :in_myanmar,
        :in_number_forms,
        :in_ogham,
        :in_optical_character_recognition,
        :in_oriya,
        :in_phonetic_extensions,
        :in_private_use_area,
        :in_runic,
        :in_sinhala,
        :in_small_form_variants,
        :in_spacing_modifier_letters,
        :in_specials,
        :in_superscripts_and_subscripts,
        :in_supplemental_arrows_a,
        :in_supplemental_arrows_b,
        :in_supplemental_mathematical_operators,
        :in_syriac,
        :in_tagalog,
        :in_tagbanwa,
        :in_tai_le,
        :in_tamil,
        :in_telugu,
        :in_thaana,
        :in_thai,
        :in_tibetan,
        :in_unified_canadian_aboriginal_syllabics,
        :in_variation_selectors,
        :in_yi_radicals,
        :in_yi_syllables,
        :in_yijing_hexagram_symbols,
      ]

      # Blocks already listed in UnicodeBlock_V1_9_0 (the Arabic presentation
      # forms, Latin-1 supplement, Latin extended A/B, miscellaneous
      # mathematical symbols A/B and supplemental arrows A/B) must not be
      # repeated here; doing so would put duplicates into the aggregates.
      UnicodeBlock_V2_0_0 = [
        :in_aegean_numbers,
        :in_alchemical_symbols,
        :in_ancient_greek_musical_notation,
        :in_ancient_greek_numbers,
        :in_ancient_symbols,
        :in_arabic_extended_a,
        :in_arabic_mathematical_alphabetic_symbols,
        :in_arabic_supplement,
        :in_avestan,
        :in_balinese,
        :in_bamum,
        :in_bamum_supplement,
        :in_batak,
        :in_brahmi,
        :in_buginese,
        :in_byzantine_musical_symbols,
        :in_cjk_compatibility_ideographs_supplement,
        :in_cjk_strokes,
        :in_cjk_unified_ideographs_extension_b,
        :in_cjk_unified_ideographs_extension_c,
        :in_cjk_unified_ideographs_extension_d,
        :in_carian,
        :in_chakma,
        :in_cham,
        :in_combining_diacritical_marks_supplement,
        :in_common_indic_number_forms,
        :in_coptic,
        :in_counting_rod_numerals,
        :in_cuneiform,
        :in_cuneiform_numbers_and_punctuation,
        :in_cypriot_syllabary,
        :in_cyrillic_extended_a,
        :in_cyrillic_extended_b,
        :in_deseret,
        :in_devanagari_extended,
        :in_domino_tiles,
        :in_egyptian_hieroglyphs,
        :in_emoticons,
        :in_enclosed_alphanumeric_supplement,
        :in_enclosed_ideographic_supplement,
        :in_ethiopic_extended,
        :in_ethiopic_extended_a,
        :in_ethiopic_supplement,
        :in_georgian_supplement,
        :in_glagolitic,
        :in_gothic,
        :in_hangul_jamo_extended_a,
        :in_hangul_jamo_extended_b,
        :in_imperial_aramaic,
        :in_inscriptional_pahlavi,
        :in_inscriptional_parthian,
        :in_javanese,
        :in_kaithi,
        :in_kana_supplement,
        :in_kayah_li,
        :in_kharoshthi,
        :in_latin_extended_c,
        :in_latin_extended_d,
        :in_lepcha,
        :in_linear_b_ideograms,
        :in_linear_b_syllabary,
        :in_lisu,
        :in_lycian,
        :in_lydian,
        :in_mahjong_tiles,
        :in_mandaic,
        :in_mathematical_alphanumeric_symbols,
        :in_meetei_mayek,
        :in_meetei_mayek_extensions,
        :in_meroitic_cursive,
        :in_meroitic_hieroglyphs,
        :in_miao,
        :in_miscellaneous_symbols_and_pictographs,
        :in_modifier_tone_letters,
        :in_musical_symbols,
        :in_myanmar_extended_a,
        :in_nko,
        :in_new_tai_lue,
        :in_no_block,
        :in_ol_chiki,
        :in_old_italic,
        :in_old_persian,
        :in_old_south_arabian,
        :in_old_turkic,
        :in_osmanya,
        :in_phags_pa,
        :in_phaistos_disc,
        :in_phoenician,
        :in_phonetic_extensions_supplement,
        :in_playing_cards,
        :in_rejang,
        :in_rumi_numeral_symbols,
        :in_samaritan,
        :in_saurashtra,
        :in_sharada,
        :in_shavian,
        :in_sora_sompeng,
        :in_sundanese,
        :in_sundanese_supplement,
        :in_supplemental_punctuation,
        :in_supplementary_private_use_area_a,
        :in_supplementary_private_use_area_b,
        :in_syloti_nagri,
        :in_tags,
        :in_tai_tham,
        :in_tai_viet,
        :in_tai_xuan_jing_symbols,
        :in_takri,
        :in_tifinagh,
        :in_transport_and_map_symbols,
        :in_ugaritic,
        :in_unified_canadian_aboriginal_syllabics_extended,
        :in_vai,
        :in_variation_selectors_supplement,
        :in_vedic_extensions,
        :in_vertical_forms,
      ]

      UnicodeBlock_V2_2_0 = [
        :in_bassa_vah,
        :in_caucasian_albanian,
        :in_combining_diacritical_marks_extended,
        :in_coptic_epact_numbers,
        :in_duployan,
        :in_elbasan,
        :in_geometric_shapes_extended,
        :in_grantha,
        :in_khojki,
        :in_khudawadi,
        :in_latin_extended_e,
        :in_linear_a,
        :in_mahajani,
        :in_manichaean,
        :in_mende_kikakui,
        :in_modi,
        :in_mro,
        :in_myanmar_extended_b,
        :in_nabataean,
        :in_old_north_arabian,
        :in_old_permic,
        :in_ornamental_dingbats,
        :in_pahawh_hmong,
        :in_palmyrene,
        :in_pau_cin_hau,
        :in_psalter_pahlavi,
        :in_shorthand_format_controls,
        :in_siddham,
        :in_sinhala_archaic_numbers,
        :in_supplemental_arrows_c,
        :in_tirhuta,
        :in_warang_citi,
      ]

      UnicodeBlock_V2_3_0 = [
        :in_ahom,
        :in_anatolian_hieroglyphs,
        :in_cjk_unified_ideographs_extension_e,
        :in_cherokee_supplement,
        :in_early_dynastic_cuneiform,
        :in_hatran,
        :in_multani,
        :in_old_hungarian,
        :in_supplemental_symbols_and_pictographs,
        :in_sutton_signwriting,
      ]

      UnicodeBlock_V2_4_0 = [
        :in_adlam,
        :in_bhaiksuki,
        :in_cyrillic_extended_c,
        :in_glagolitic_supplement,
        :in_ideographic_symbols_and_punctuation,
        :in_marchen,
        :in_mongolian_supplement,
        :in_newa,
        :in_osage,
        :in_tangut,
        :in_tangut_components,
      ]

      UnicodeBlock_V2_5_0 = [
        :in_cjk_unified_ideographs_extension_f,
        :in_kana_extended_a,
        :in_masaram_gondi,
        :in_nushu,
        :in_soyombo,
        :in_syriac_supplement,
        :in_zanabazar_square,
      ]

      # All block properties, oldest version list first.
      UnicodeBlock = UnicodeBlock_V1_9_0 + UnicodeBlock_V2_0_0 + UnicodeBlock_V2_2_0 +
                     UnicodeBlock_V2_3_0 + UnicodeBlock_V2_4_0 + UnicodeBlock_V2_5_0

      # Emoji property names; only a 2.5.0 list is defined.
      Emoji_V2_5_0 = %w[
        emoji emoji_component emoji_modifier
        emoji_modifier_base emoji_presentation
      ].map(&:to_sym)

      # Alias of the single versioned list above (same Array object).
      Emoji = Emoji_V2_5_0

      # Per-version bundles: each concatenates the property lists that carry
      # the matching version suffix (V1_9_0 also takes the unversioned POSIX
      # and Category::All lists).
      V1_9_0 = [CharType_V1_9_0, POSIX, Category::All,
                Derived_V1_9_0, Script_V1_9_0, UnicodeBlock_V1_9_0].inject(:+)

      V1_9_3 = [Age_V1_9_3, Script_V1_9_3].inject(:+)

      V2_0_0 = [Age_V2_0_0, Derived_V2_0_0,
                Script_V2_0_0, UnicodeBlock_V2_0_0].inject(:+)

      V2_2_0 = [Age_V2_2_0, Script_V2_2_0, UnicodeBlock_V2_2_0].inject(:+)

      V2_3_0 = [Age_V2_3_0, Script_V2_3_0, UnicodeBlock_V2_3_0].inject(:+)

      V2_4_0 = [Age_V2_4_0, Derived_V2_4_0,
                Script_V2_4_0, UnicodeBlock_V2_4_0].inject(:+)

      V2_5_0 = [Age_V2_5_0, CharType_V2_5_0, Derived_V2_5_0,
                Emoji_V2_5_0, Script_V2_5_0, UnicodeBlock_V2_5_0].inject(:+)

      # Every property name from every version bundle, oldest bundle first.
      All = [V1_9_0, V1_9_3, V2_0_0, V2_2_0,
             V2_3_0, V2_4_0, V2_5_0].inject(:+)

      # Token type identifiers for this group.
      Type    = :property
      NonType = :nonproperty
    end

    # Register the complete property list under both token types; the two
    # entries share the same Array object.
    [UnicodeProperty::Type, UnicodeProperty::NonType].each do |token_type|
      Map[token_type] = UnicodeProperty::All
    end

  end
end