lib/extensions.rb



# frozen_string_literal: true

# Builds a copy of +input_string+ in which each character listed in the table
# below is replaced by its Latin-script counterpart: CJK punctuation and
# brackets become plain punctuation, and single-character Roman numerals are
# spelled out with letters. Characters without an entry are copied through
# unchanged.
#
# @param [String] input_string
# @return [String]
def self.romanize_string(input_string)
    romanized = input_string.each_char.map do |glyph|
        case glyph
        # Punctuation and dashes
        when '。' then '.'
        when '、', ',' then ','
        when '・' then '·'
        when '゠' then '–'
        when '=', 'ー' then '—'
        when '…', '‥' then '...'
        # Quotation marks
        when '「', '」', '〈', '〉' then "'"
        when '『', '』', '《', '》' then '"'
        # Brackets
        when '(', '〔', '⦅', '〘' then '('
        when ')', '〕', '⦆', '〙' then ')'
        when '{' then '{'
        when '}' then '}'
        when '[', '【', '〖', '〚' then '['
        when ']', '】', '〗', '〛' then ']'
        # Miscellaneous symbols
        when '〜' then '~'
        when '?' then '?'
        when ':' then ':'
        when '!' then '!'
        when '※' then '*'
        when ' ' then ' '
        # Roman numerals (a single character may expand to several letters)
        when 'Ⅰ' then 'I'
        when 'ⅰ' then 'i'
        when 'Ⅱ' then 'II'
        when 'ⅱ' then 'ii'
        when 'Ⅲ' then 'III'
        when 'ⅲ' then 'iii'
        when 'Ⅳ' then 'IV'
        when 'ⅳ' then 'iv'
        when 'Ⅴ' then 'V'
        when 'ⅴ' then 'v'
        when 'Ⅵ' then 'VI'
        when 'ⅵ' then 'vi'
        when 'Ⅶ' then 'VII'
        when 'ⅶ' then 'vii'
        when 'Ⅷ' then 'VIII'
        when 'ⅷ' then 'viii'
        when 'Ⅸ' then 'IX'
        when 'ⅸ' then 'ix'
        when 'Ⅹ' then 'X'
        when 'ⅹ' then 'x'
        when 'Ⅺ' then 'XI'
        when 'ⅺ' then 'xi'
        when 'Ⅻ' then 'XII'
        when 'ⅻ' then 'xii'
        when 'Ⅼ' then 'L'
        when 'ⅼ' then 'l'
        when 'Ⅽ' then 'C'
        when 'ⅽ' then 'c'
        when 'Ⅾ' then 'D'
        when 'ⅾ' then 'd'
        when 'Ⅿ' then 'M'
        when 'ⅿ' then 'm'
        else glyph
        end
    end

    romanized.join
end

# Randomly reorders the whitespace-separated words of every string in +array+.
# The whitespace between words is left exactly where it was; only the words
# occupying the non-whitespace slots are permuted. The input array and its
# strings are not modified.
#
# @param [Array<String>] array Array of strings
# @return [Array<String>] New array of strings with shuffled words
def self.shuffle_words(array)
    select_words_re = /\S+/

    # `map` (not `each`) so the shuffled copies are actually returned;
    # `each` would hand back the untouched input array.
    array.map do |string|
        words = string.scan(select_words_re).shuffle
        # Fill each word slot from the shuffled pool, keeping the original spacing.
        string.gsub(select_words_re) { words.pop || '' }
    end
end

# Measures the unbroken run of backslashes that ends at +index+ in +line+
# (scanning backwards towards the start of the line) and reports whether its
# length is even.
#
# Note that, despite the name, a +true+ result means the run is even (zero
# included), i.e. the character following +index+ is NOT escaped.
#
# @param [String] line
# @param [Integer] index Position of the last character to inspect; a negative
#   value yields an empty run and therefore +true+
# @return [Boolean]
def escaped?(line, index)
    trailing_backslashes = index.downto(0).take_while { |position| line[position] == '\\' }
    trailing_backslashes.size.even?
end

# Scans +ruby_code+ line by line and collects the contents of every
# single- or double-quoted string literal it recognises.
#
# Scanning rules, as implemented below:
# * a line whose first non-blank character is `#` is skipped, as are lines
#   between `=begin` and `=end` (both only while no string is open);
# * outside a string, `#` ends the scan of the current line;
# * a string ends at the next matching quote for which `escaped?` returns true;
# * line breaks inside an extracted string are replaced with `\#`.
#
# @param [String] ruby_code
# @param [Boolean] mode When truthy, every occurrence is kept and the start
#   offsets are returned as well; otherwise duplicates are dropped
# @return [Array<String>, Array(Array<String>, Array<Integer>)] The unique
#   strings, or (with +mode+) the strings plus the character offset in
#   +ruby_code+ of the first character after each opening quote
def extract_strings(ruby_code, mode: false)
    found = mode ? [] : Set.new
    start_positions = []
    in_string = false
    in_block_comment = false
    opening_index = 0
    quote = ''

    # Character offset of the current line within ruby_code.
    line_offset = 0

    ruby_code.each_line do |line|
        trimmed = line.strip
        line_length = line.length

        # Comment detection is suspended while a string spans several lines.
        unless in_string
            if trimmed[0] == '#'
                line_offset += line_length
                next
            end

            if trimmed.start_with?('=begin')
                in_block_comment = true
            elsif trimmed.start_with?('=end')
                in_block_comment = false
            end
        end

        if in_block_comment
            line_offset += line_length
            next
        end

        line.each_char.with_index do |char, column|
            if in_string
                # Only a matching quote accepted by escaped? closes the string.
                next unless char == quote && escaped?(line, column - 1)

                literal = ruby_code[(opening_index + 1)...(line_offset + column)].gsub(/\r?\n/, '\#')

                if mode
                    found << literal
                    start_positions << opening_index + 1
                else
                    found.add(literal)
                end

                in_string = false
                quote = ''
            else
                # The rest of the line is a trailing comment.
                break if char == '#'
                next unless ['"', "'"].include?(char)

                in_string = true
                opening_index = line_offset + column
                quote = char
            end
        end

        line_offset += line_length
    end

    mode ? [found, start_positions] : found.to_a
end

# Candidate source encodings, in the order convert_to_utf8 tries them.
ENCODINGS = %w[
    ISO-8859-1
    Windows-1252
    Shift_JIS
    GB18030
    EUC-JP
    ISO-2022-JP
    BIG5
    EUC-KR
    Windows-1251
    KOI8-R
    UTF-8
].freeze

# Transcodes +input_string+ to UTF-8 by interpreting its bytes as each entry
# of ENCODINGS in turn and returning the first conversion that does not raise.
#
# NOTE(review): ISO-8859-1 assigns a character to every byte value, so the
# first attempt looks like it always succeeds and the later candidates are
# never reached — confirm whether this ordering is intended.
#
# @param [String] input_string
# @return [String] The UTF-8 encoded copy
# @raise [EncodingError] If no candidate encoding converts without error
def convert_to_utf8(input_string)
    ENCODINGS.each do |encoding|
        return input_string.encode('UTF-8', encoding)
    rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
        # This candidate does not fit the bytes; try the next one.
        next
    end

    # `raise Class, message` form: `EncodingError(...)` would be a call to an
    # undefined method and surface as NoMethodError instead.
    raise EncodingError, "Cannot convert string #{input_string} to UTF-8"
end