global

def self.read_scripts(scripts_file_path, output_path, logging, processing_type)

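Parameters:
  • processing_type (Symbol) -- processing mode; the method compares it against :default and :append
  • logging (Boolean) -- whether to print a "Parsed <file>" message once the scripts are read
  • output_path (String) -- directory the generated text files are written to
  • scripts_file_path (String) -- path to the Marshal-serialized scripts file to read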
# Reads a Marshal-serialized scripts file, extracts the string literals that look like
# translatable text and writes three files into +output_path+:
#
# * "<basename>_plain.txt" - the inflated source of every script, joined by newlines;
# * "<basename>.txt"       - the extracted strings, one per line;
# * "<basename>_trans.txt" - the translation lines (blank lines for newly parsed strings).
#
# @param scripts_file_path [String] path to the scripts file to read
# @param output_path [String] directory the output files are written to
# @param logging [Boolean] whether to print a "Parsed ..." message
# @param processing_type [Symbol] +:default+ skips the work when the translation file already
#   exists, +:append+ merges new strings into the existing files, any other value re-reads everything
# @return [Integer, nil] number of bytes written to the translation file, or nil when skipped
def self.read_scripts(scripts_file_path, output_path, logging, processing_type)
    scripts_filename = File.basename(scripts_file_path)
    scripts_basename = File.basename(scripts_file_path, '.*').downcase

    scripts_plain_output_path = File.join(output_path, "#{scripts_basename}_plain.txt")
    scripts_output_path = File.join(output_path, "#{scripts_basename}.txt")
    scripts_trans_output_path = File.join(output_path, "#{scripts_basename}_trans.txt")

    if processing_type == :default && File.exist?(scripts_trans_output_path)
        puts 'scripts_trans.txt file already exists. If you want to forcefully re-read all files, use --force flag, or --append if you want append new text to already existing files.'
        return
    end

    # NOTE(security): Marshal.load can instantiate arbitrary objects - only feed it trusted files.
    script_entries = Marshal.load(File.binread(scripts_file_path))

    scripts_lines = IndexSet.new
    scripts_translation_map = nil

    if processing_type == :append
        if File.exist?(scripts_trans_output_path)
            # Pair every original line with its translation, keeping the file order.
            scripts_translation_map = File.readlines(scripts_output_path, chomp: true)
                                          .zip(File.readlines(scripts_trans_output_path, chomp: true))
                                          .to_h
        else
            puts "Files aren't already parsed. Continuing as if --append flag was omitted."
            processing_type = :default
        end
    end

    codes_content = []

    script_entries.each do |script|
        # The third element of every entry holds the zlib-deflated script source.
        raw_code = Zlib::Inflate.inflate(script[2]).force_encoding('UTF-8')

        # The plain dump keeps the untouched bytes; a copy is stored so later processing can't alter it.
        codes_content.push(raw_code.dup)

        code = decode_script_source(raw_code)

        extract_quoted_strings(code).each do |string|
            # Also drops U+3000 (ideographic space), so a string made only of it counts as empty.
            string = string.strip.delete("\u3000")

            next if string.empty? || skip_script_string?(string)

            if processing_type == :append && !scripts_translation_map.include?(string)
                scripts_translation_map.insert_at_index(scripts_lines.length, string, '')
            end

            scripts_lines.add(string)
        end
    end

    puts "Parsed #{scripts_filename}" if logging

    File.binwrite(scripts_plain_output_path, codes_content.join("\n"))

    original_content, translated_content =
        if processing_type == :append
            [scripts_translation_map.keys.join("\n"), scripts_translation_map.values.join("\n")]
        else
            # One (empty) translation line per extracted string.
            [scripts_lines.join("\n"), "\n" * [scripts_lines.length - 1, 0].max]
        end

    File.binwrite(scripts_output_path, original_content)
    File.binwrite(scripts_trans_output_path, translated_content)
end

# Returns +raw+ as valid UTF-8 text.
#
# Valid UTF-8 input is returned as is. Otherwise the bytes are reinterpreted as Windows-1252
# and then as Shift_JIS (in that order), and the first successful transcoding to UTF-8 wins.
# Windows-1252 accepts most byte sequences, so legacy text without any byte that is undefined
# in Windows-1252 is decoded as Windows-1252. If no candidate fits, invalid bytes are replaced
# so the downstream regex filters do not raise.
#
# @param raw [String] script source tagged as UTF-8
# @return [String] valid UTF-8 text
def self.decode_script_source(raw)
    return raw if raw.valid_encoding?

    [Encoding::WINDOWS_1252, Encoding::SHIFT_JIS].each do |encoding|
        decoded = raw.encode(Encoding::UTF_8, encoding)
        return decoded if decoded.valid_encoding?
    rescue EncodingError
        # Covers both invalid byte sequences and bytes undefined in the candidate encoding.
        next
    end

    raw.scrub
end

# Tells whether an extracted string literal looks like code, a resource path or engine-internal
# text rather than something worth translating.
#
# These are heuristics: they may reject a string that should have been kept.
#
# @param string [String] stripped, non-empty string literal
# @return [Boolean] true when the string must be skipped
def self.skip_script_string?(string)
    string.start_with?(/([#!?$@]|(\.\/)?(Graphics|Data|Audio|CG|Movies|Save)\/)/) ||
        string.match?(/^[^\p{L}]+$/) ||
        string.match?(/^\d+$/) ||
        string.match?(/%.*(\d|\+|\*)d\]?:?$/) ||
        string.match?(/^\[(ON|OFF)\]$/) ||
        string.match?(/^\[\]$/) ||
        string.match?(/^(.)\1{2,}$/) ||
        string.match?(/^(false|true)$/) ||
        string.match?(/^[wr]b$/) ||
        string.match?(/^(?=.*\d)[A-Za-z0-9\-]+$/) ||
        string.match?(/^[a-z\-()\/ +'&]*$/) ||
        string.match?(/^[A-Za-z]+[+-]$/) ||
        string.match?(/^[.()+-:;\[\]^~%&!*\/→×??x%▼|]$/) ||
        string.match?(/^Tile.*[A-Z]$/) ||
        string.match?(/^[a-zA-Z][a-z]+([A-Z][a-z]*)+$/) ||
        string.match?(/^Cancel Action$|^Invert$|^End$|^Individual$|^Missed File$|^Bitmap$|^Audio$/) ||
        string.match?(/\.(mp3|ogg|jpg|png|ini|txt)$/i) ||
        string.match?(/\/(\d.*)?$/) ||
        string.match?(/FILE$/) ||
        string.match?(/#\{/) ||
        string.match?(/(?<!\\)\\(?![\\G#])/) ||
        string.match?(/\+?=?=/) ||
        string.match?(/[}{_<>]/) ||
        string.match?(/r[vx]data/) ||
        string.match?(/No such file or directory/) ||
        string.match?(/level \*\*/) ||
        string.match?(/Courier New|Comic Sans|Lucida|Verdana|Tahoma|Arial|Times New Roman/) ||
        string.match?(/Player start location/) ||
        string.match?(/Common event call has exceeded/) ||
        string.match?(/se-/) ||
        string.match?(/Start Pos/) ||
        string.match?(/An error has occurred/) ||
        string.match?(/Define it first/) ||
        string.match?(/Process Skill/) ||
        string.match?(/Wpn Only/) ||
        string.match?(/Don't Wait/) ||
        string.match?(/Clear image/) ||
        string.match?(/Can Collapse/)
end