# lib/json/truffle_ruby/generator.rb



# frozen_string_literal: true
module JSON
  module TruffleRuby
    module Generator
      # Escape table for the bytes that may not appear verbatim inside a JSON
      # string: the ASCII control bytes 0x00..0x1f, the double quote and the
      # backslash. Control bytes start out as generic \u00XX escapes; the ones
      # that have a short JSON escape are overridden by the merge below.
      MAP = (0x00..0x1f).each_with_object({}) { |code, table|
        table[code.chr] = format('\u%04x', code).freeze
      }.merge(
        "\b" => '\b',
        "\t" => '\t',
        "\n" => '\n',
        "\f" => '\f',
        "\r" => '\r',
        '"'  => '\"',
        '\\' => '\\\\',
      ).freeze # :nodoc:

      # Matches every byte for which MAP (or, for the forward slash,
      # SCRIPT_SAFE_MAP) may hold a replacement.
      ESCAPE_PATTERN = /[\/"\\\x0-\x1f]/n # :nodoc:

      # MAP extended with the escapes applied in script-safe mode: the forward
      # slash plus the binary (byte-wise) forms of U+2028 and U+2029.
      SCRIPT_SAFE_MAP = MAP.merge(
        '/' => '\\/',
        "\u2028".b => '\u2028',
        "\u2029".b => '\u2029',
      ).freeze

      # ESCAPE_PATTERN widened to also match the byte sequences of U+2028 and
      # U+2029.
      SCRIPT_SAFE_ESCAPE_PATTERN = Regexp.union(ESCAPE_PATTERN, "\u2028".b, "\u2029".b)

      # Returns a copy of the UTF-8 encoded _string_ in which every character
      # listed in MAP (or in SCRIPT_SAFE_MAP when _script_safe_ is truthy) is
      # replaced by its JSON escape sequence. All other bytes, including
      # non-ASCII ones, are passed through unchanged; the result is tagged as
      # UTF-8.
      def utf8_to_json(string, script_safe = false) # :nodoc:
        pattern, escapes =
          if script_safe
            [SCRIPT_SAFE_ESCAPE_PATTERN, SCRIPT_SAFE_MAP]
          else
            [ESCAPE_PATTERN, MAP]
          end

        # Substitute on a binary copy so the patterns match byte-wise and the
        # caller's string is left untouched.
        escaped = string.b
        escaped.gsub!(pattern) { |match| escapes[match] || match }
        escaped.force_encoding(::Encoding::UTF_8)
      end

      # Returns a copy of the UTF-8 encoded _string_ as ASCII-only JSON string
      # content: characters found in MAP (or SCRIPT_SAFE_MAP when _script_safe_
      # is truthy) are replaced by their escapes, and every multi-byte UTF-8
      # sequence is rewritten as \uXXXX escapes of its UTF-16 code units.
      # Any StandardError raised on the way (including the one for a stray
      # non-UTF-8 byte) is re-raised through GeneratorError.wrap.
      def utf8_to_json_ascii(string, script_safe = false) # :nodoc:
        escapes = script_safe ? SCRIPT_SAFE_MAP : MAP
        bytes = string.b
        bytes.gsub!(/[\/"\\\x0-\x1f]/n) { |char| escapes[char] || char }
        bytes.gsub!(/(
          (?:
           [\xc2-\xdf][\x80-\xbf]    |
           [\xe0-\xef][\x80-\xbf]{2} |
           [\xf0-\xf4][\x80-\xbf]{3}
          )+ |
          [\x80-\xc1\xf5-\xff]       # invalid
        )/nx) do |sequence|
          # Only the "invalid" alternative can produce a one byte match.
          raise GeneratorError, "invalid utf8 byte: '#{sequence}'" if sequence.size == 1

          # Hex dump of the UTF-16BE form, then prefix each 4 digit unit.
          hex = sequence.encode(::Encoding::UTF_16BE, ::Encoding::UTF_8).unpack('H*').first
          hex.force_encoding(::Encoding::BINARY)
          hex.gsub!(/.{4}/n, '\\\\u\&')
          hex.force_encoding(::Encoding::UTF_8)
        end
        bytes.force_encoding(::Encoding::UTF_8)
      rescue => e
        raise GeneratorError.wrap(e)
      end

      # Returns true when _string_ is tagged as UTF-8 or US-ASCII and its bytes
      # are valid for that encoding; returns false for any other encoding.
      def valid_utf8?(string)
        case string.encoding
        when Encoding::UTF_8, Encoding::ASCII
          string.valid_encoding?
        else
          false
        end
      end
      # Expose the helpers above as module-level functions, so they can be
      # called on the Generator module itself (e.g.
      # JSON::TruffleRuby::Generator.valid_utf8?).
      module_function :utf8_to_json, :utf8_to_json_ascii, :valid_utf8?

      # This class is used to create State instances, that are used to hold data
      # while generating a JSON text from a Ruby data structure.
      class State
        # Builds a fresh State from _opts_ and uses it to generate the JSON
        # text for _obj_, which is returned.
        def self.generate(obj, opts = nil)
          state = new(opts)
          state.generate(obj)
        end

        # Coerces _opts_ into a State object.
        #
        # * A State object is returned as is.
        # * Anything responding to #to_hash, or failing that #to_h, is converted
        #   and used to configure a new State instance.
        # * Everything else yields a copy of SAFE_STATE_PROTOTYPE.
        def self.from_state(opts)
          return opts if self === opts
          return new(opts.to_hash) if opts.respond_to?(:to_hash)
          return new(opts.to_h) if opts.respond_to?(:to_h)

          SAFE_STATE_PROTOTYPE.dup
        end

        # Instantiates a new State object, configured by _opts_.
        #
        # Every setting is first given the default listed below; when _opts_ is
        # given it is then applied through #configure.
        #
        # _opts_ can have the following keys:
        #
        # * *indent*: a string used to indent levels (default: ''),
        # * *space*: a string that is put after a : pair delimiter (default: ''),
        # * *space_before*: a string that is put before a : pair delimiter (default: ''),
        # * *object_nl*: a string that is put at the end of a JSON object (default: ''),
        # * *array_nl*: a string that is put at the end of a JSON array (default: ''),
        # * *script_safe*: true if U+2028, U+2029 and forward slash (/) should be escaped
        #   as to make the JSON object safe to interpolate in a script tag (default: false).
        # * *ascii_only*: true if only ASCII characters should be generated
        #   (default: false),
        # * *strict*: true if attempting to serialize types not supported by the
        #   JSON spec should raise a JSON::GeneratorError (default: false),
        # * *check_circular*: is deprecated now, use the :max_nesting option instead,
        # * *max_nesting*: sets the maximum level of data structure nesting in
        #   the generated JSON, max_nesting = 0 if no maximum should be checked
        #   (default: 100).
        # * *allow_nan*: true if NaN, Infinity, and -Infinity should be
        #   generated, otherwise an exception is thrown if these values are
        #   encountered. This option defaults to false.
        def initialize(opts = nil)
          @indent                = ''
          @space                 = ''
          @space_before          = ''
          @object_nl             = ''
          @array_nl              = ''
          @allow_nan             = false
          @ascii_only            = false
          @depth                 = 0
          @buffer_initial_length = 1024
          @script_safe           = false
          @strict                = false
          @max_nesting           = 100
          configure(opts) if opts
        end

        # The string used to indent each nesting level of the JSON text.
        attr_accessor :indent

        # The string inserted after the ':' that separates a key from its value
        # in JSON objects.
        attr_accessor :space

        # The string inserted before the ':' in JSON objects.
        attr_accessor :space_before

        # The string put after the opening brace, after every ',' and before the
        # closing brace of a non-empty JSON object (or Hash).
        attr_accessor :object_nl

        # The string put after the opening bracket, after every ',' and before
        # the closing bracket of a non-empty JSON array.
        attr_accessor :array_nl

        # The maximum level of data structure nesting allowed in the generated
        # JSON; max_nesting = 0 means that no maximum is checked.
        attr_accessor :max_nesting

        # If this attribute is set to true, forward slashes as well as U+2028
        # and U+2029 will be escaped in all JSON strings.
        attr_accessor :script_safe

        # If this attribute is set to true, attempting to serialize types not
        # supported by the JSON spec will raise a JSON::GeneratorError.
        attr_accessor :strict

        # :stopdoc:
        attr_reader :buffer_initial_length

        # Stores _length_ as the initial buffer length, but only when it is
        # greater than zero; other values leave the current setting untouched.
        def buffer_initial_length=(length)
          @buffer_initial_length = length if length > 0
        end
        # :startdoc:

        # This integer returns the current depth of data structure nesting in
        # the generated JSON.
        attr_accessor :depth

        # Raises a NestingError when entering one more nesting level (depth + 1)
        # would exceed max_nesting. A max_nesting of zero disables the check.
        def check_max_nesting # :nodoc:
          return if @max_nesting.zero?

          level = depth + 1
          raise NestingError, "nesting of #{level} is too deep" if level > @max_nesting

          # Keeps the historical result of the non-raising path.
          false
        end

        # Returns true if circular data structures are checked, otherwise
        # returns false. The check is driven by the nesting limit, so this is
        # true whenever max_nesting is not zero.
        def check_circular?
          !@max_nesting.zero?
        end

        # Returns the allow_nan setting: truthy if NaN, Infinity, and -Infinity
        # should be considered valid JSON and be output.
        def allow_nan?
          @allow_nan
        end

        # Returns the ascii_only setting: truthy if only ASCII characters should
        # be generated, falsy otherwise.
        def ascii_only?
          @ascii_only
        end

        # Returns the script_safe setting: truthy if forward slashes (along with
        # U+2028 and U+2029) are escaped, falsy otherwise.
        def script_safe?
          @script_safe
        end

        # Returns the strict setting: truthy if strict mode is enabled, falsy
        # otherwise. Strict mode only allows serializing JSON native types:
        # Hash, Array, String, Integer, Float, true, false and nil.
        def strict?
          @strict
        end

        # Configure this State instance with the Hash _opts_, and return
        # itself. Also available as #merge.
        #
        # _opts_ is converted with #to_hash or, failing that, #to_h; an object
        # responding to neither raises a TypeError.
        #
        # Note that some settings are assigned unconditionally, so they are
        # reset when their key is missing from _opts_: *depth* falls back to 0,
        # *script_safe* to false and *max_nesting* to 100.
        def configure(opts)
          if opts.respond_to?(:to_hash)
            opts = opts.to_hash
          elsif opts.respond_to?(:to_h)
            opts = opts.to_h
          else
            raise TypeError, "can't convert #{opts.class} into Hash"
          end
          # Every pair is first copied verbatim into an instance variable named
          # after its key, so keys not handled below are kept on the state too.
          opts.each do |key, value|
            instance_variable_set "@#{key}", value
          end

          # NOTE: If adding new instance variables here, check whether #generate should check them for #generate_json
          # The known options are then normalized: nil layout strings become '',
          # allow_nan is coerced to a boolean.
          @indent                = opts[:indent]        || '' if opts.key?(:indent)
          @space                 = opts[:space]         || '' if opts.key?(:space)
          @space_before          = opts[:space_before]  || '' if opts.key?(:space_before)
          @object_nl             = opts[:object_nl]     || '' if opts.key?(:object_nl)
          @array_nl              = opts[:array_nl]      || '' if opts.key?(:array_nl)
          @allow_nan             = !!opts[:allow_nan]         if opts.key?(:allow_nan)
          @ascii_only            = opts[:ascii_only]          if opts.key?(:ascii_only)
          # Unconditional: a missing or nil :depth resets the depth to 0.
          @depth                 = opts[:depth] || 0
          # Only assigns when the variable currently holds nil or false.
          @buffer_initial_length ||= opts[:buffer_initial_length]

          # :script_safe takes precedence over :escape_slash; with neither key
          # present the setting is reset to false.
          @script_safe = if opts.key?(:script_safe)
            !!opts[:script_safe]
          elsif opts.key?(:escape_slash)
            !!opts[:escape_slash]
          else
            false
          end

          @strict                = !!opts[:strict] if opts.key?(:strict)

          # An explicit nil or false :max_nesting disables the nesting check (0).
          if !opts.key?(:max_nesting) # defaults to 100
            @max_nesting = 100
          elsif opts[:max_nesting]
            @max_nesting = opts[:max_nesting]
          else
            @max_nesting = 0
          end
          self
        end
        alias merge configure

        # Returns a Hash with one entry per instance variable of this State,
        # keyed by the variable name (without the leading @) as a Symbol and
        # holding the value that #[] reports for that name. The result can be
        # passed to the configure method.
        def to_h
          instance_variables.each_with_object({}) do |ivar, settings|
            name = ivar.to_s[1..-1]
            settings[name.to_sym] = self[name]
          end
        end

        alias to_hash to_h

        # Generates a JSON document from object +obj+ and returns it.
        #
        # When every layout string is empty and ascii_only, script_safe, strict
        # and the nesting limit are all off, the document is produced by the
        # private #generate_json fast path; otherwise +obj+ is asked to
        # serialize itself through #to_json. A result that is not valid UTF-8
        # raises a GeneratorError.
        def generate(obj)
          layout_free = [@indent, @space, @space_before, @object_nl, @array_nl].all?(&:empty?)
          fast_path = layout_free && !@ascii_only && !@script_safe && @max_nesting == 0 && !@strict

          result = fast_path ? generate_json(obj, ''.dup) : obj.to_json(self)
          unless JSON::TruffleRuby::Generator.valid_utf8?(result)
            raise GeneratorError, "source sequence #{result.inspect} is illegal/malformed utf-8"
          end

          result
        end

        # Fast path serializer: appends the compact JSON form of _obj_ to _buf_
        # and returns _buf_. Hashes, Arrays, plain Strings and Integers are
        # written directly; String subclasses and every other object are
        # delegated to their own #to_json with this state.
        # Only meant for a state whose layout, ascii_only, script_safe, strict
        # and max_nesting settings are at the values #generate checks for.
        private def generate_json(obj, buf)
          case obj
          when Hash
            buf << '{'
            separator = nil
            obj.each_pair do |key, value|
              buf << separator if separator
              separator = ','

              name = key.to_s
              if name.class == String
                fast_serialize_string(name, buf)
              elsif name.is_a?(String)
                generate_json(name, buf)
              else
                raise TypeError, "#{key.class}#to_s returns an instance of #{name.class}, expected a String"
              end

              buf << ':'
              generate_json(value, buf)
            end
            buf << '}'
          when Array
            buf << '['
            separator = nil
            obj.each do |element|
              buf << separator if separator
              separator = ','
              generate_json(element, buf)
            end
            buf << ']'
          when String
            if obj.class == String
              fast_serialize_string(obj, buf)
            else
              buf << obj.to_json(self)
            end
          when Integer
            buf << obj.to_s
          else
            # Note: Float also ends up here, since Float#to_s is slow anyway
            buf << obj.to_json(self)
          end
        end

        # Appends _string_ to _buf_ as a quoted JSON string and returns _buf_.
        # Non UTF-8 input is transcoded to UTF-8 first; input that cannot be
        # converted, or that is not valid UTF-8, raises a GeneratorError.
        # Assumes !@ascii_only, !@script_safe
        private def fast_serialize_string(string, buf) # :nodoc:
          buf << '"'
          if string.encoding != ::Encoding::UTF_8
            begin
              string = string.encode(::Encoding::UTF_8)
            rescue Encoding::UndefinedConversionError => error
              raise GeneratorError, error.message
            end
          end
          unless string.valid_encoding?
            raise GeneratorError, "source sequence is illegal/malformed utf-8"
          end

          # Skip the substitution entirely when nothing needs escaping.
          needs_escaping = /["\\\x0-\x1f]/n.match?(string)
          buf << (needs_escaping ? string.gsub(/["\\\x0-\x1f]/n, MAP) : string)
          buf << '"'
        end

        # Returns the value for +name+: the result of calling the public method
        # +name+ when this object responds to it, otherwise the value of the
        # instance variable @+name+ if it is set, otherwise nil.
        def [](name)
          return __send__(name) if respond_to?(name)

          ivar = :"@#{name}"
          instance_variable_get(ivar) if instance_variables.include?(ivar) # avoid warning
        end

        # Sets the value for +name+: through the public writer method +name+=
        # when this object responds to it, otherwise by assigning the instance
        # variable @+name+ directly.
        def []=(name, value)
          writer = "#{name}="
          return __send__(writer, value) if respond_to?(writer)

          instance_variable_set("@#{name}", value)
        end
      end

      module GeneratorMethods
        module Object
          # Fallback used when an object defines no #to_json of its own: the
          # object is converted with #to_s and that string is returned as a
          # JSON string. When a _state_ is given and it is in strict mode, a
          # GeneratorError is raised instead.
          def to_json(state = nil, *)
            strict = state ? State.from_state(state).strict? : false
            raise GeneratorError, "#{self.class} not allowed in JSON" if strict

            to_s.to_json
          end
        end

        module Hash
          # Returns a JSON string containing a JSON object, generated from this
          # Hash instance.
          # _state_ is a JSON::State object (or anything State.from_state
          # accepts) that configures the produced output; its nesting limit is
          # checked before the object is written.
          def to_json(state = nil, *)
            generator_state = State.from_state(state)
            generator_state.check_max_nesting
            json_transform(generator_state)
          end

          private

          # Returns '' when the state's object_nl is not empty; otherwise the
          # state's indent string repeated once per current depth level.
          def json_shift(state)
            return '' unless state.object_nl.empty?

            state.indent * state.depth
          end

          # Serializes this Hash as a JSON object using the layout settings of
          # _state_ (indent, space, space_before, object_nl) and returns the
          # resulting String.
          #
          # state.depth is incremented while the pairs are written and restored
          # before returning normally. Keys are converted with #to_s; a key
          # whose #to_s does not return a String raises a TypeError. In strict
          # mode a value that is not a JSON native type raises a
          # GeneratorError.
          def json_transform(state)
            depth = state.depth += 1

            if empty?
              state.depth -= 1
              return '{}'
            end

            delim = ",#{state.object_nl}"
            result = +"{#{state.object_nl}"
            first = true
            indent = !state.object_nl.empty?
            each { |key, value|
              result << delim unless first
              result << state.indent * depth if indent

              key_str = key.to_s
              unless key_str.is_a?(String)
                raise TypeError, "#{key.class}#to_s returns an instance of #{key_str.class}, expected a String"
              end
              key_json = key_str.to_json(state)

              # Append only the new fragment: rebuilding the whole buffer by
              # interpolation for every pair copies everything written so far
              # and makes large objects quadratic.
              result << "#{key_json}#{state.space_before}:#{state.space}"
              if state.strict? && !(false == value || true == value || nil == value || String === value || Array === value || Hash === value || Integer === value || Float === value)
                raise GeneratorError, "#{value.class} not allowed in JSON"
              elsif value.respond_to?(:to_json)
                result << value.to_json(state)
              else
                # Last resort for objects without #to_json: quoted #to_s.
                result << %{"#{String(value)}"}
              end
              first = false
            }
            depth = state.depth -= 1
            unless first
              result << state.object_nl
              result << state.indent * depth if indent
            end
            result << '}'
            result
          end
        end

        module Array
          # Returns a JSON string containing a JSON array, generated from this
          # Array instance.
          # _state_ is a JSON::State object (or anything State.from_state
          # accepts) that configures the produced output; its nesting limit is
          # checked before the array is written.
          def to_json(state = nil, *)
            generator_state = State.from_state(state)
            generator_state.check_max_nesting
            json_transform(generator_state)
          end

          private

          # Serializes this Array as a JSON array using the layout settings of
          # _state_ (indent, array_nl) and returns the resulting String.
          #
          # state.depth is incremented while the elements are written and
          # restored before returning normally. In strict mode an element that
          # is not a JSON native type raises a GeneratorError.
          def json_transform(state)
            depth = state.depth += 1

            if empty?
              state.depth -= 1
              return '[]'
            end

            multiline = !state.array_nl.empty?
            result = +'['
            if multiline
              result << state.array_nl
              delim = ",#{state.array_nl}"
            else
              delim = ','
            end

            # nil before the first element, the delimiter from then on.
            separator = nil
            each do |value|
              result << separator if separator
              separator = delim
              result << state.indent * depth if multiline

              unsupported = state.strict? &&
                case value
                when false, true, nil, String, Array, Hash, Integer, Float then false
                else true
                end
              if unsupported
                raise GeneratorError, "#{value.class} not allowed in JSON"
              elsif value.respond_to?(:to_json)
                result << value.to_json(state)
              else
                # Last resort for objects without #to_json: quoted #to_s.
                result << %{"#{String(value)}"}
              end
            end

            depth = state.depth -= 1
            result << state.array_nl
            result << state.indent * depth if multiline
            result << ']'
          end
        end

        module Integer
          # Returns the JSON representation of this Integer, which is simply
          # its #to_s form. Any arguments are ignored.
          def to_json(*)
            to_s
          end
        end

        module Float
          # Returns the JSON representation of this Float, i.e. its #to_s form.
          # Infinity, -Infinity and NaN are only generated when _state_ allows
          # them (allow_nan?); otherwise a GeneratorError is raised.
          def to_json(state = nil, *)
            state = State.from_state(state)
            if (infinite? || nan?) && !state.allow_nan?
              raise GeneratorError, "#{self} not allowed in JSON"
            end

            to_s
          end
        end

        module String
          # Returns this string as a quoted JSON string.
          #
          # A UTF-8 string must be validly encoded, otherwise a GeneratorError
          # is raised; a string in any other encoding is transcoded to UTF-8
          # first, and a failed conversion is reported as a
          # JSON::GeneratorError. When _state_ is ascii_only, non-ASCII
          # characters are written as \u???? escapes.
          def to_json(state = nil, *args)
            state = State.from_state(state)
            utf8 =
              if encoding != ::Encoding::UTF_8
                encode(::Encoding::UTF_8)
              elsif valid_encoding?
                self
              else
                raise GeneratorError, "source sequence is illegal/malformed utf-8"
              end

            generator = JSON::TruffleRuby::Generator
            escaped =
              if state.ascii_only?
                generator.utf8_to_json_ascii(utf8, state.script_safe)
              else
                generator.utf8_to_json(utf8, state.script_safe)
              end
            %("#{escaped}")
          rescue Encoding::UndefinedConversionError => error
            raise ::JSON::GeneratorError, error.message
          end

          # Module that holds the extending methods if the String module is
          # included.
          module Extend
            # Rebuilds a Ruby String from its raw JSON object form: the byte
            # values stored in the array under the key "raw" of _o_ are packed
            # back into a String.
            def json_create(o)
              raw_bytes = o['raw']
              raw_bytes.pack('C*')
            end
          end

          # Hook run when this module is included: extends the including
          # _modul_ with the String::Extend module.
          def self.included(modul)
            modul.extend(Extend)
          end

          # Builds the raw object hash for this string: the class name stored
          # under JSON.create_id and the byte values stored as an array under
          # "raw". The hash can be nested into other data structures and is
          # meant for strings that should be converted to JSON as raw bytes
          # rather than as UTF-8 text, e. g. binary data.
          def to_json_raw_object
            raw_object = { JSON.create_id => self.class.name }
            raw_object['raw'] = bytes
            raw_object
          end

          # Returns the JSON text of this string's raw object form, i.e. the
          # result of #to_json_raw_object serialized with the given arguments.
          def to_json_raw(*args)
            raw_object = to_json_raw_object
            raw_object.to_json(*args)
          end
        end

        module TrueClass
          # Returns the JSON literal for true: 'true'. Any arguments are
          # ignored.
          def to_json(*)
            'true'
          end
        end

        module FalseClass
          # Returns the JSON literal for false: 'false'. Any arguments are
          # ignored.
          def to_json(*)
            'false'
          end
        end

        module NilClass
          # Returns the JSON literal for nil: 'null'. Any arguments are
          # ignored.
          def to_json(*)
            'null'
          end
        end
      end
    end
  end
end