lib/polars/utils/construction/series.rb



module Polars
  module Utils
    # Builds the low-level series for `name` from a Ruby sequence of `values`.
    #
    # The construction path is chosen from the requested `dtype` and from the
    # class of `value`, the element returned by `get_first_non_none(values)`:
    #
    # 1. non-String ranges are delegated to `range_to_series`;
    # 2. a non-nested polars dtype uses its dedicated constructor (with
    #    fallbacks), followed by a cast where needed;
    # 3. a Struct dtype is built row-wise through `sequence_to_rbdf`;
    # 4. otherwise the Ruby class of `value` selects a temporal, Array,
    #    Series, RbSeries or scalar path.
    #
    # Parameters:
    # - name: series name, passed through to every constructor.
    # - values: a Range, or a collection responding to `length`,
    #   `each_with_index`, `map` and `any?`.
    # - dtype: requested dtype, or nil to infer one. The temporal branch also
    #   accepts the Ruby temporal classes listed in `rb_temporal_types`.
    # - strict: forwarded to the constructors and to most casts.
    # - nan_to_null: only forwarded to the recursive calls made for nested
    #   list elements; it is not otherwise read in this method.
    #
    # Returns the value produced by the RbSeries constructors (or the `_s` of
    # a wrapped series).
    #
    # Raises TypeError when an uninstanced Decimal is requested for data that
    # was built as an unsupported dtype, or when a temporal value parses to a
    # float dtype. Raises Todo for the paths marked as not implemented
    # (1-D Numo first element, uninstanced Array dtype).
    def self.sequence_to_rbseries(name, values, dtype: nil, strict: true, nan_to_null: false)
      ruby_dtype = nil

      # Ranges that start with a String are expanded into an array and handled
      # like any other sequence; every other range goes to range_to_series.
      if values.is_a?(Range)
        if values.begin.is_a?(::String)
          values = values.to_a
        else
          return range_to_series(name, values, dtype: dtype)._s
        end
      end

      # Empty input without a dtype becomes a Null series. A List/Array dtype
      # forces the Ruby-Array path below, regardless of the first element.
      if values.length == 0 && dtype.nil?
        dtype = Null
      elsif [List, Array].include?(dtype)
        ruby_dtype = ::Array
      end

      # Ruby classes treated as temporal; `::` distinguishes them from the
      # polars dtypes of the same name used elsewhere in this method.
      rb_temporal_types = [::Date, ::DateTime, ::Time]
      rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)

      # nil when no usable element exists (see the `value.nil?` check below).
      value = get_first_non_none(values)

      # Path for a known, non-nested polars dtype when the Array path was not forced.
      if !dtype.nil? && is_polars_dtype(dtype) && !dtype.nested? && dtype != Unknown && ruby_dtype.nil?
        constructor = polars_type_to_constructor(dtype)
        rbseries = _construct_series_with_fallbacks(
          constructor, name, values, dtype, strict: strict
        )

        # For these dtypes the constructor (or its fallback) may produce a
        # different dtype than requested, so cast when they differ.
        # NOTE(review): the literal `true` is presumably the strict flag and
        # does not follow the `strict` argument - confirm this is intended.
        if [Date, Datetime, Duration, Time, Boolean, Categorical, Enum].include?(dtype) || dtype.is_a?(Decimal) || dtype.is_a?(Categorical)
          if rbseries.dtype != dtype
            rbseries = rbseries.cast(dtype, true, false)
          end
        end

        # Uninstanced Decimal is a bit special and has various inference paths
        if dtype == Decimal
          if rbseries.dtype == String
            rbseries = rbseries.str_to_decimal_infer(0)
          elsif rbseries.dtype.float?
            # Go through string so we infer an appropriate scale.
            rbseries = rbseries.cast(
              String, strict, false
            ).str_to_decimal_infer(0)
          elsif rbseries.dtype.integer? || rbseries.dtype == Null
            rbseries = rbseries.cast(
              Decimal.new(nil, 0), strict, false
            )
          elsif !rbseries.dtype.is_a?(Decimal)
            msg = "can't convert #{rbseries.dtype} to Decimal"
            raise TypeError, msg
          end
        end

        return rbseries

      # Struct path: only a Struct instance supplies a schema; otherwise the
      # schema is left nil for sequence_to_rbdf.
      elsif dtype == Struct
        struct_schema = dtype.is_a?(Struct) ? dtype.to_schema : nil
        empty = {}

        # Replace nil rows with an empty hash and remember their positions;
        # the positions are handed to to_struct (presumably to mark those rows
        # as null - confirm against to_struct).
        data = []
        invalid = []
        values.each_with_index do |v, i|
          if v.nil?
            invalid << i
            data << empty
          else
            data << v
          end
        end

        return sequence_to_rbdf(
          data,
          schema: struct_schema,
          orient: "row",
        ).to_struct(name, invalid)
      end

      # No forced Ruby type: either there is no usable element (build a Null
      # series) or the class of `value` drives the remaining dispatch.
      if ruby_dtype.nil?
        if value.nil?
          constructor = polars_type_to_constructor(Null)
          return constructor.(name, values, strict)
        end

        ruby_dtype = value.class
      end

      # Temporal path: `value` is a Ruby Date/DateTime/Time (or TimeWithZone).
      if rb_temporal_types.include?(ruby_dtype)
        # Normalise dtype to a polars dtype: infer it from the Ruby class when
        # absent, or convert it when a Ruby temporal class was passed.
        if dtype.nil?
          dtype = parse_into_dtype(ruby_dtype)
        elsif rb_temporal_types.include?(dtype)
          dtype = parse_into_dtype(dtype)
        end

        # NOTE(review): ruby_dtype can only be temporal here when it came from
        # value.class, so a float values_dtype looks unreachable - confirm.
        values_dtype = value.nil? ? nil : try_parse_into_dtype(value.class)
        if !values_dtype.nil? && values_dtype.float?
          msg = "'float' object cannot be interpreted as a #{ruby_dtype.name.inspect}"
          raise TypeError, msg
        end

        rb_series = RbSeries.new_from_any_values(name, values, strict)

        # Not every dtype exposes a time unit / time zone, hence respond_to?.
        time_unit = dtype.respond_to?(:time_unit) ? dtype.time_unit : nil
        time_zone = dtype.respond_to?(:time_zone) ? dtype.time_zone : nil

        # Parse strings into the temporal dtype, or align the time unit with
        # the requested dtype (skipped when the values parsed as Date).
        if dtype.temporal? && values_dtype == String && dtype != Duration
          s = wrap_s(rb_series).str.strptime(dtype, strict: strict)
        elsif !time_unit.nil? && values_dtype != Date
          s = wrap_s(rb_series).dt.cast_time_unit(time_unit)
        else
          s = wrap_s(rb_series)
        end

        # A Datetime dtype with a time zone is converted to that zone last.
        if dtype == Datetime && !time_zone.nil?
          return s.dt.convert_time_zone(time_zone)._s
        end
        s._s

      # A 1-D Numo array as the first element is not supported yet.
      elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
        raise Todo

      # Nested path: elements are Ruby arrays (or a List/Array dtype forced it).
      elsif ruby_dtype == ::Array
        if dtype.nil?
          RbSeries.new_from_any_values(name, values, strict)
        # NOTE(review): `Object` presumably resolves to the polars Object
        # dtype inside this module rather than ::Object - confirm.
        elsif dtype.is_a?(Object)
          RbSeries.new_object(name, values, strict)
        else
          inner_dtype = dtype.respond_to?(:inner) ? dtype.inner : nil
          if !inner_dtype.nil?
            # Build each non-nil element as its own unnamed series of the
            # inner dtype, then combine them into one series of lists.
            # (The block parameter `value` shadows the outer `value`.)
            rbseries_list =
              values.map do |value|
                if value.nil?
                  nil
                else
                  sequence_to_rbseries(
                    "",
                    value,
                    dtype: inner_dtype,
                    strict: strict,
                    nan_to_null: nan_to_null,
                  )
                end
              end
            rbseries = RbSeries.new_series_list(name, rbseries_list, strict)
          else
            # panics in Python
            raise Todo if dtype.eql?(Array)

            rbseries = RbSeries.new_from_any_values_and_dtype(
              name, values, dtype, strict
            )
          end
          # Final cast so the result carries exactly the requested dtype.
          if dtype != rbseries.dtype
            rbseries = rbseries.cast(dtype, false, false)
          end
          rbseries
        end

      # Elements are Series: unwrap each non-nil one to its `_s`.
      elsif ruby_dtype == Series
        RbSeries.new_series_list(
          name, values.map { |v| !v.nil? ? v._s : v }, strict
        )

      # Elements are already RbSeries: pass them through unchanged.
      elsif ruby_dtype == RbSeries
        RbSeries.new_series_list(name, values, strict)
      else
        # Scalar path: choose a constructor from the first value.
        # UTF-8 strings become string series, other encodings become binary;
        # an Integer first value with any Float present is built as f64.
        constructor =
          if value.is_a?(::String)
            if value.encoding == Encoding::UTF_8
              RbSeries.method(:new_str)
            else
              RbSeries.method(:new_binary)
            end
          elsif value.is_a?(Integer) && values.any? { |v| v.is_a?(Float) }
            # TODO improve performance
            RbSeries.method(:new_opt_f64)
          else
            rb_type_to_constructor(value.class)
          end

        # Classes without a dedicated constructor map to new_object; those are
        # built through the generic any-values constructor instead.
        if constructor == RbSeries.method(:new_object)
          srs = RbSeries.new_from_any_values(name, values, strict)
          return srs
        end

        _construct_series_with_fallbacks(constructor, name, values, dtype, strict: strict)
      end
    end

    # Calls `constructor` with (name, values, strict). If it raises any
    # StandardError, the series is rebuilt through the generic any-values
    # constructors instead, passing `dtype` along when one was given.
    def self._construct_series_with_fallbacks(constructor, name, values, dtype, strict:)
      constructor.call(name, values, strict)
    rescue
      # Best-effort fallback: the typed constructor rejected the input.
      return RbSeries.new_from_any_values(name, values, strict) if dtype.nil?

      RbSeries.new_from_any_values_and_dtype(name, values, dtype, strict)
    end

    # Converts a Numo array into the low-level series named `name`.
    #
    # One-dimensional input is turned into a Ruby array and handed to the
    # constructor selected for its Numo class. Higher-dimensional input is
    # flattened, converted recursively, and reshaped back to its original shape.
    #
    # For SFloat/DFloat input the constructor's third argument is
    # `nan_to_null`; for every other class it is `strict`.
    def self.numo_to_rbseries(name, values, strict: true, nan_to_null: false)
      # not needed yet
      # if !values.contiguous?
      # end

      shape = values.shape

      if shape.length != 1
        # Flatten to 1-D (element count = product of all dimensions).
        flattened = values.reshape(shape.inject(&:*))
        flat_series = numo_to_rbseries(name, flattened, strict: strict, nan_to_null: nan_to_null)
        return Utils.wrap_s(flat_series).reshape(shape)._s
      end

      values, dtype = numo_values_and_dtype(values)
      build = numo_type_to_constructor(dtype)
      elements = values.to_a
      third_arg = [Numo::SFloat, Numo::DFloat].include?(dtype) ? nan_to_null : strict
      build.call(name, elements, third_arg)
    end

    # Returns the low-level `_s` of a copy of `values`, cast to `dtype` when
    # one is given and differs from the current dtype, and renamed to `name`
    # when `name` is not nil. The input series itself is cloned first.
    def self.series_to_rbseries(name, values, dtype: nil, strict: true)
      result = values.clone

      needs_cast = !dtype.nil? && dtype != result.dtype
      result = result.cast(dtype, strict: strict) if needs_cast
      result = result.alias(name) unless name.nil?

      result._s
    end

    # Converts a DataFrame into the low-level `_s` of a single series.
    #
    # - More than one column: the frame becomes a struct series named `name`
    #   (or "" when `name` is nil).
    # - Exactly one column: that column is used, renamed when `name` is given.
    # - No columns: raises TypeError.
    #
    # The result is cast to `dtype` when one is given and differs.
    def self.dataframe_to_rbseries(name, values, dtype: nil, strict: true)
      column_count = values.width

      series =
        if column_count > 1
          values.to_struct(name || "")
        elsif column_count == 1
          only_column = values.to_series
          name.nil? ? only_column : only_column.alias(name)
        else
          raise TypeError, "cannot initialize Series from DataFrame without any columns"
        end

      needs_cast = !dtype.nil? && dtype != series.dtype
      series = series.cast(dtype, strict: strict) if needs_cast

      series._s
    end

    # TODO move rest

    # Maps a polars dtype (looked up by `dtype.base_type` in
    # `polars_type_to_constructor`) to the RbSeries constructor used to build
    # a series for it.
    #
    # The temporal dtypes share the generic `new_from_any_values` constructor,
    # and Categorical/Enum are built with the string constructor.
    #
    # Frozen because this is a shared lookup table that is only read.
    POLARS_TYPE_TO_CONSTRUCTOR = {
      Float16 => RbSeries.method(:new_opt_f16),
      Float32 => RbSeries.method(:new_opt_f32),
      Float64 => RbSeries.method(:new_opt_f64),
      Int8 => RbSeries.method(:new_opt_i8),
      Int16 => RbSeries.method(:new_opt_i16),
      Int32 => RbSeries.method(:new_opt_i32),
      Int64 => RbSeries.method(:new_opt_i64),
      Int128 => RbSeries.method(:new_opt_i128),
      UInt8 => RbSeries.method(:new_opt_u8),
      UInt16 => RbSeries.method(:new_opt_u16),
      UInt32 => RbSeries.method(:new_opt_u32),
      UInt64 => RbSeries.method(:new_opt_u64),
      UInt128 => RbSeries.method(:new_opt_u128),
      Decimal => RbSeries.method(:new_decimal),
      Date => RbSeries.method(:new_from_any_values),
      Datetime => RbSeries.method(:new_from_any_values),
      Duration => RbSeries.method(:new_from_any_values),
      Time => RbSeries.method(:new_from_any_values),
      Boolean => RbSeries.method(:new_opt_bool),
      Utf8 => RbSeries.method(:new_str),
      Object => RbSeries.method(:new_object),
      Categorical => RbSeries.method(:new_str),
      Enum => RbSeries.method(:new_str),
      Binary => RbSeries.method(:new_binary),
      Null => RbSeries.method(:new_null)
    }.freeze

    # Returns the callable used to build a series of the given polars dtype.
    #
    # An Array dtype instance gets a lambda that forwards the dtype itself to
    # `RbSeries.new_array`; every other dtype is resolved through
    # POLARS_TYPE_TO_CONSTRUCTOR by its `base_type`.
    #
    # Raises ArgumentError when no constructor is registered for the dtype.
    def self.polars_type_to_constructor(dtype)
      if dtype.is_a?(Array)
        return ->(name, values, strict) { RbSeries.new_array(name, values, strict, dtype) }
      end

      POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype.base_type)
    rescue KeyError
      raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
    end

    # Maps core Ruby classes to the RbSeries constructor used for values of
    # that class. Classes not listed fall back to `new_object` in
    # `rb_type_to_constructor`.
    #
    # Keys are root-qualified (`::`) on purpose: bare names are resolved
    # inside `Polars` first, and in this file a bare `String` is the polars
    # dtype (it is compared against `rbseries.dtype`), so it would never match
    # the class of a Ruby string.
    #
    # Frozen because this is a shared lookup table that is only read.
    RB_TYPE_TO_CONSTRUCTOR = {
      ::Float => RbSeries.method(:new_opt_f64),
      ::TrueClass => RbSeries.method(:new_opt_bool),
      ::FalseClass => RbSeries.method(:new_opt_bool),
      ::Integer => RbSeries.method(:new_opt_i64),
      ::String => RbSeries.method(:new_str),
      ::BigDecimal => RbSeries.method(:new_decimal)
    }.freeze

    # Looks up the RbSeries constructor registered for a Ruby class in
    # RB_TYPE_TO_CONSTRUCTOR, defaulting to `RbSeries.new_object` for any
    # class that is not listed.
    def self.rb_type_to_constructor(dtype)
      RB_TYPE_TO_CONSTRUCTOR.fetch(dtype) { RbSeries.method(:new_object) }
    end

    # Returns the pair [values, class of values]; the input is passed through
    # unchanged and its class serves as the dtype key.
    def self.numo_values_and_dtype(values)
      dtype = values.class
      [values, dtype]
    end

    # Returns the RbSeries constructor for a Numo array class, or
    # `RbSeries.new_object` when the class has no dedicated constructor.
    #
    # The table is built inside the method, so the Numo constants are only
    # referenced when this method is actually called.
    def self.numo_type_to_constructor(dtype)
      constructors = {
        Numo::Float32 => RbSeries.method(:new_opt_f32),
        Numo::Float64 => RbSeries.method(:new_opt_f64),
        Numo::Int8 => RbSeries.method(:new_opt_i8),
        Numo::Int16 => RbSeries.method(:new_opt_i16),
        Numo::Int32 => RbSeries.method(:new_opt_i32),
        Numo::Int64 => RbSeries.method(:new_opt_i64),
        Numo::UInt8 => RbSeries.method(:new_opt_u8),
        Numo::UInt16 => RbSeries.method(:new_opt_u16),
        Numo::UInt32 => RbSeries.method(:new_opt_u32),
        Numo::UInt64 => RbSeries.method(:new_opt_u64)
      }

      constructors.fetch(dtype) { RbSeries.method(:new_object) }
    end
  end
end