module Polars::Selectors

def self._combine_as_selector(items, *more_items)

Other tags:
    Private: -
# Fold column names, regex strings, dtypes, columns and selectors into one
# selector that is the union (`|`) of everything supplied.
#
# Strings that start with "^" and end with "$" are treated as regex patterns;
# every other string is treated as a literal column name.
#
# @private
#
# @param items [Object]
#   A single item, or an array of items.
# @param more_items [Array]
#   Additional items, given positionally.
#
# @return [Selector]
#
# @raise [TypeError] if an item is not a string, dtype, column or selector.
def self._combine_as_selector(items, *more_items)
  names = []
  regexes = []
  dtypes = []
  selectors = []

  ((items.is_a?(::Array) ? items : [items]) + more_items).each do |item|
    if Utils.is_selector(item)
      selectors << item
    elsif item.is_a?(::String)
      if item.start_with?("^") && item.end_with?("$")
        regexes << item
      else
        names << item
      end
    elsif Utils.is_polars_dtype(item)
      dtypes << item
    elsif Utils.is_column(item)
      names << item.meta.output_name
    else
      msg = "expected one or more `str`, `DataType` or selector; found #{item.inspect} instead."
      raise TypeError, msg
    end
  end

  selected = []
  selected << by_name(*names, require_all: false) if names.any?
  selected << by_dtype(*dtypes) if dtypes.any?
  if regexes.any?
    # Several patterns are OR-ed together, each wrapped in its own group;
    # a lone pattern is passed through untouched.
    pattern = regexes.size > 1 ? regexes.map { |rx| "(#{rx})" }.join("|") : regexes.first
    selected << matches(pattern)
  end
  selected.concat(selectors)

  # Starting from the empty selector keeps the result a selector even when
  # nothing was collected.
  selected.reduce(empty, :|)
end

def self._re_string(string, escape: true)

Other tags:
    Private: -
# Build a parenthesised regex alternation from one string or a collection of
# strings (nested arrays are flattened one level).
#
# @private
#
# @param string [String, Array]
#   A single string, or a collection whose elements are strings or arrays of
#   strings.
# @param escape [Boolean]
#   When true, each string is passed through `Utils.re_escape` first.
#
# @return [String] the pattern wrapped in a group, e.g. "(a|b)".
def self._re_string(string, escape: true)
  if string.is_a?(::String)
    rx = escape ? Utils.re_escape(string) : string
  else
    strings = []
    string.each do |st|
      # `::Array` (not bare `Array`) so the check always targets Ruby's core
      # Array, matching the `::Array`/`::String` checks used across this module.
      if st.is_a?(::Array)
        strings.concat(st)
      else
        strings << st
      end
    end
    rx = strings.map { |x| escape ? Utils.re_escape(x) : x }.join("|")
  end
  "(#{rx})"
end

def self.all

Other tags:
    Example: Select all columns *except* for those matching the given dtypes: -
    Example: Select all columns, casting them to string: -

Returns:
  • (Selector) -
# Select all columns.
#
# @return [Selector]
def self.all
  rb_selector = RbSelector.all
  Selector._from_rbselector(rb_selector)
end

def self.alpha(ascii_only: false, ignore_spaces: false)

Other tags:
    Example: Select all columns *except* for those with alphabetic names: -
    Example: Constrain the definition of "alphabetic" to ASCII characters only: -
    Example: Select columns with alphabetic names; note that accented characters and kanji are recognised as alphabetic here: -

Other tags:
    Note: -

Returns:
  • (Selector) -

Parameters:
  • ignore_spaces (Boolean) --
  • ascii_only (Boolean) --
# Select columns whose names consist only of alphabetic characters.
#
# @param ascii_only [Boolean]
#   Restrict "alphabetic" to a-z / A-Z instead of `\p{Alphabetic}`.
# @param ignore_spaces [Boolean]
#   Also allow the space character in names.
#
# @return [Selector]
def self.alpha(ascii_only: false, ignore_spaces: false)
  # The pattern has to be compatible with the *rust* regex crate.
  char_class = ascii_only ? "a-zA-Z" : "\\p{Alphabetic}"
  char_class += " " if ignore_spaces
  rb_selector = RbSelector.matches("^[#{char_class}]+$")
  Selector._from_rbselector(rb_selector)
end

def self.alphanumeric(ascii_only: false, ignore_spaces: false)

Other tags:
    Example: Select all columns *except* for those with alphanumeric names: -
    Example: Select columns with alphanumeric names: -

Other tags:
    Note: -

Returns:
  • (Selector) -

Parameters:
  • ignore_spaces (Boolean) --
  • ascii_only (Boolean) --
# Select columns whose names consist only of alphabetic characters and digits.
#
# @param ascii_only [Boolean]
#   Restrict the match to a-z, A-Z and 0-9 instead of `\p{Alphabetic}` / `\d`.
# @param ignore_spaces [Boolean]
#   Also allow the space character in names.
#
# @return [Selector]
def self.alphanumeric(ascii_only: false, ignore_spaces: false)
  # The pattern has to be compatible with the *rust* regex crate.
  char_class = ascii_only ? "a-zA-Z0-9" : "\\p{Alphabetic}\\d"
  char_class += " " if ignore_spaces
  rb_selector = RbSelector.matches("^[#{char_class}]+$")
  Selector._from_rbselector(rb_selector)
end

def self.array(inner = nil, width: nil)

Other tags:
    Example: Select all array columns with a certain matching inner type: -
    Example: Select all columns *except* for those that are array: -
    Example: Select all array columns: -

Other tags:
    Note: -

Returns:
  • (Selector) -
# Select array columns, optionally restricted by an inner selector and width.
#
# @param inner [Object, nil]
#   Object exposing `_rbselector`; its value is forwarded as the inner selector.
# @param width [Object, nil]
#   Forwarded unchanged to `RbSelector.array`.
#
# @return [Selector]
def self.array(inner = nil, width: nil)
  # Safe navigation only skips `nil`, exactly like the explicit nil check.
  rb_selector = RbSelector.array(inner&._rbselector, width)
  Selector._from_rbselector(rb_selector)
end

def self.binary

Other tags:
    Example: Select all columns *except* for those that are binary: -
    Example: Select binary columns and export as a hash: -

Returns:
  • (Selector) -
# Select all binary columns.
#
# @return [Selector]
def self.binary
  binary_dtypes = [Binary]
  by_dtype(binary_dtypes)
end

def self.boolean

Other tags:
    Example: Select all columns *except* for those that are boolean: -
    Example: Select and invert boolean columns: -

Returns:
  • (Selector) -
# Select all boolean columns.
#
# @return [Selector]
def self.boolean
  boolean_dtypes = [Boolean]
  by_dtype(boolean_dtypes)
end

def self.by_dtype(*dtypes)

Other tags:
    Example: Select all columns that are not of date or string dtype: -
    Example: Select all columns with date or string dtypes: -

Returns:
  • (Selector) -
# Select all columns matching the given dtypes.
#
# Each argument is either a dtype (per `Utils.is_polars_dtype`, or any Class)
# or an array of such dtypes; arrays are flattened one level.
#
# @param dtypes [Array]
#
# @return [Selector]
#
# @raise [TypeError] if any argument (or array element) is not a dtype.
def self.by_dtype(*dtypes)
  dtype_like = ->(candidate) { Utils.is_polars_dtype(candidate) || candidate.is_a?(Class) }

  collected = dtypes.flat_map do |entry|
    next [entry] if dtype_like.(entry)
    raise TypeError, "invalid dtype: #{entry.inspect}" unless entry.is_a?(::Array)

    entry.each do |member|
      raise TypeError, "invalid dtype: #{member.inspect}" unless dtype_like.(member)
    end
    entry
  end

  Selector._by_dtype(collected)
end

def self.by_index(*indices, require_all: true)

Other tags:
    Example: Select all columns *except* for the even-indexed ones: -
    Example: Select the "key" column and use a `range` object to select various columns. -
    Example: Select columns by index ("key" column and the two first/last columns): -

Other tags:
    Note: -

Returns:
  • (Selector) -

Parameters:
  • indices (Array) --
# Select columns by their index.
#
# @param indices [Array]
#   Integers and/or enumerables of indices (e.g. ranges, arrays); enumerables
#   are expanded with `to_a`.
# @param require_all [Boolean]
#   Forwarded unchanged to `RbSelector.by_index`.
#
# @return [Selector]
#
# @raise [TypeError] if an argument is neither an Enumerable nor an Integer.
def self.by_index(*indices, require_all: true)
  expanded = indices.flat_map do |entry|
    case entry
    when Enumerable then entry.to_a
    when Integer then [entry]
    else raise TypeError, "invalid index value: #{entry.inspect}"
    end
  end

  Selector._from_rbselector(RbSelector.by_index(expanded, require_all))
end

def self.by_name(*names, require_all: true)

Other tags:
    Example: Match all columns *except* for those given: -
    Example: Match *any* of the given columns by name: -
    Example: Select columns by name: -

Other tags:
    Note: -

Returns:
  • (Selector) -

Parameters:
  • require_all (Boolean) --
  • names (Array) --
# Select columns by name.
#
# @param names [Array]
#   Column names, given as strings and/or arrays of strings (arrays are
#   flattened one level).
# @param require_all [Boolean]
#   Passed to `Selector._by_name` as `strict`.
#
# @return [Selector]
#
# @raise [TypeError] if any name is not a String.
def self.by_name(*names, require_all: true)
  collected = names.flat_map do |entry|
    case entry
    when ::String
      [entry]
    when ::Array
      entry.each do |member|
        raise TypeError, "invalid name: #{member.inspect}" unless member.is_a?(::String)
      end
      entry
    else
      raise TypeError, "invalid name: #{entry.inspect}"
    end
  end

  Selector._by_name(collected, strict: require_all, expand_patterns: false)
end

def self.categorical

Other tags:
    Example: Select all columns *except* for those that are categorical: -
    Example: Select all categorical columns: -

Returns:
  • (Selector) -
# Select all categorical columns.
#
# @return [Selector]
def self.categorical
  rb_selector = RbSelector.categorical
  Selector._from_rbselector(rb_selector)
end

def self.contains(*substring)

Other tags:
    Example: Select all columns *except* for those that contain the substring 'ba': -
    Example: Select columns that contain the substring 'ba' or the letter 'z': -
    Example: Select columns that contain the substring 'ba': -

Returns:
  • (Selector) -

Parameters:
  • substring (Object) --
# Select columns whose names contain any of the given substrings.
#
# @param substring [Object]
#   One or more substrings; they are turned into a regex group by `_re_string`.
#
# @return [Selector]
def self.contains(*substring)
  group = _re_string(substring)
  rb_selector = RbSelector.matches("^.*#{group}.*$")
  Selector._from_rbselector(rb_selector)
end

def self.date

Other tags:
    Example: Select all columns *except* for those that are dates: -
    Example: Select all date columns: -

Returns:
  • (Selector) -
# Select all date columns.
#
# @return [Selector]
def self.date
  date_dtypes = [Date]
  by_dtype(date_dtypes)
end

def self.datetime(time_unit = nil, time_zone: ["*", nil])

Returns:
  • (Selector) -

Parameters:
  • time_zone (String) --
  • time_unit ('ms', 'us', 'ns') --
# Select datetime columns, optionally restricted by time unit and time zone.
#
# @param time_unit ['ms', 'us', 'ns', Array, nil]
#   A single unit, a collection of units, or nil for all of "ms", "us", "ns".
# @param time_zone [String, Array, nil]
#   A single zone name, a collection of zones, or nil (forwarded as `[nil]`).
#   The default `["*", nil]` is forwarded as-is; presumably "*" stands for
#   "any time zone" — confirm against `RbSelector.datetime`.
#
# @return [Selector]
def self.datetime(time_unit = nil, time_zone: ["*", nil])
  time_unit_lst =
    if time_unit.nil?
      ["ms", "us", "ns"]
    elsif time_unit.is_a?(::String)
      [time_unit]
    else
      time_unit.to_a
    end

  time_zone_lst =
    if time_zone.nil?
      [nil]
    elsif time_zone.is_a?(::String)
      # String does not respond to #to_a, so a single zone name has to be
      # wrapped explicitly (mirrors the time_unit handling above).
      [time_zone]
    elsif time_zone
      # TODO improve
      time_zone.to_a
    end

  Selector._from_rbselector(RbSelector.datetime(time_unit_lst, time_zone_lst))
end

def self.decimal

Other tags:
    Example: Select all columns *except* the decimal ones: -
    Example: Select all decimal columns: -

Returns:
  • (Selector) -
# Select all decimal columns.
#
# @return [Selector]
def self.decimal
  # TODO: allow explicit selection by scale/precision?
  rb_selector = RbSelector.decimal
  Selector._from_rbselector(rb_selector)
end

def self.digit(ascii_only: false)

Other tags:
    Example: Demonstrate use of `ascii_only` flag (by default all valid unicode digits are considered, but this can be constrained to ascii 0-9): -
    Example: Select all columns *except* for those with digit names: -
    Example: Select columns with digit names: -

Other tags:
    Note: -

Returns:
  • (Selector) -
# Select columns whose names consist only of digits.
#
# @param ascii_only [Boolean]
#   Match only 0-9 instead of the regex class `\d`.
#
# @return [Selector]
def self.digit(ascii_only: false)
  digit_class = "\\d"
  digit_class = "[0-9]" if ascii_only
  rb_selector = RbSelector.matches("^#{digit_class}+$")
  Selector._from_rbselector(rb_selector)
end

def self.duration(time_unit = nil)

Returns:
  • (Selector) -

Parameters:
  • time_unit ('ms', 'us', 'ns') --
# Select duration columns, optionally restricted by time unit.
#
# @param time_unit ['ms', 'us', 'ns', Array, nil]
#   A single unit, a collection of units, or nil for all of "ms", "us", "ns".
#
# @return [Selector]
def self.duration(time_unit = nil)
  units =
    if time_unit.nil?
      ["ms", "us", "ns"]
    elsif time_unit.is_a?(::String)
      [time_unit]
    else
      time_unit.to_a
    end
  Selector._from_rbselector(RbSelector.duration(units))
end

def self.empty

Returns:
  • (Selector) -
# Build the selector wrapping `RbSelector.empty`.
#
# @return [Selector]
def self.empty
  rb_selector = RbSelector.empty
  Selector._from_rbselector(rb_selector)
end

def self.ends_with(*suffix)

Other tags:
    Example: Select all columns *except* for those that end with the substring 'z': -
    Example: Select columns that end with *either* the letter 'z' or 'r': -
    Example: Select columns that end with the substring 'z': -

Returns:
  • (Selector) -

Parameters:
  • suffix (Object) --
# Select columns whose names end with any of the given suffixes.
#
# @param suffix [Object]
#   One or more suffixes; they are turned into a regex group by `_re_string`.
#
# @return [Selector]
def self.ends_with(*suffix)
  group = _re_string(suffix)
  rb_selector = RbSelector.matches("^.*#{group}$")
  Selector._from_rbselector(rb_selector)
end

def self.enum

Other tags:
    Example: Select all columns *except* for those that are enum: -
    Example: Select all enum columns: -

Other tags:
    Note: -

Returns:
  • (Selector) -
# Select all enum columns.
#
# @return [Selector]
def self.enum
  rb_selector = RbSelector.enum_
  Selector._from_rbselector(rb_selector)
end

def self.exclude(columns, *more_columns)

Other tags:
    Example: Exclude using a column name, a selector, and a dtype: -
    Example: Exclude by column name(s): -

Other tags:
    Note: -

Returns:
  • (Selector) -

Parameters:
  • more_columns (Array) --
  • columns (Object) --
# Select all columns except those matching the given names, dtypes or selectors.
#
# @param columns [Object]
#   A single item or an array of items to exclude.
# @param more_columns [Array]
#   Additional items to exclude, given positionally.
#
# @return [Selector]
def self.exclude(columns, *more_columns)
  combined = _combine_as_selector(columns, *more_columns)
  # Unary `~` inverts the combined selector.
  ~combined
end

def self.first(strict: true)

Other tags:
    Example: Select everything *except* for the first column: -
    Example: Select the first column: -

Returns:
  • (Selector) -
# Select the first column.
#
# @param strict [Boolean]
#   Forwarded unchanged to `RbSelector.first`.
#
# @return [Selector]
def self.first(strict: true)
  rb_selector = RbSelector.first(strict)
  Selector._from_rbselector(rb_selector)
end

def self.float

Other tags:
    Example: Select all columns *except* for those that are float: -
    Example: Select all float columns: -

Returns:
  • (Selector) -
# Select all float columns.
#
# @return [Selector]
def self.float
  rb_selector = RbSelector.float
  Selector._from_rbselector(rb_selector)
end

def self.integer

Other tags:
    Example: Select all columns *except* for those that are integer: -
    Example: Select all integer columns: -

Returns:
  • (Selector) -
# Select all integer columns.
#
# @return [Selector]
def self.integer
  rb_selector = RbSelector.integer
  Selector._from_rbselector(rb_selector)
end

def self.last(strict: true)

Other tags:
    Example: Select everything *except* for the last column: -
    Example: Select the last column: -

Returns:
  • (Selector) -
# Select the last column.
#
# @param strict [Boolean]
#   Forwarded unchanged to `RbSelector.last`.
#
# @return [Selector]
def self.last(strict: true)
  rb_selector = RbSelector.last(strict)
  Selector._from_rbselector(rb_selector)
end

def self.list(inner = nil)

Other tags:
    Example: Select all list columns with a certain matching inner type: -
    Example: Select all columns *except* for those that are list: -
    Example: Select all list columns: -

Other tags:
    Note: -

Returns:
  • (Selector) -
# Select list columns, optionally restricted by an inner selector.
#
# @param inner [Object, nil]
#   Object exposing `_rbselector`; its value is forwarded as the inner selector.
#
# @return [Selector]
def self.list(inner = nil)
  # Safe navigation only skips `nil`, exactly like the explicit nil check.
  rb_selector = RbSelector.list(inner&._rbselector)
  Selector._from_rbselector(rb_selector)
end

def self.matches(pattern)

Other tags:
    Example: Do not match column names ending in 'R' or 'z' (case-insensitively): -
    Example: Match column names containing an 'a', preceded by a character that is not 'z': -

Returns:
  • (Selector) -

Parameters:
  • pattern (String) --
# Select columns whose names match the given regex pattern.
#
# The pattern is normalised before being handed to `RbSelector.matches`:
# one leading or (otherwise) one trailing ".*" is dropped, then "^.*" is
# prepended unless the pattern starts with "^" and ".*$" is appended unless
# it ends with "$". The bare pattern ".*" short-circuits to `all`.
#
# @param pattern [String]
#
# @return [Selector]
def self.matches(pattern)
  return all if pattern == ".*"

  core =
    if pattern.start_with?(".*")
      pattern[2..]
    elsif pattern.end_with?(".*")
      pattern[..-3]
    else
      pattern
    end

  head = core.start_with?("^") ? "" : "^.*"
  tail = core.end_with?("$") ? "" : ".*$"
  Selector._from_rbselector(RbSelector.matches("#{head}#{core}#{tail}"))
end

def self.nested

Other tags:
    Example: Select all columns *except* for those that are nested: -
    Example: Select all nested columns: -

Other tags:
    Note: -

Returns:
  • (Selector) -
# Select all nested columns.
#
# @return [Selector]
def self.nested
  rb_selector = RbSelector.nested
  Selector._from_rbselector(rb_selector)
end

def self.numeric

Other tags:
    Example: Match all columns *except* for those that are numeric: -
    Example: Match all numeric columns: -

Returns:
  • (Selector) -
# Select all numeric columns.
#
# @return [Selector]
def self.numeric
  rb_selector = RbSelector.numeric
  Selector._from_rbselector(rb_selector)
end

def self.object

Returns:
  • (Selector) -
# Build the selector wrapping `RbSelector.object`.
#
# @return [Selector]
def self.object
  rb_selector = RbSelector.object
  Selector._from_rbselector(rb_selector)
end

def self.signed_integer

Other tags:
    Example: Select all integer columns (both signed and unsigned): -
    Example: Select all signed integer columns: -

Returns:
  • (Selector) -
# Select all signed integer columns.
#
# @return [Selector]
def self.signed_integer
  rb_selector = RbSelector.signed_integer
  Selector._from_rbselector(rb_selector)
end

def self.starts_with(*prefix)

Other tags:
    Example: Match all columns *except* for those starting with 'b': -
    Example: Match columns starting with *either* the letter 'b' or 'z': -
    Example: Match columns starting with a 'b': -

Returns:
  • (Selector) -

Parameters:
  • prefix (Object) --
# Select columns whose names start with any of the given prefixes.
#
# @param prefix [Object]
#   One or more prefixes; they are turned into a regex group by `_re_string`.
#
# @return [Selector]
def self.starts_with(*prefix)
  group = _re_string(prefix)
  rb_selector = RbSelector.matches("^#{group}.*$")
  Selector._from_rbselector(rb_selector)
end

def self.string(include_categorical: false)

Other tags:
    Example: Group by all string *and* categorical columns: -
    Example: Group by all string columns, sum the numeric columns, then sort by the string cols: -

Returns:
  • (Selector) -
# Select all string columns, optionally including categorical columns.
#
# @param include_categorical [Boolean]
#   When true, `Categorical` is selected alongside `String`.
#
# @return [Selector]
def self.string(include_categorical: false)
  by_dtype(include_categorical ? [String, Categorical] : [String])
end

def self.struct

Other tags:
    Example: Select all columns *except* for those that are struct: -
    Example: Select all struct columns: -

Other tags:
    Note: -

Returns:
  • (Selector) -
# Select all struct columns.
#
# @return [Selector]
def self.struct
  rb_selector = RbSelector.struct_
  Selector._from_rbselector(rb_selector)
end

def self.temporal

Other tags:
    Example: Match all columns *except* for temporal columns: -
    Example: Match all temporal columns *except* for time columns: -
    Example: Match all temporal columns: -

Returns:
  • (Selector) -
# Select all temporal columns.
#
# @return [Selector]
def self.temporal
  rb_selector = RbSelector.temporal
  Selector._from_rbselector(rb_selector)
end

def self.time

Other tags:
    Example: Select all columns *except* for those that are times: -
    Example: Select all time columns: -

Returns:
  • (Selector) -
# Select all time columns.
#
# @return [Selector]
def self.time
  time_dtypes = [Time]
  by_dtype(time_dtypes)
end

def self.unsigned_integer

Other tags:
    Example: Select all integer columns (both signed and unsigned): -
    Example: Select all columns *except* for those that are unsigned integers: -
    Example: Select all unsigned integer columns: -

Returns:
  • (Selector) -
# Select all unsigned integer columns.
#
# @return [Selector]
def self.unsigned_integer
  rb_selector = RbSelector.unsigned_integer
  Selector._from_rbselector(rb_selector)
end