module Polars::Selectors
def self._combine_as_selector(items, *more_items)
- Private: -
# @private
#
# Fold a mixed list of column specifiers into a single selector.
#
# Each item is classified in this order: existing selector, string (regex
# when it starts with "^" and ends with "$", otherwise a literal column
# name), Polars dtype, column expression (its `meta.output_name` is used as
# a name). The per-category selectors are then OR-ed together, starting from
# `empty`.
#
# @param items [Object, Array] one specifier, or an array of them
# @param more_items [Array] additional specifiers
# @return [Selector]
# @raise [TypeError] if an item is none of the supported kinds
def self._combine_as_selector(items, *more_items)
  names = []
  regexes = []
  dtypes = []
  selectors = []

  candidates = (items.is_a?(::Array) ? items : [items]) + more_items
  candidates.each do |item|
    if Utils.is_selector(item)
      selectors << item
    elsif item.is_a?(::String)
      if item.start_with?("^") && item.end_with?("$")
        regexes << item
      else
        names << item
      end
    elsif Utils.is_polars_dtype(item)
      dtypes << item
    elsif Utils.is_column(item)
      names << item.meta.output_name
    else
      msg = "expected one or more `str`, `DataType` or selector; found #{item.inspect} instead."
      raise TypeError, msg
    end
  end

  selected = []
  selected << by_name(*names, require_all: false) if names.any?
  selected << by_dtype(*dtypes) if dtypes.any?
  if regexes.any?
    # Every collected regex is already anchored with ^...$, so a plain
    # alternation of parenthesised groups matches a name accepted by any of
    # them. Previously this branch raised `Todo`.
    pattern = regexes.map { |rx| "(#{rx})" }.join("|")
    selected << Selector._from_rbselector(RbSelector.matches(pattern))
  end
  selected.concat(selectors) if selectors.any?

  selected.reduce(empty, :|)
end
def self._re_string(string, escape: true)
- Private: -
# @private
#
# Build a parenthesised regex alternation from one string or a collection of
# strings.
#
# @param string [String, Enumerable] a single string, or a collection whose
#   elements are strings or arrays of strings (nested arrays are flattened
#   one level)
# @param escape [Boolean] when true, each string is passed through
#   `Utils.re_escape` before being joined
# @return [String] the string(s) joined with "|" and wrapped in parentheses
def self._re_string(string, escape: true)
  rx =
    if string.is_a?(::String)
      escape ? Utils.re_escape(string) : string
    else
      # The core class is spelled `::Array`, matching the `::Array`/`::String`
      # checks used elsewhere in this file; a bare `Array` can resolve to a
      # namespace-level constant instead, in which case nested arrays would
      # never be flattened.
      flattened = string.flat_map { |st| st.is_a?(::Array) ? st : [st] }
      flattened.map { |x| escape ? Utils.re_escape(x) : x }.join("|")
    end
  "(#{rx})"
end
def self.all
- Example: Select all columns *except* for those matching the given dtypes: -
Example: Select all columns, casting them to string: -
Returns:
-
(Selector)-
# Select all columns.
#
# @return [Selector]
def self.all
  rbselector = RbSelector.all
  Selector._from_rbselector(rbselector)
end
def self.alpha(ascii_only: false, ignore_spaces: false)
- Example: Select all columns *except* for those with alphabetic names: -
Example: Constrain the definition of "alphabetic" to ASCII characters only: -
Example: Select columns with alphabetic names; note that accented characters and kanji are recognised as alphabetic here: -
Other tags:
- Note: -
Returns:
-
(Selector)-
Parameters:
-
ignore_spaces(Boolean) -- -
ascii_only(Boolean) --
# Select columns whose names consist only of alphabetic characters.
#
# @param ascii_only [Boolean] restrict "alphabetic" to ASCII a-z / A-Z
#   instead of the Unicode `Alphabetic` property
# @param ignore_spaces [Boolean] also allow space characters in the name
# @return [Selector]
def self.alpha(ascii_only: false, ignore_spaces: false)
  # The pattern has to be valid for the *rust* regex crate.
  letters =
    if ascii_only
      "a-zA-Z"
    else
      "\\p{Alphabetic}"
    end
  blank = ignore_spaces ? " " : ""

  pattern = "^[#{letters}#{blank}]+$"
  Selector._from_rbselector(RbSelector.matches(pattern))
end
def self.alphanumeric(ascii_only: false, ignore_spaces: false)
- Example: Select all columns *except* for those with alphanumeric names: -
Example: Select columns with alphanumeric names: -
Other tags:
- Note: -
Returns:
-
(Selector)-
Parameters:
-
ignore_spaces(Boolean) -- -
ascii_only(Boolean) --
# Select columns whose names consist only of alphabetic or digit characters.
#
# @param ascii_only [Boolean] restrict the character classes to ASCII
#   (a-z, A-Z, 0-9) instead of their Unicode equivalents
# @param ignore_spaces [Boolean] also allow space characters in the name
# @return [Selector]
def self.alphanumeric(ascii_only: false, ignore_spaces: false)
  # The patterns have to be valid for the *rust* regex crate.
  if ascii_only
    letters = "a-zA-Z"
    digits = "0-9"
  else
    letters = "\\p{Alphabetic}"
    digits = "\\d"
  end
  blank = ignore_spaces ? " " : ""

  pattern = "^[#{letters}#{digits}#{blank}]+$"
  Selector._from_rbselector(RbSelector.matches(pattern))
end
def self.array(inner = nil, width: nil)
- Example: Select all array columns with a certain matching inner type: -
Example: Select all columns *except* for those that are array: -
Example: Select all array columns: -
Other tags:
- Note: -
Returns:
-
(Selector)-
# Select all array columns, optionally constrained by an inner selector.
#
# @param inner [Selector, nil] when given, its `_rbselector` is forwarded to
#   restrict the matching inner type
# @param width [Object, nil] forwarded unchanged to `RbSelector.array`
#   (presumably the required array width; confirm against the native binding)
# @return [Selector]
def self.array(inner = nil, width: nil)
  inner_rbselector = inner&._rbselector
  rbselector = RbSelector.array(inner_rbselector, width)
  Selector._from_rbselector(rbselector)
end
def self.binary
- Example: Select all columns *except* for those that are binary: -
Example: Select binary columns and export as a hash: -
Returns:
-
(Selector)-
# Select all binary columns.
#
# @return [Selector]
def self.binary
  binary_dtypes = [Binary]
  by_dtype(binary_dtypes)
end
def self.boolean
- Example: Select all columns *except* for those that are boolean: -
Example: Select and invert boolean columns: -
Returns:
-
(Selector)-
# Select all boolean columns.
#
# @return [Selector]
def self.boolean
  boolean_dtypes = [Boolean]
  by_dtype(boolean_dtypes)
end
def self.by_dtype(*dtypes)
- Example: Select all columns that are not of date or string dtype: -
Example: Select all columns with date or string dtypes: -
Returns:
-
(Selector)-
# Select all columns matching the given dtypes.
#
# @param dtypes [Array] dtypes (anything accepted by `Utils.is_polars_dtype`,
#   or a Class), given individually or grouped in arrays
# @return [Selector]
# @raise [TypeError] if an argument, or an element of an array argument, is
#   neither a Polars dtype nor a Class
def self.by_dtype(*dtypes)
  acceptable = ->(candidate) { Utils.is_polars_dtype(candidate) || candidate.is_a?(Class) }

  collected = dtypes.flat_map do |entry|
    if acceptable.call(entry)
      [entry]
    elsif entry.is_a?(::Array)
      entry.each do |member|
        unless acceptable.call(member)
          msg = "invalid dtype: #{member.inspect}"
          raise TypeError, msg
        end
      end
      entry
    else
      msg = "invalid dtype: #{entry.inspect}"
      raise TypeError, msg
    end
  end

  Selector._by_dtype(collected)
end
def self.by_index(*indices, require_all: true)
- Example: Select all columns *except* for the even-indexed ones: -
- Example: Select the "key" column and use a `Range` object to select various columns: -
Example: Select columns by index ("key" column and the two first/last columns): -
Other tags:
- Note: -
Returns:
-
(Selector)-
Parameters:
-
indices(Array) --
# Select columns by their position.
#
# @param indices [Array] integers and/or enumerables of indices (for example
#   ranges or arrays); enumerables are expanded with `to_a`
# @param require_all [Boolean] forwarded to `RbSelector.by_index`
#   (presumably whether every index must exist; confirm against the binding)
# @return [Selector]
# @raise [TypeError] if an argument is neither an Integer nor an Enumerable
def self.by_index(*indices, require_all: true)
  flat_indices = indices.flat_map do |entry|
    case entry
    when Enumerable
      entry.to_a
    when Integer
      [entry]
    else
      msg = "invalid index value: #{entry.inspect}"
      raise TypeError, msg
    end
  end

  rbselector = RbSelector.by_index(flat_indices, require_all)
  Selector._from_rbselector(rbselector)
end
def self.by_name(*names, require_all: true)
- Example: Match all columns *except* for those given: -
Example: Match *any* of the given columns by name: -
Example: Select columns by name: -
Other tags:
- Note: -
Returns:
-
(Selector)-
Parameters:
-
require_all(Boolean) -- -
names(Array) --
# Select columns by exact name.
#
# @param names [Array] column names, given individually or grouped in arrays
# @param require_all [Boolean] forwarded as `strict:` to `Selector._by_name`
#   (presumably whether every name must be present; confirm)
# @return [Selector]
# @raise [TypeError] if an argument, or an element of an array argument, is
#   not a String
def self.by_name(*names, require_all: true)
  flat_names = names.flat_map do |entry|
    case entry
    when ::String
      [entry]
    when ::Array
      entry.each do |member|
        next if member.is_a?(::String)

        msg = "invalid name: #{member.inspect}"
        raise TypeError, msg
      end
      entry
    else
      msg = "invalid name: #{entry.inspect}"
      raise TypeError, msg
    end
  end

  Selector._by_name(flat_names, strict: require_all, expand_patterns: false)
end
def self.categorical
- Example: Select all columns *except* for those that are categorical: -
Example: Select all categorical columns: -
Returns:
-
(Selector)-
# Select all categorical columns.
#
# @return [Selector]
def self.categorical
  rbselector = RbSelector.categorical
  Selector._from_rbselector(rbselector)
end
def self.contains(*substring)
- Example: Select all columns *except* for those that contain the substring 'ba': -
Example: Select columns that contain the substring 'ba' or the letter 'z': -
Example: Select columns that contain the substring 'ba': -
Returns:
-
(Selector)-
Parameters:
-
substring(Object) --
# Select columns whose names contain any of the given literal substrings.
#
# @param substring [Array] one or more substrings; they are turned into an
#   alternation by `_re_string`
# @return [Selector]
def self.contains(*substring)
  alternation = _re_string(substring)
  Selector._from_rbselector(RbSelector.matches("^.*#{alternation}.*$"))
end
def self.date
- Example: Select all columns *except* for those that are dates: -
Example: Select all date columns: -
Returns:
-
(Selector)-
# Select all date columns.
#
# @return [Selector]
def self.date
  date_dtypes = [Date]
  by_dtype(date_dtypes)
end
def self.datetime(time_unit = nil, time_zone: ["*", nil])
-
(Selector)-
Parameters:
-
time_zone(String) -- -
time_unit('ms', 'us', 'ns') --
# Build a datetime selector for the given time unit(s) and time zone(s).
#
# @param time_unit ['ms', 'us', 'ns', Array, nil] a single unit, a collection
#   of units, or nil for all three units
# @param time_zone [String, Array, nil] a single zone name, a collection of
#   zones, or nil; nil is forwarded as `[nil]`. The default `["*", nil]` is
#   forwarded unchanged (presumably "any zone or no zone"; confirm against
#   the native binding).
# @return [Selector]
def self.datetime(time_unit = nil, time_zone: ["*", nil])
  time_unit_lst =
    if time_unit.nil?
      ["ms", "us", "ns"]
    elsif time_unit.is_a?(::String)
      [time_unit]
    else
      time_unit.to_a
    end

  time_zone_lst =
    if time_zone.nil?
      [nil]
    elsif time_zone.is_a?(::String)
      # A bare zone name is wrapped just like a bare time unit; calling
      # `to_a` on a String would raise NoMethodError.
      [time_zone]
    elsif time_zone
      time_zone.to_a
    end

  Selector._from_rbselector(RbSelector.datetime(time_unit_lst, time_zone_lst))
end
def self.decimal
- Example: Select all columns *except* the decimal ones: -
Example: Select all decimal columns: -
Returns:
-
(Selector)-
# Select all decimal columns.
#
# @return [Selector]
def self.decimal
  # TODO: allow explicit selection by scale/precision?
  rbselector = RbSelector.decimal
  Selector._from_rbselector(rbselector)
end
def self.digit(ascii_only: false)
- Example: Demonstrate use of `ascii_only` flag (by default all valid unicode digits are considered, but this can be constrained to ascii 0-9): -
Example: Select all columns *except* for those with digit names: -
Example: Select columns with digit names: -
Other tags:
- Note: -
Returns:
-
(Selector)-
# Select columns whose names consist only of digits.
#
# @param ascii_only [Boolean] restrict digits to ASCII 0-9 instead of the
#   regex `\d` class
# @return [Selector]
def self.digit(ascii_only: false)
  digit_class =
    if ascii_only
      "[0-9]"
    else
      "\\d"
    end
  pattern = "^#{digit_class}+$"
  Selector._from_rbselector(RbSelector.matches(pattern))
end
def self.duration(time_unit = nil)
-
(Selector)-
Parameters:
-
time_unit('ms', 'us', 'ns') --
# Build a duration selector for the given time unit(s).
#
# @param time_unit ['ms', 'us', 'ns', Array, nil] a single unit, a collection
#   of units, or nil for all three units
# @return [Selector]
def self.duration(time_unit = nil)
  units =
    if time_unit.nil?
      ["ms", "us", "ns"]
    elsif time_unit.is_a?(::String)
      [time_unit]
    else
      time_unit.to_a
    end

  Selector._from_rbselector(RbSelector.duration(units))
end
def self.empty
-
(Selector)-
# Build the selector returned by `RbSelector.empty`; `_combine_as_selector`
# uses it as the seed when OR-ing selectors together.
#
# @return [Selector]
def self.empty
  rbselector = RbSelector.empty
  Selector._from_rbselector(rbselector)
end
def self.ends_with(*suffix)
- Example: Select all columns *except* for those that end with the substring 'z': -
Example: Select columns that end with *either* the letter 'z' or 'r': -
Example: Select columns that end with the substring 'z': -
Returns:
-
(Selector)-
Parameters:
-
suffix(Object) --
# Select columns whose names end with any of the given literal suffixes.
#
# @param suffix [Array] one or more suffixes; they are turned into an
#   alternation by `_re_string`
# @return [Selector]
def self.ends_with(*suffix)
  alternation = _re_string(suffix)
  Selector._from_rbselector(RbSelector.matches("^.*#{alternation}$"))
end
def self.enum
- Example: Select all columns *except* for those that are enum: -
Example: Select all enum columns: -
Other tags:
- Note: -
Returns:
-
(Selector)-
# Select all enum columns.
#
# @return [Selector]
def self.enum
  rbselector = RbSelector.enum_
  Selector._from_rbselector(rbselector)
end
def self.exclude(columns, *more_columns)
- Example: Exclude using a column name, a selector, and a dtype: -
Example: Exclude by column name(s): -
Other tags:
- Note: -
Returns:
-
(Selector)-
Parameters:
-
more_columns(Array) -- -
columns(Object) --
# Select all columns except those matching the given names, dtypes, or
# selectors.
#
# @param columns [Object] one specifier, or an array of them, as accepted by
#   `_combine_as_selector`
# @param more_columns [Array] additional specifiers
# @return [Selector] the complement (`~`) of the combined selector
def self.exclude(columns, *more_columns)
  combined = _combine_as_selector(columns, *more_columns)
  ~combined
end
def self.first(strict: true)
- Example: Select everything *except* for the first column: -
Example: Select the first column: -
Returns:
-
(Selector)-
# Select the first column.
#
# @param strict [Boolean] forwarded unchanged to `RbSelector.first`
# @return [Selector]
def self.first(strict: true)
  rbselector = RbSelector.first(strict)
  Selector._from_rbselector(rbselector)
end
def self.float
- Example: Select all columns *except* for those that are float: -
Example: Select all float columns: -
Returns:
-
(Selector)-
# Select all float columns.
#
# @return [Selector]
def self.float
  rbselector = RbSelector.float
  Selector._from_rbselector(rbselector)
end
def self.integer
- Example: Select all columns *except* for those that are integer: -
Example: Select all integer columns: -
Returns:
-
(Selector)-
# Select all integer columns.
#
# @return [Selector]
def self.integer
  rbselector = RbSelector.integer
  Selector._from_rbselector(rbselector)
end
def self.last(strict: true)
- Example: Select everything *except* for the last column: -
Example: Select the last column: -
Returns:
-
(Selector)-
# Select the last column.
#
# @param strict [Boolean] forwarded unchanged to `RbSelector.last`
# @return [Selector]
def self.last(strict: true)
  rbselector = RbSelector.last(strict)
  Selector._from_rbselector(rbselector)
end
def self.list(inner = nil)
- Example: Select all list columns with a certain matching inner type: -
Example: Select all columns *except* for those that are list: -
Example: Select all list columns: -
Other tags:
- Note: -
Returns:
-
(Selector)-
# Select all list columns, optionally constrained by an inner selector.
#
# @param inner [Selector, nil] when given, its `_rbselector` is forwarded to
#   restrict the matching inner type
# @return [Selector]
def self.list(inner = nil)
  inner_rbselector = inner&._rbselector
  rbselector = RbSelector.list(inner_rbselector)
  Selector._from_rbselector(rbselector)
end
def self.matches(pattern)
- Example: Do not match column names ending in 'R' or 'z' (case-insensitively): -
Example: Match column names containing an 'a', preceded by a character that is not 'z': -
Returns:
-
(Selector)-
Parameters:
-
pattern(String) --
# Select columns whose names match the given regex pattern.
#
# The pattern ".*" short-circuits to `all`. Otherwise a leading ".*" (or,
# failing that, a trailing ".*") is stripped, and the pattern is padded with
# "^.*" / ".*$" unless it is already anchored at that end.
#
# @param pattern [String] a regex pattern
# @return [Selector]
def self.matches(pattern)
  return all if pattern == ".*"

  trimmed =
    if pattern.start_with?(".*")
      pattern[2..]
    elsif pattern.end_with?(".*")
      pattern[..-3]
    else
      pattern
    end

  head = trimmed.start_with?("^") ? "" : "^.*"
  tail = trimmed.end_with?("$") ? "" : ".*$"

  Selector._from_rbselector(RbSelector.matches("#{head}#{trimmed}#{tail}"))
end
def self.nested
- Example: Select all columns *except* for those that are nested: -
Example: Select all nested columns: -
Other tags:
- Note: -
Returns:
-
(Selector)-
# Select all nested columns.
#
# @return [Selector]
def self.nested
  rbselector = RbSelector.nested
  Selector._from_rbselector(rbselector)
end
def self.numeric
- Example: Match all columns *except* for those that are numeric: -
Example: Match all numeric columns: -
Returns:
-
(Selector)-
# Select all numeric columns.
#
# @return [Selector]
def self.numeric
  rbselector = RbSelector.numeric
  Selector._from_rbselector(rbselector)
end
def self.object
-
(Selector)-
# Build the selector returned by `RbSelector.object` (presumably matching
# object-dtype columns; confirm against the native binding).
#
# @return [Selector]
def self.object
  rbselector = RbSelector.object
  Selector._from_rbselector(rbselector)
end
def self.signed_integer
- Example: Select all integer columns (both signed and unsigned): -
Example: Select all signed integer columns: -
Returns:
-
(Selector)-
# Select all signed integer columns.
#
# @return [Selector]
def self.signed_integer
  rbselector = RbSelector.signed_integer
  Selector._from_rbselector(rbselector)
end
def self.starts_with(*prefix)
- Example: Match all columns *except* for those starting with 'b': -
Example: Match columns starting with *either* the letter 'b' or 'z': -
Example: Match columns starting with a 'b': -
Returns:
-
(Selector)-
Parameters:
-
prefix(Object) --
# Select columns whose names start with any of the given literal prefixes.
#
# @param prefix [Array] one or more prefixes; they are turned into an
#   alternation by `_re_string`
# @return [Selector]
def self.starts_with(*prefix)
  alternation = _re_string(prefix)
  Selector._from_rbselector(RbSelector.matches("^#{alternation}.*$"))
end
def self.string(include_categorical: false)
- Example: Group by all string *and* categorical columns: -
Example: Group by all string columns, sum the numeric columns, then sort by the string cols: -
Returns:
-
(Selector)-
# Select all string columns, optionally including categorical columns.
#
# @param include_categorical [Boolean] also select Categorical columns
# @return [Selector]
def self.string(include_categorical: false)
  wanted = include_categorical ? [String, Categorical] : [String]
  by_dtype(wanted)
end
def self.struct
- Example: Select all columns *except* for those that are struct: -
Example: Select all struct columns: -
Other tags:
- Note: -
Returns:
-
(Selector)-
# Select all struct columns.
#
# @return [Selector]
def self.struct
  rbselector = RbSelector.struct_
  Selector._from_rbselector(rbselector)
end
def self.temporal
- Example: Match all columns *except* for temporal columns: -
Example: Match all temporal columns *except* for time columns: -
Example: Match all temporal columns: -
Returns:
-
(Selector)-
# Select all temporal columns.
#
# @return [Selector]
def self.temporal
  rbselector = RbSelector.temporal
  Selector._from_rbselector(rbselector)
end
def self.time
- Example: Select all columns *except* for those that are times: -
Example: Select all time columns: -
Returns:
-
(Selector)-
# Select all time columns.
#
# @return [Selector]
def self.time
  time_dtypes = [Time]
  by_dtype(time_dtypes)
end
def self.unsigned_integer
- Example: Select all integer columns (both signed and unsigned): -
Example: Select all columns *except* for those that are unsigned integers: -
Example: Select all unsigned integer columns: -
Returns:
-
(Selector)-
# Select all unsigned integer columns.
#
# @return [Selector]
def self.unsigned_integer
  rbselector = RbSelector.unsigned_integer
  Selector._from_rbselector(rbselector)
end