class Ollama::Documents::Splitters::RecursiveCharacter
def initialize(separators: DEFAULT_SEPARATORS, include_separator: false, combining_string: "\n\n", chunk_size: 4096)
# Sets up a recursive splitter with an ordered list of separators.
#
# @param separators [Array] separators to try, in order; must not be empty
# @param include_separator [Object] stored as-is and forwarded to the
#   per-separator Character splitter when #split runs
# @param combining_string [String] stored as-is and forwarded to the
#   Character splitter
# @param chunk_size [Integer] size threshold; pieces whose `size` exceeds
#   it are split again with the remaining separators
# @raise [ArgumentError] if +separators+ is empty
def initialize(separators: DEFAULT_SEPARATORS, include_separator: false, combining_string: "\n\n", chunk_size: 4096)
  if separators.empty?
    raise ArgumentError, "non-empty array of separators required"
  end

  @separators        = separators
  @include_separator = include_separator
  @combining_string  = combining_string
  @chunk_size        = chunk_size
end
def split(text, separators: @separators)
# Splits +text+ using the first separator, then re-splits every piece that
# is still larger than the configured chunk size with the remaining
# separators.
#
# @param text [String] the text to split
# @param separators [Array] separators still available for this level of
#   the recursion; defaults to the list given to the constructor
# @return [Array] the resulting pieces; +[text]+ when no separators are
#   left or when the Character splitter yields nothing
def split(text, separators: @separators)
  # Nothing left to split on: hand the text back unchanged, even if it is
  # still larger than the chunk size.
  return [ text ] if separators.empty?

  # Destructure instead of mutating, so the caller's array is left intact.
  current, *remaining = separators

  splitter = Character.new(
    separator: current,
    include_separator: @include_separator,
    combining_string: @combining_string,
    chunk_size: @chunk_size
  )
  pieces = splitter.split(text)
  return [ text ] if pieces.count.zero?

  pieces.flat_map do |piece|
    if piece.size > @chunk_size
      # Still too large: recurse with the separators not yet tried.
      split(piece, separators: remaining)
    else
      [ piece ]
    end
  end
end