class Ollama::Documents::Splitters::Character
def initialize(separator: DEFAULT_SEPARATOR, include_separator: false, combining_string: "\n\n", chunk_size: 4096)
# Configures a character splitter.
#
# The stored settings are consumed by #split: the text is cut with
# String#split at +separator+, and the resulting pieces are regrouped into
# chunks.
#
# @param separator [Regexp, String] pattern the text is split at.
# @param include_separator [Boolean] when true, the separator is wrapped in
#   a capture group so that String#split also yields the matched separator
#   text, which #split then attaches to the preceding piece.
# @param combining_string [String] string appended after each piece when
#   pieces are packed into a chunk.
# @param chunk_size [Integer] threshold that the chunk size (String#size)
#   is compared against while packing.
def initialize(separator: DEFAULT_SEPARATOR, include_separator: false, combining_string: "\n\n", chunk_size: 4096)
  @separator         = separator
  @include_separator = include_separator
  @combining_string  = combining_string
  @chunk_size        = chunk_size
  return unless include_separator

  # String#split treats a String pattern literally. Escape it before
  # building the capturing regexp, so that enabling include_separator does
  # not silently reinterpret a separator such as "." or "|" as regexp source.
  # Regexp separators are interpolated unchanged (flags are preserved by
  # Regexp#to_s).
  pattern = separator.is_a?(String) ? Regexp.escape(separator) : separator
  @separator = Regexp.new("(#{pattern})")
end
def split(text)
# Splits +text+ at the configured separator and packs the pieces into chunks.
#
# Packing rule: a piece is appended to the current chunk, followed by the
# combining string, as long as the chunk's size plus the piece's size stays
# below the chunk size. Otherwise the current chunk (if non-empty) is emitted
# and the piece starts a new chunk. A piece is never cut, so a single piece
# that reaches the chunk size on its own ends up in a chunk by itself.
#
# @param text [String] the text to split.
# @return [Array<String>] the chunks, in the order they occur in +text+.
def split(text)
  pieces = []
  text.split(@separator) do |piece|
    if @include_separator && piece =~ @separator
      # With a capturing separator, String#split also yields the separator
      # text itself; glue it onto the piece that precedes it. The safe
      # navigation covers the case where there is no preceding piece.
      pieces.last&.concat(piece)
    else
      pieces.push(piece)
    end
  end

  chunks = []
  current = +''
  pieces.each do |piece|
    if current.size + piece.size < @chunk_size
      current << piece << @combining_string
    else
      chunks << current unless current.empty?
      # Start the new chunk the same way pieces are appended above: piece
      # followed by the combining string. Without it, the next piece would be
      # concatenated directly onto this one with nothing in between. Building
      # a new string also avoids mutating the element stored in +pieces+.
      current = piece + @combining_string
    end
  end
  chunks << current unless current.empty?
  chunks
end