class DSPy::Teleprompt::DataHandler
Provides efficient data-handling operations for large example datasets during bootstrap and optimization.
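A minimal usage sketch, assuming the library is already loaded; plain hashes stand in for the example objects (typically DSPy::Example instances) purely for illustration, since the handler only relies on standard Array operations:

# Hypothetical training examples; any Array works here.
examples = [
  { question: "2 + 2?", answer: "4" },
  { question: "Capital of France?", answer: "Paris" },
  { question: "Largest planet?", answer: "Jupiter" }
]

handler = DSPy::Teleprompt::DataHandler.new(examples)
handler.statistics[:total_examples]  # => 3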
def create_candidate_sets(num_sets, set_size, random_state: nil)
def create_candidate_sets(num_sets, set_size, random_state: nil)
  return Array.new(num_sets) { [] } if @examples.empty?

  srand(random_state) if random_state

  candidate_sets = []
  actual_set_size = [set_size, @examples.size].min

  num_sets.times do |i|
    # Use a different random state for each set to ensure variety
    current_seed = random_state ? random_state + i : nil
    srand(current_seed) if current_seed

    set_examples = @examples.sample(actual_set_size)
    candidate_sets << set_examples
  end

  candidate_sets
end
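A sketch of drawing candidate sets for a bootstrap run with the handler built above; passing random_state makes the draws reproducible, and set sizes are capped at the number of available examples:

# Draw 4 candidate sets of up to 2 examples each, reproducibly.
sets = handler.create_candidate_sets(4, 2, random_state: 42)

sets.each_with_index do |set, i|
  puts "Candidate set #{i}: #{set.size} examples"
end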
def each_batch(batch_size)
def each_batch(batch_size)
  @examples.each_slice(batch_size)
end
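The method returns the enumerator produced by each_slice rather than yielding directly, so batches are consumed by iterating over the return value; a minimal sketch using the handler above:

# Process the examples in batches of 2 (the last batch may be smaller).
handler.each_batch(2).each do |batch|
  puts "Evaluating batch of #{batch.size} examples"
end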
def initialize(examples)
def initialize(examples)
  @examples = examples
end
def partition_by_success(successful_indices)
def partition_by_success(successful_indices)
  successful_examples = successful_indices.map { |i| @examples[i] if i < @examples.size }.compact
  failed_indices = (0...@examples.size).to_a - successful_indices
  failed_examples = failed_indices.map { |i| @examples[i] }

  [successful_examples, failed_examples]
end
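A sketch of splitting the handler's examples by evaluation outcome, assuming (hypothetically) that the examples at indices 0 and 2 passed some metric:

# Indices of examples that passed evaluation (hypothetical result).
successful_indices = [0, 2]

passed, failed = handler.partition_by_success(successful_indices)
puts "passed: #{passed.size}, failed: #{failed.size}"  # => passed: 2, failed: 1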
def sample(n, random_state: nil)
def sample(n, random_state: nil)
  return [] if @examples.empty? || n <= 0

  # Handle case where n is larger than available examples
  actual_n = [n, @examples.size].min

  # Set random seed if provided
  srand(random_state) if random_state

  @examples.sample(actual_n)
end
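Because random_state seeds Ruby's global generator via srand, repeated calls with the same seed return the same subset; a minimal sketch:

subset_a = handler.sample(2, random_state: 7)
subset_b = handler.sample(2, random_state: 7)
subset_a == subset_b  # => true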
def shuffle(random_state: nil)
def shuffle(random_state: nil)
  srand(random_state) if random_state
  @examples.shuffle
end
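Shuffling is deterministic for a fixed random_state for the same reason as sample above; a minimal sketch:

handler.shuffle(random_state: 123) == handler.shuffle(random_state: 123)  # => true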
def statistics
def statistics
  {
    total_examples: @examples.size,
    example_types: @examples.map(&:class).uniq.map(&:name),
    memory_usage_estimate: @examples.size * 1000 # Rough estimate
  }
end
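A sketch of inspecting the returned summary hash for the plain-hash examples above; note that :memory_usage_estimate is only a rough per-example figure, not a measured value:

stats = handler.statistics
stats[:total_examples]         # => 3
stats[:example_types]          # => ["Hash"]
stats[:memory_usage_estimate]  # => 3000 (@examples.size * 1000)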
def stratified_sample(n, stratify_column: nil)
def stratified_sample(n, stratify_column: nil)
  # For now, fall back to regular sampling (can be enhanced later)
  sample(n)
end
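Since the current implementation ignores stratify_column and delegates to sample, the call behaves like plain random sampling; a minimal sketch:

# Currently equivalent to handler.sample(2); stratify_column is accepted
# but not yet used for stratification.
stratified = handler.stratified_sample(2, stratify_column: :question)
stratified.size  # => 2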