class ActiveGenie::DataExtractor::Generalist

def self.call(...)

# Convenience entry point: builds an extractor from the forwarded arguments
# and immediately runs it.
#
# @return whatever the instance-level #call returns
def self.call(...)
  extractor = new(...)
  extractor.call
end

def call

# Runs the extraction: builds the system/user conversation, fills the function
# template read from generalist.json with the requested properties, performs
# the function call and post-processes the response.
#
# @return the result of #simplify_response for the function-calling response
def call
  conversation = [
    { role: 'system', content: prompt },
    { role: 'user', content: @text }
  ]
  schema = data_to_extract_with_explanation

  template_path = File.join(__dir__, 'generalist.json')
  function = JSON.parse(File.read(template_path), symbolize_names: true)

  # Every requested property is marked as required in the function definition
  parameters = function[:parameters]
  parameters[:properties] = schema
  parameters[:required] = schema.keys

  simplify_response(function_calling(conversation, function))
end

def data_to_extract_with_explanation

# Builds the properties sent to the model.
#
# When the with_explanation setting is off, the caller's schema is returned
# untouched. Otherwise every field is followed by two companion properties:
# "<field>_explanation" (string) and "<field>_accuracy" (integer).
#
# @return [Hash] the schema, optionally enriched with the companion properties
def data_to_extract_with_explanation
  return @data_to_extract unless @config.data_extractor.with_explanation

  @data_to_extract.each_with_object({}) do |(field, schema), enriched|
    enriched[field] = schema
    enriched["#{field}_explanation"] = {
      type: 'string',
      description: "
      The chain of thought that led to the conclusion about: #{field}.
      Can be blank if the user didn't provide any context
      "
    }
    enriched["#{field}_accuracy"] = {
      type: 'integer',
      description: '
      The accuracy of the extracted data, what is the percentage of confidence?
      When 100 it means the data is explicitly stated in the text.
      When 0 it means is no way to discover the data from the text
      '
    }
  end
end

def function_calling(messages, function)

# Sends the conversation and function definition to the unified client, logs
# the exchange, and hands the client's response back to the caller.
#
# @param messages [Array<Hash>] conversation passed to the client
# @param function [Hash] function definition; its [:parameters][:properties] is logged
# @return the response from the unified client, unchanged
def function_calling(messages, function)
  extracted = ::ActiveGenie::Clients::UnifiedClient.function_calling(messages, function, config: @config)

  # Only the first 31 characters of the text are logged
  log_entry = {
    code: :data_extractor,
    text: @text[0..30],
    data_to_extract: function[:parameters][:properties],
    extracted_data: extracted
  }
  ActiveGenie::Logger.call(log_entry)

  extracted
end

def initialize(text, data_to_extract, config: {})

Other tags:
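    Example: Extract a person's details -
      ActiveGenie::DataExtractor::Generalist.call("John is 25 years old", { name: { type: 'string' }, age: { type: 'integer' } })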

Returns:
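  • (Hash) - The extracted data matching the schema structure. When explanations are enabled, each field is requested together with a "<field>_explanation" and a "<field>_accuracy" entry; unless verbose output is enabled, only the requested fields are returned and fields whose reported accuracy is below the configured minimum are omitted.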

Parameters:
  • config (Hash) -- Additional config for the extraction process
  • data_to_extract (Hash) -- Schema defining the data structure to extract.
  • text (String) -- The input text to analyze and extract data from
# Stores the extraction inputs and resolves the effective configuration.
#
# @param text [String] the input text to analyze and extract data from
# @param data_to_extract [Hash] schema defining the data structure to extract
# @param config [Hash] overrides merged on top of ActiveGenie.configuration
def initialize(text, data_to_extract, config: {})
  # Per-call overrides are merged into the global configuration
  @config = ActiveGenie.configuration.merge(config)
  @data_to_extract = data_to_extract
  @text = text
end

def min_accuracy

# Minimum accuracy an extracted field must report to be kept.
#
# @return the min_accuracy value from the data extractor settings
def min_accuracy
  extractor_settings = @config.data_extractor
  extractor_settings.min_accuracy # default 70
end

def prompt

# Reads the system prompt from generalist.md, located next to this file.
#
# @return [String] the file contents
def prompt
  prompt_path = File.join(__dir__, 'generalist.md')
  File.read(prompt_path)
end

def simplify_response(response)

# Reduces the raw function-calling response to the requested fields.
#
# In verbose mode the response is returned untouched. Otherwise the result
# contains only the keys of the requested schema that are present in the
# response (looked up by their string form), and a field is dropped when its
# "<field>_accuracy" entry is a number below #min_accuracy.
#
# @param response [Hash] raw response with string keys
# @return [Hash] the verbose response, or a hash keyed like the requested schema
def simplify_response(response)
  return response if @config.data_extractor.verbose

  @data_to_extract.keys.each_with_object({}) do |key, simplified|
    next unless response.key?(key.to_s)

    accuracy = response["#{key}_accuracy"]
    # A nil or non-numeric accuracy carries no usable confidence signal, so it is
    # treated like a missing accuracy instead of raising on the comparison
    next if accuracy.is_a?(Numeric) && accuracy < min_accuracy

    simplified[key] = response[key.to_s]
  end
end