class ActiveGenie::DataExtractor::Generalist
def self.call(...)
# Convenience entry point: builds an extractor from the forwarded arguments
# and runs it right away.
#
# Every argument is forwarded untouched to the constructor, and the value
# returned is whatever the instance-level #call returns.
def self.call(...)
  extractor = new(...)
  extractor.call
end
def call
# Runs the extraction for the text given to the constructor.
#
# Builds a two-message conversation (system prompt + user text), loads the
# function template from generalist.json next to this file, fills in the
# schema to extract (every property is marked as required), sends it through
# #function_calling and returns the result of #simplify_response.
def call
  conversation = [
    { role: 'system', content: prompt },
    { role: 'user', content: @text }
  ]

  schema = data_to_extract_with_explanation
  template_path = File.join(__dir__, 'generalist.json')
  function = JSON.parse(File.read(template_path), symbolize_names: true)
  function[:parameters].merge!(properties: schema, required: schema.keys)

  simplify_response(function_calling(conversation, function))
end
def data_to_extract_with_explanation
# Schema sent to the model.
#
# When the `with_explanation` setting is off, the caller's schema is returned
# as-is (same object). Otherwise a new hash is built in which every requested
# field is followed by two companions: "<field>_explanation" (string) and
# "<field>_accuracy" (integer, described to the model as a 0-100 confidence).
def data_to_extract_with_explanation
  return @data_to_extract unless @config.data_extractor.with_explanation

  @data_to_extract.each_with_object({}) do |(field, definition), schema|
    schema[field] = definition
    schema["#{field}_explanation"] = {
      type: 'string',
      description: " The chain of thought that led to the conclusion about: #{field}. Can be blank if the user didn't provide any context "
    }
    schema["#{field}_accuracy"] = {
      type: 'integer',
      description: ' The accuracy of the extracted data, what is the percentage of confidence? When 100 it means the data is explicitly stated in the text. When 0 it means is no way to discover the data from the text '
    }
  end
end
def function_calling(messages, function)
# Sends the conversation and function definition to the unified client, logs
# the exchange, and returns the client's response unchanged.
#
# The log entry carries the first 31 characters of the input text, the schema
# that was requested and the data that came back.
def function_calling(messages, function)
  ::ActiveGenie::Clients::UnifiedClient.function_calling(messages, function, config: @config).tap do |extracted|
    log_entry = {
      code: :data_extractor,
      text: @text[0..30],
      data_to_extract: function[:parameters][:properties],
      extracted_data: extracted
    }
    ActiveGenie::Logger.call(log_entry)
  end
end
def initialize(text, data_to_extract, config: {})
- Example: Extract a person's details -
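Returns:
- (Hash) - The extracted data matching the schema structure. In verbose mode with explanations enabled, each field is expected to come with its `<field>_explanation` and `<field>_accuracy` entries.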
Parameters:
- config (Hash) -- Additional config for the extraction process.
- data_to_extract (Hash) -- Schema defining the data structure to extract.
- text (String) -- The input text to analyze and extract data from.
# Stores the extraction inputs and resolves the effective configuration.
#
# @param text [String] the input text to analyze and extract data from
# @param data_to_extract [Hash] schema defining the data structure to extract
# @param config [Hash] additional config for the extraction process; merged
#   on top of ActiveGenie.configuration
def initialize(text, data_to_extract, config: {})
  @text = text
  @data_to_extract = data_to_extract

  defaults = ActiveGenie.configuration
  @config = defaults.merge(config)
end
def min_accuracy
# Accuracy threshold read from the data extractor settings; fields reported
# below it are dropped by #simplify_response.
def min_accuracy
  extractor_settings = @config.data_extractor
  extractor_settings.min_accuracy # default 70
end
def prompt
# System prompt for the extraction, read from generalist.md located in the
# same directory as this file. The file is read on every call.
def prompt
  prompt_path = File.join(__dir__, 'generalist.md')
  File.read(prompt_path)
end
def simplify_response(response)
# Reduces the raw model response to the requested fields only.
#
# In verbose mode the response is returned untouched. Otherwise only the keys
# of @data_to_extract that are present in the response are kept, and a field
# is dropped when its "<field>_accuracy" companion is below #min_accuracy.
#
# The accuracy companion is produced by the model, so it is not guaranteed to
# be an Integer. A nil or non-numeric value is treated like a missing accuracy
# (the field is kept) and numeric strings are compared by value, instead of
# raising on the `<` comparison.
#
# @param response [Hash] model output, keyed by strings
# @return [Hash] the kept fields, keyed exactly as in @data_to_extract
#   (or the raw response when verbose)
def simplify_response(response)
  return response if @config.data_extractor.verbose

  @data_to_extract.each_key.with_object({}) do |key, simplified|
    field = key.to_s
    next unless response.key?(field)

    accuracy = numeric_accuracy(response["#{field}_accuracy"])
    next if accuracy && accuracy < min_accuracy

    simplified[key] = response[field]
  end
end

# Coerces a model-reported accuracy to a number; nil when it is not numeric.
def numeric_accuracy(value)
  return value if value.is_a?(Numeric)

  Float(value, exception: false) if value.is_a?(String)
end