class ActiveGenie::Scorer::JuryBench
JuryBench.call("Sample text", "Evaluate technical accuracy")
@example Usage with automatic jury recommendation
JuryBench.call("Sample text", "Evaluate grammar and clarity", ["Grammar Expert"])
@example JuryBench usage with a single jury
including individual jury scores, reasoning, and a final aggregated score.
The Scorer process evaluates text based on given criteria and returns detailed feedback
with the ability to automatically recommend juries when none are specified.
using AI-powered evaluation. It supports both single and multiple jury scenarios,
The JuryBench class provides a foundation for scoring text content against specified criteria
def self.call(...)
-
(String)- :final_reasoning Detailed explanation of why the final score was reached -
(Number)- :final_score The final score of the text based on the criteria and juries -
(Hash)- The evaluation result containing the scores and reasoning
Parameters:
-
config(Hash) -- Additional configuration options that modify the Scorer behavior -
juries(Array) -- Optional list of specific juries. If empty, juries are recommended automatically -
criteria(String) -- The evaluation criteria or rubric to assess against -
text(String) -- The text content to be evaluated
# Convenience entry point: builds a scorer from the forwarded arguments and
# runs it right away.
#
# Every positional, keyword and block argument is passed untouched to `new`;
# the value returned is whatever the instance-level `call` returns.
def self.call(...)
  scorer = new(...)
  scorer.call
end
def build_function
# Builds the function-calling definition that is handed to the LLM provider.
#
# The parameter schema is an object whose fields are the entries returned by
# `properties`; every one of those fields is listed as required.
#
# @return [Hash] definition with :name, :description and :parameters keys
def build_function
  fields = properties

  parameters = {
    type: 'object',
    properties: fields,
    required: fields.keys
  }

  {
    name: 'scorer',
    description: 'Score the text based on the given criteria.',
    parameters: parameters
  }
end
def call
# Runs the scoring request and returns the provider's structured answer.
#
# Steps, in order:
# 1. Sends the system PROMPT plus two user messages (criteria first, then the
#    text) to the unified provider, together with the schema from
#    `build_function` and the effective `config`.
# 2. Replaces a nil 'final_score' in the answer with 0.
# 3. Reports a summary (first 31 characters of text and criteria, the juries,
#    final score and final reasoning) to the configured logger.
#
# @return [Hash] the provider's answer, with 'final_score' never nil
def call
  conversation = [
    { role: 'system', content: PROMPT },
    { role: 'user', content: "Scorer criteria: #{@criteria}" },
    { role: 'user', content: "Text to score: #{@text}" }
  ]

  result = ::ActiveGenie::Providers::UnifiedProvider.function_calling(
    conversation,
    build_function,
    config:
  )

  # A missing score is reported as zero so the result always carries a number.
  result['final_score'] = 0 if result['final_score'].nil?

  log_entry = {
    code: :Scorer,
    text: @text[0..30],
    criteria: @criteria[0..30],
    juries: juries,
    score: result['final_score'],
    reasoning: result['final_reasoning']
  }
  config.logger.call(log_entry)

  result
end
def config
# Effective configuration for this scorer, computed on first use and memoized.
#
# Merges the per-instance overrides (@initial_config) into the global
# ActiveGenie configuration, then falls back to 'deepseek-chat' when the
# merged configuration has no recommended model.
#
# @return [Object] the merged configuration object
def config
  return @config if @config

  merged = ActiveGenie.configuration.merge(@initial_config)
  merged.llm.recommended_model = 'deepseek-chat' unless merged.llm.recommended_model
  @config = merged
end
def initialize(text, criteria, juries = [], config: {})
# Stores the inputs of a scoring run.
#
# @param text [String] the text content to be evaluated
# @param criteria [String] the evaluation criteria or rubric to assess against
# @param juries [Array, Object, nil] optional juries; coerced with `Array()`,
#   then stripped of nils and duplicates
# @param config [Hash] per-instance configuration overrides
def initialize(text, criteria, juries = [], config: {})
  @initial_config = config
  @criteria = criteria
  @text = text

  requested = Array(juries)
  @param_juries = requested.compact.uniq
end
def juries
# Juries used for this scoring run, resolved on first use and memoized.
#
# The juries supplied by the caller win whenever there are any; otherwise the
# list is requested from ActiveGenie::Lister::Juries for the current text and
# criteria, using the effective `config`.
def juries
  return @juries if @juries
  return @juries = @param_juries if @param_juries.any?

  @juries = ::ActiveGenie::Lister::Juries.call(@text, @criteria, config:)
end
def properties
# JSON-Schema properties describing the structured answer requested from the
# model, built on first use and memoized.
#
# For every jury two fields are declared, keyed by strings:
#   "<jury>_reasoning" (string) and "<jury>_score" (number, bounded 0..100).
# They are followed by the symbol-keyed :final_score and :final_reasoning.
#
# The score bounds use the JSON Schema keywords `minimum`/`maximum`; `min` and
# `max` are not schema keywords, so they would not express the 0..100 range as
# a constraint.
#
# @return [Hash] property name => JSON-Schema fragment, in declaration order
def properties
  @properties ||= begin
    jury_fields = juries.each_with_object({}) do |jury, fields|
      fields["#{jury}_reasoning"] = {
        type: 'string',
        description: "The reasoning of the Scorer process by #{jury}."
      }
      fields["#{jury}_score"] = {
        type: 'number',
        description: "The score given by #{jury}.",
        minimum: 0,
        maximum: 100
      }
    end

    jury_fields.merge(
      final_score: {
        type: 'number',
        description: 'The final score based on the previous juries'
      },
      final_reasoning: {
        type: 'string',
        description: 'The final reasoning based on the previous juries'
      }
    )
  end
end