class ActiveGenie::Scorer::JuryBench


JuryBench.call("Sample text", "Evaluate technical accuracy")
@example Usage with automatic jury recommendation
JuryBench.call("Sample text", "Evaluate grammar and clarity", ["Grammar Expert"])
@example JuryBench usage with a single jury
including individual jury scores, reasoning, and a final aggregated score.
The Scorer process evaluates text based on given criteria and returns detailed feedback
with the ability to automatically recommend juries when none are specified.
using AI-powered evaluation. It supports both single and multiple jury scenarios,
The JuryBench class provides a foundation for scoring text content against specified criteria

def self.call(...)

Returns:
  • (String) - :final_reasoning Detailed explanation of why the final score was reached
  • (Number) - :final_score The final score of the text based on the criteria and juries
  • (Hash) - The evaluation result containing the scores and reasoning

Parameters:
  • config (Hash) -- Additional configuration options that modify the Scorer behavior
  • juries (Array) -- Optional list of specific juries. If empty, juries are recommended automatically
  • criteria (String) -- The evaluation criteria or rubric to assess against
  • text (String) -- The text content to be evaluated
# Convenience entry point: builds an instance with the given arguments and
# immediately runs it.
#
# Every argument is forwarded untouched to the constructor, whose signature
# is `initialize(text, criteria, juries = [], config: {})`.
#
# @return [Object] whatever the instance-level `call` returns
def self.call(...)
  new(...).call
end

def build_function

# Assembles the function-calling definition that is handed to the provider.
#
# The `parameters` entry is an object schema whose properties come from
# `properties`; every key of that map is listed as required.
#
# @return [Hash] definition with :name, :description and :parameters keys
def build_function
  schema = properties
  parameters = { type: 'object', properties: schema, required: schema.keys }

  {
    name: 'scorer',
    description: 'Score the text based on the given criteria.',
    parameters: parameters
  }
end

def call

# Runs the scoring request and returns the provider's answer.
#
# Sends the system prompt plus the criteria and the text as two user
# messages, together with the schema from `build_function`, to
# `UnifiedProvider.function_calling`. A missing (nil) 'final_score' in the
# answer is replaced by 0, and a summary entry is passed to the configured
# logger before the answer is returned.
#
# @return [Object] the provider result, with 'final_score' defaulted to 0
def call
  prompt_messages = [
    ['system', PROMPT],
    ['user', "Scorer criteria: #{@criteria}"],
    ['user', "Text to score: #{@text}"]
  ].map { |role, content| { role: role, content: content } }

  outcome = ::ActiveGenie::Providers::UnifiedProvider.function_calling(prompt_messages, build_function, config:)

  # Only a nil score is defaulted; any other value is kept as returned.
  outcome['final_score'] = 0 if outcome['final_score'].nil?

  logger = config.logger
  preview = 0..30 # log only the first 31 characters of text and criteria
  log_entry = {
    code: :Scorer,
    text: @text[preview],
    criteria: @criteria[preview],
    juries: juries,
    score: outcome['final_score'],
    reasoning: outcome['final_reasoning']
  }
  logger.call(log_entry)

  outcome
end

def config

# Effective configuration for this run, computed once and then reused.
#
# Merges the per-call overrides given to the constructor into
# `ActiveGenie.configuration` and falls back to the 'deepseek-chat' model
# when the merged configuration has no recommended model.
#
# @return [Object] the merged configuration object
def config
  return @config if @config

  merged = ActiveGenie.configuration.merge(@initial_config)
  merged.llm.recommended_model ||= 'deepseek-chat'
  @config = merged
end

def initialize(text, criteria, juries = [], config: {})

# Stores the inputs for a scoring run.
#
# @param text [Object] content to be scored
# @param criteria [Object] criteria the content is scored against
# @param juries [Object] requested juries; coerced with Array(), then nil
#   entries and duplicates are dropped
# @param config [Hash] per-call overrides, merged later by `config`
def initialize(text, criteria, juries = [], config: {})
  requested = Array(juries)

  @param_juries = requested.compact.uniq
  @initial_config = config
  @criteria = criteria
  @text = text
end

def juries

# Juries used for this run, resolved once and then reused.
#
# Uses the juries given to the constructor when at least one of them is
# truthy; otherwise asks `ActiveGenie::Lister::Juries` for a list based on
# the text and criteria.
#
# @return [Object] the caller-supplied juries or the lister's result
def juries
  return @juries if @juries

  @juries = @param_juries.none? ? ::ActiveGenie::Lister::Juries.call(@text, @criteria, config:) : @param_juries
end

def properties

# JSON-Schema `properties` map for the scorer function, built once and reused.
#
# For every jury two entries are generated, keyed by the jury name:
# "<jury>_reasoning" (string) and "<jury>_score" (number, 0..100). The map
# ends with the aggregated :final_score and :final_reasoning entries.
#
# The score bounds use the JSON Schema keywords `minimum`/`maximum`; the
# previous `min`/`max` keys are not part of JSON Schema, so the intended
# 0..100 range was not expressed in a form schema consumers recognise.
#
# @return [Hash] property name => JSON-Schema fragment, in insertion order
def properties
  @properties ||= begin
    per_jury = juries.each_with_object({}) do |jury, schema|
      schema["#{jury}_reasoning"] = {
        type: 'string',
        description: "The reasoning of the Scorer process by #{jury}."
      }
      schema["#{jury}_score"] = {
        type: 'number',
        description: "The score given by #{jury}.",
        minimum: 0,
        maximum: 100
      }
    end

    per_jury.merge(
      final_score: {
        type: 'number',
        description: 'The final score based on the previous juries'
      },
      final_reasoning: {
        type: 'string',
        description: 'The final reasoning based on the previous juries'
      }
    )
  end
end