# frozen_string_literal: true

require "json"

module Clacky
  module MessageFormat
    # Static helpers for OpenAI-compatible API message format.
    #
    # The canonical internal @messages format IS OpenAI format, so this module
    # mainly handles response parsing, tool result formatting, and message
    # type identification — minimal transformation needed.
    module OpenAI
      module_function

      # ── Message type identification ───────────────────────────────────────

      # Returns true if the message is a canonical tool result, i.e. it has
      # role "tool" and a non-nil :tool_call_id.
      #
      # @param msg [Hash] canonical message
      # @return [Boolean]
      def tool_result_message?(msg)
        msg[:role] == "tool" && !msg[:tool_call_id].nil?
      end

      # Returns the tool_call_ids referenced in a tool result message.
      #
      # @param msg [Hash] canonical message
      # @return [Array] single-element array with the id, or [] when the
      #   message is not a tool result
      def tool_call_ids(msg)
        return [] unless tool_result_message?(msg)

        [msg[:tool_call_id]]
      end

      # ── Request building ──────────────────────────────────────────────────

      # Build an OpenAI-compatible request body.
      #
      # Messages go through the canonical→OpenAI conversion layer
      # (normalize_message_content). For most models this is identity because
      # the internal canonical format IS OpenAI format. The conversion
      # handles one edge case: image_url content blocks are stripped
      # when vision_supported is false (e.g. DeepSeek, Kimi, MiniMax),
      # replacing them with a text placeholder so the API doesn't reject
      # the request with "unknown variant 'image_url'".
      #
      # @param messages [Array<Hash>] canonical messages
      # @param model [String]
      # @param tools [Array<Hash>, nil] OpenAI-style tool definitions
      # @param max_tokens [Integer]
      # @param caching_enabled [Boolean] (only effective for Claude via OpenRouter)
      # @param vision_supported [Boolean] whether the target model accepts
      #   image_url content blocks (default true, conservative)
      # @param reasoning_effort [#to_s, nil] sent as a String under
      #   :reasoning_effort; omitted when nil/false or when it stringifies
      #   to an empty string
      # @return [Hash]
      def build_request_body(messages, model, tools, max_tokens, caching_enabled,
                             vision_supported: true, reasoning_effort: nil)
        api_messages = messages.map do |msg|
          normalize_message_content(msg, vision_supported: vision_supported)
        end

        body = { model: model, max_tokens: max_tokens, messages: api_messages }

        if tools&.any?
          if caching_enabled
            # Clone first so the caller's tool definitions are never mutated,
            # then mark the last tool with the cache_control marker.
            cached_tools = deep_clone(tools)
            cached_tools.last[:cache_control] = { type: "ephemeral" }
            body[:tools] = cached_tools
          else
            body[:tools] = tools
          end
        end

        if reasoning_effort && !reasoning_effort.to_s.empty?
          body[:reasoning_effort] = reasoning_effort.to_s
        end

        body
      end

      # ── Canonical → OpenAI conversion ─────────────────────────────────────

      # Process a single message's content through the canonical→OpenAI
      # conversion layer. Non-Array content (e.g. a String) is returned
      # untouched; for Array content each block goes through normalize_block.
      #
      # @param msg [Hash] canonical message
      # @param vision_supported [Boolean]
      # @return [Hash] message with content normalised for OpenAI API
      def normalize_message_content(msg, vision_supported:)
        content = msg[:content]
        return msg unless content.is_a?(Array)

        blocks = content_to_blocks(content, vision_supported: vision_supported)
        # Most APIs reject empty content arrays — use a placeholder text block.
        blocks = [{ type: "text", text: "..." }] if blocks.empty?

        # merge returns a new Hash, so the caller's message is left unchanged.
        msg.merge(content: blocks)
      end

      # Convert canonical content array to OpenAI-compatible block array.
      # Each block goes through normalize_block; nil results are compacted.
      #
      # @param content [Array<Hash>] canonical content blocks
      # @param vision_supported [Boolean]
      # @return [Array<Hash>]
      def content_to_blocks(content, vision_supported:)
        content.map { |b| normalize_block(b, vision_supported: vision_supported) }.compact
      end

      # Normalize a single canonical content block to OpenAI API format.
      #
      # Canonical text blocks pass through (with cache_control preserved).
      # image_url blocks are kept for vision-capable models and replaced
      # with a text placeholder for non-vision models (DeepSeek, Kimi, etc.).
      # Non-Hash blocks and unknown block types are returned unchanged.
      #
      # @param block [Hash] canonical content block
      # @param vision_supported [Boolean]
      # @return [Hash, nil] nil for empty-text blocks (dropped)
      def normalize_block(block, vision_supported:)
        return block unless block.is_a?(Hash)

        case block[:type]
        when "text"
          # Drop empty text blocks — most APIs (Anthropic, DeepSeek, etc.)
          # reject { type: "text", text: "" }.
          text = block[:text]
          return nil if text.nil? || text.empty?

          result = { type: "text", text: text }
          result[:cache_control] = block[:cache_control] if block[:cache_control]
          result
        when "image_url"
          if vision_supported
            block # Pass through — GPT-4V, Gemini, etc. accept image_url
          else
            # Replace with text placeholder so the API doesn't reject the
            # request. The model will still see the context that an image
            # was present (from file_prompt / system_injected metadata).
            { type: "text", text: "[Image content removed — current model does not support vision input]" }
          end
        else
          block # Pass through unknown block types (tool_use, tool_result, etc.)
        end
      end

      # ── Response parsing ──────────────────────────────────────────────────

      # Parse OpenAI-compatible API response into canonical internal format.
      #
      # Only the first entry of data["choices"] is read.
      #
      # @param data [Hash] parsed JSON response body (String keys)
      # @return [Hash] with :content, :tool_calls, :finish_reason, :usage,
      #   :raw_api_usage and, when present in the response, :reasoning_content
      def parse_response(data)
        choice = data["choices"].first
        message = choice["message"]
        usage = data["usage"] || {}
        raw_api_usage = usage.dup

        usage_data = {
          prompt_tokens: usage["prompt_tokens"],
          completion_tokens: usage["completion_tokens"],
          total_tokens: usage["total_tokens"]
        }
        usage_data[:api_cost] = usage["cost"] if usage["cost"]
        if usage["cache_creation_input_tokens"]
          usage_data[:cache_creation_input_tokens] = usage["cache_creation_input_tokens"]
        end
        if usage["cache_read_input_tokens"]
          usage_data[:cache_read_input_tokens] = usage["cache_read_input_tokens"]
        end

        # OpenRouter stores cache info under prompt_tokens_details. Positive
        # values here take precedence over the top-level cache fields above.
        if (details = usage["prompt_tokens_details"])
          usage_data[:cache_read_input_tokens] = details["cached_tokens"] if details["cached_tokens"].to_i > 0
          if details["cache_write_tokens"].to_i > 0
            usage_data[:cache_creation_input_tokens] = details["cache_write_tokens"]
          end
        end

        result = {
          content: message["content"],
          tool_calls: parse_tool_calls(message["tool_calls"]),
          finish_reason: choice["finish_reason"],
          usage: usage_data,
          raw_api_usage: raw_api_usage
        }

        # Preserve reasoning_content (e.g. Kimi/Moonshot extended thinking)
        result[:reasoning_content] = message["reasoning_content"] if message["reasoning_content"]
        result
      end

      # ── Tool result formatting ────────────────────────────────────────────

      # Format tool results into canonical messages to append to @messages.
      #
      # One message is produced per entry in response[:tool_calls], in the same
      # order. A tool call with no matching result gets a JSON error payload.
      #
      # @param response [Hash] parsed response; :tool_calls may be nil when the
      #   model requested no tools, in which case [] is returned
      # @param tool_results [Array<Hash>, nil] results with :id and :content
      # @return [Array<Hash>] canonical tool messages
      def format_tool_results(response, tool_results)
        results_map = (tool_results || []).each_with_object({}) { |r, h| h[r[:id]] = r }

        # parse_response yields tool_calls: nil when there are none, so guard
        # against nil instead of raising NoMethodError.
        (response[:tool_calls] || []).map do |tc|
          result = results_map[tc[:id]]
          raw_content = result ? result[:content] : { error: "Tool result missing" }.to_json

          # OpenAI tool message content must be a String.
          # If a tool returned structured data (multipart Array blocks such as
          # a screenshot image, or a Hash), convert it to JSON.
          content =
            case raw_content
            when Array, Hash then JSON.generate(raw_content)
            else raw_content
            end

          { role: "tool", tool_call_id: tc[:id], content: content }
        end
      end

      # ── Private helpers ───────────────────────────────────────────────────

      # Convert the raw "tool_calls" array of an API message into canonical
      # tool call hashes.
      #
      # @param raw [Array<Hash>, nil] raw tool calls (String keys)
      # @return [Array<Hash>, nil] nil when raw is nil or empty
      private_class_method def self.parse_tool_calls(raw)
        return nil if raw.nil? || raw.empty?

        raw.filter_map do |call|
          func = call["function"] || {}
          name = func["name"]
          arguments = func["arguments"]

          # Skip malformed tool calls where name or arguments is nil (broken API response)
          next if name.nil? || arguments.nil?

          { id: call["id"], type: call["type"], name: name, arguments: arguments }
        end
      end

      # Recursively copy nested Hashes and Arrays. Any other object (including
      # Strings) is shared with the original rather than duplicated.
      #
      # @param obj [Object]
      # @return [Object]
      private_class_method def self.deep_clone(obj)
        case obj
        when Hash then obj.each_with_object({}) { |(k, v), h| h[k] = deep_clone(v) }
        when Array then obj.map { |item| deep_clone(item) }
        else obj
        end
      end
    end
  end
end