Add hermes

This commit is contained in:
muon 2026-06-01 14:15:44 +00:00
parent 28bb03187d
commit 609b3384c0
9 changed files with 526 additions and 6 deletions

View file

@@ -0,0 +1,130 @@
--- a/src/config.ts
+++ b/src/config.ts
@@ -44,6 +44,12 @@
"count-tokens-ms": number;
}
+export interface ThinkingConfig {
+ /** Default reasoning effort when the client sends no reasoning_effort.
+ * Accepts: none | minimal | low | medium | high | xhigh (default: medium) */
+ "default-effort": string;
+}
+
export interface StatsConfig {
/** Default true. Set false to disable per-request stats recording entirely. */
enabled: boolean;
@@ -58,6 +64,7 @@
"api-keys": Set<string>;
"body-limit": string;
cloaking: CloakingConfig;
+ thinking: ThinkingConfig;
timeouts: TimeoutConfig;
stats: StatsConfig;
debug: DebugMode;
@@ -78,6 +85,9 @@
"cli-version": "2.1.88",
entrypoint: "cli",
},
+ thinking: {
+ "default-effort": "medium",
+ },
timeouts: {
"messages-ms": 120000,
"stream-messages-ms": 600000,
--- a/src/handlers/openai.ts
+++ b/src/handlers/openai.ts
@@ -540,7 +540,13 @@
const structured =
body.response_format?.type === "json_object" ||
body.response_format?.type === "json_schema";
- const translatedBody = openaiToAnthropic(body);
+ // Inject the configured default reasoning effort unless the client set one or the default is "none".
+ const effort = config.thinking?.["default-effort"] ?? "medium";
+ const bodyWithThinking =
+ body.reasoning_effort || effort === "none"
+ ? body
+ : { ...body, reasoning_effort: effort };
+ const translatedBody = openaiToAnthropic(bodyWithThinking);
if (isDebugLevel(config.debug, "verbose")) {
console.log(
--- a/src/upstream/anthropic-api.ts
+++ b/src/upstream/anthropic-api.ts
@@ -18,17 +18,17 @@
if (isHaiku) {
if (structured) {
- return "oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,structured-outputs-2025-12-15";
+ return "oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,structured-outputs-2025-12-15";
} else {
- return "oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219";
+ return "oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219";
}
}
if (structured) {
- return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24,structured-outputs-2025-12-15";
+ return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24,structured-outputs-2025-12-15";
}
- return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24";
+ return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24";
}
/**
--- a/src/upstream/translator.ts
+++ b/src/upstream/translator.ts
@@ -9,13 +9,14 @@
inputTokens: number,
outputTokens: number,
cachedTokens: number,
+ reasoningTokens: number = 0,
): any {
return {
prompt_tokens: inputTokens,
completion_tokens: outputTokens,
total_tokens: inputTokens + outputTokens,
prompt_tokens_details: { cached_tokens: cachedTokens },
- completion_tokens_details: { reasoning_tokens: 0 },
+ completion_tokens_details: { reasoning_tokens: reasoningTokens },
};
}
@@ -281,6 +282,7 @@
export function anthropicToOpenai(anthropicResp: any, model: string): any {
let textContent = "";
+ let reasoningContent = "";
const toolCalls: any[] = [];
if (Array.isArray(anthropicResp.content)) {
@@ -288,7 +290,7 @@
if (block.type === "text") {
textContent += block.text;
} else if (block.type === "thinking" && block.thinking) {
- // thinking blocks not exposed in chat completions response
+ reasoningContent += block.thinking;
} else if (block.type === "tool_use") {
toolCalls.push({
id: block.id,
@@ -303,10 +305,12 @@
}
const message: any = { role: "assistant", content: textContent || null };
+ if (reasoningContent) message.reasoning_content = reasoningContent;
if (toolCalls.length) message.tool_calls = toolCalls;
const inputTokens = anthropicResp.usage?.input_tokens || 0;
const outputTokens = anthropicResp.usage?.output_tokens || 0;
+ const reasoningTokens = anthropicResp.usage?.output_tokens_details?.thinking_tokens || 0;
return {
id: `chatcmpl-${uuidv4()}`,
@@ -325,6 +329,7 @@
inputTokens,
outputTokens,
anthropicResp.usage?.cache_read_input_tokens || 0,
+ reasoningTokens,
),
};
}