Add hermes

2026-07-03 23:49:35 +00:00 · 2026-06-01 14:15:44 +00:00 · 2026-06-01 14:15:44 +00:00 · 609b3384c0
commit 609b3384c0
parent 28bb03187d
9 changed files with 526 additions and 6 deletions
--- a/pkgs/auth2api/thinking.patch
+++ b/pkgs/auth2api/thinking.patch
@@ -0,0 +1,130 @@
+--- a/src/config.ts
+++ b/src/config.ts
+@@ -44,6 +44,12 @@
+   "count-tokens-ms": number;
+ }
+ 
+export interface ThinkingConfig {
+  /** Default reasoning effort applied when the client sends no reasoning_effort.
+   *  Accepts: none | minimal | low | medium | high | xhigh (default: medium); "none" injects nothing. */
+  "default-effort": string;
+}
+
+ export interface StatsConfig {
+   /** Default true. Set false to disable per-request stats recording entirely. */
+   enabled: boolean;
+@@ -58,6 +64,7 @@
+   "api-keys": Set<string>;
+   "body-limit": string;
+   cloaking: CloakingConfig;
+  thinking: ThinkingConfig;
+   timeouts: TimeoutConfig;
+   stats: StatsConfig;
+   debug: DebugMode;
+@@ -78,6 +85,9 @@
+     "cli-version": "2.1.88",
+     entrypoint: "cli",
+   },
+  thinking: {
+    "default-effort": "medium",
+  },
+   timeouts: {
+     "messages-ms": 120000,
+     "stream-messages-ms": 600000,
+--- a/src/handlers/openai.ts
+++ b/src/handlers/openai.ts
+@@ -540,7 +540,13 @@
+       const structured =
+         body.response_format?.type === "json_object" ||
+         body.response_format?.type === "json_schema";
+-      const translatedBody = openaiToAnthropic(body);
+      // Inject the configured default effort unless the client set one or the default is "none".
+      const effort = config.thinking?.["default-effort"] ?? "medium";
+      const bodyWithThinking =
+        body.reasoning_effort || effort === "none"
+          ? body
+          : { ...body, reasoning_effort: effort };
+      const translatedBody = openaiToAnthropic(bodyWithThinking);
+ 
+       if (isDebugLevel(config.debug, "verbose")) {
+         console.log(
+--- a/src/upstream/anthropic-api.ts
+++ b/src/upstream/anthropic-api.ts
+@@ -18,17 +18,17 @@
+ 
+   if (isHaiku) {
+     if (structured) {
+-      return "oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,structured-outputs-2025-12-15";
+      return "oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,structured-outputs-2025-12-15";
+     } else {
+-      return "oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219";
+      return "oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219";
+     }
+   }
+ 
+   if (structured) {
+-    return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24,structured-outputs-2025-12-15";
+    return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24,structured-outputs-2025-12-15";
+   }
+ 
+-  return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24";
+  return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24";
+ }
+ 
+ /**
+--- a/src/upstream/translator.ts
+++ b/src/upstream/translator.ts
+@@ -9,13 +9,14 @@
+   inputTokens: number,
+   outputTokens: number,
+   cachedTokens: number,
+  reasoningTokens: number = 0,
+ ): any {
+   return {
+     prompt_tokens: inputTokens,
+     completion_tokens: outputTokens,
+     total_tokens: inputTokens + outputTokens,
+     prompt_tokens_details: { cached_tokens: cachedTokens },
+-    completion_tokens_details: { reasoning_tokens: 0 },
+    completion_tokens_details: { reasoning_tokens: reasoningTokens },
+   };
+ }
+ 
+@@ -281,6 +282,7 @@
+ 
+ export function anthropicToOpenai(anthropicResp: any, model: string): any {
+   let textContent = "";
+  let reasoningContent = "";
+   const toolCalls: any[] = [];
+ 
+   if (Array.isArray(anthropicResp.content)) {
+@@ -288,7 +290,7 @@
+       if (block.type === "text") {
+         textContent += block.text;
+       } else if (block.type === "thinking" && block.thinking) {
+-        // thinking blocks not exposed in chat completions response
+        reasoningContent += block.thinking;
+       } else if (block.type === "tool_use") {
+         toolCalls.push({
+           id: block.id,
+@@ -303,10 +305,12 @@
+   }
+ 
+   const message: any = { role: "assistant", content: textContent || null };
+  if (reasoningContent) message.reasoning_content = reasoningContent;
+   if (toolCalls.length) message.tool_calls = toolCalls;
+ 
+   const inputTokens = anthropicResp.usage?.input_tokens || 0;
+   const outputTokens = anthropicResp.usage?.output_tokens || 0;
+  const reasoningTokens = anthropicResp.usage?.output_tokens_details?.thinking_tokens || 0;
+ 
+   return {
+     id: `chatcmpl-${uuidv4()}`,
+@@ -325,6 +329,7 @@
+       inputTokens,
+       outputTokens,
+       anthropicResp.usage?.cache_read_input_tokens || 0,
+      reasoningTokens,
+     ),
+   };
+ }