mirror of
https://codeberg.org/muon/home.git
synced 2026-07-03 23:49:35 +00:00
Add hermes
This commit is contained in:
parent
28bb03187d
commit
609b3384c0
9 changed files with 526 additions and 6 deletions
130
pkgs/auth2api/thinking.patch
Normal file
130
pkgs/auth2api/thinking.patch
Normal file
|
|
@@ -0,0 +1,130 @@
|
|||
--- a/src/config.ts
|
||||
+++ b/src/config.ts
|
||||
@@ -44,6 +44,12 @@
|
||||
"count-tokens-ms": number;
|
||||
}
|
||||
|
||||
+export interface ThinkingConfig {
|
||||
+ /** Default reasoning effort injected when the client sends no reasoning_effort;
|
||||
+ * "none" leaves the request unchanged. Accepts: none | minimal | low | medium | high | xhigh (default: medium) */
|
||||
+ "default-effort": string;
|
||||
+}
|
||||
+
|
||||
export interface StatsConfig {
|
||||
/** Default true. Set false to disable per-request stats recording entirely. */
|
||||
enabled: boolean;
|
||||
@@ -58,6 +64,7 @@
|
||||
"api-keys": Set<string>;
|
||||
"body-limit": string;
|
||||
cloaking: CloakingConfig;
|
||||
+ thinking: ThinkingConfig;
|
||||
timeouts: TimeoutConfig;
|
||||
stats: StatsConfig;
|
||||
debug: DebugMode;
|
||||
@@ -78,6 +85,9 @@
|
||||
"cli-version": "2.1.88",
|
||||
entrypoint: "cli",
|
||||
},
|
||||
+ thinking: {
|
||||
+ "default-effort": "medium",
|
||||
+ },
|
||||
timeouts: {
|
||||
"messages-ms": 120000,
|
||||
"stream-messages-ms": 600000,
|
||||
--- a/src/handlers/openai.ts
|
||||
+++ b/src/handlers/openai.ts
|
||||
@@ -540,7 +540,13 @@
|
||||
const structured =
|
||||
body.response_format?.type === "json_object" ||
|
||||
body.response_format?.type === "json_schema";
|
||||
- const translatedBody = openaiToAnthropic(body);
|
||||
+ // Inject default thinking effort when the client hasn't specified one.
|
||||
+ const effort = config.thinking?.["default-effort"] ?? "medium";
|
||||
+ const bodyWithThinking =
|
||||
+ body.reasoning_effort || effort === "none"
|
||||
+ ? body
|
||||
+ : { ...body, reasoning_effort: effort };
|
||||
+ const translatedBody = openaiToAnthropic(bodyWithThinking);
|
||||
|
||||
if (isDebugLevel(config.debug, "verbose")) {
|
||||
console.log(
|
||||
--- a/src/upstream/anthropic-api.ts
|
||||
+++ b/src/upstream/anthropic-api.ts
|
||||
@@ -18,17 +18,17 @@
|
||||
|
||||
if (isHaiku) {
|
||||
if (structured) {
|
||||
- return "oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,structured-outputs-2025-12-15";
|
||||
+ return "oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,structured-outputs-2025-12-15";
|
||||
} else {
|
||||
- return "oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219";
|
||||
+ return "oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,claude-code-20250219";
|
||||
}
|
||||
}
|
||||
|
||||
if (structured) {
|
||||
- return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24,structured-outputs-2025-12-15";
|
||||
+ return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24,structured-outputs-2025-12-15";
|
||||
}
|
||||
|
||||
- return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24";
|
||||
+ return "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advanced-tool-use-2025-11-20,effort-2025-11-24";
|
||||
}
|
||||
|
||||
/**
|
||||
--- a/src/upstream/translator.ts
|
||||
+++ b/src/upstream/translator.ts
|
||||
@@ -9,13 +9,14 @@
|
||||
inputTokens: number,
|
||||
outputTokens: number,
|
||||
cachedTokens: number,
|
||||
+ reasoningTokens: number = 0,
|
||||
): any {
|
||||
return {
|
||||
prompt_tokens: inputTokens,
|
||||
completion_tokens: outputTokens,
|
||||
total_tokens: inputTokens + outputTokens,
|
||||
prompt_tokens_details: { cached_tokens: cachedTokens },
|
||||
- completion_tokens_details: { reasoning_tokens: 0 },
|
||||
+ completion_tokens_details: { reasoning_tokens: reasoningTokens },
|
||||
};
|
||||
}
|
||||
|
||||
@@ -281,6 +282,7 @@
|
||||
|
||||
export function anthropicToOpenai(anthropicResp: any, model: string): any {
|
||||
let textContent = "";
|
||||
+ let reasoningContent = "";
|
||||
const toolCalls: any[] = [];
|
||||
|
||||
if (Array.isArray(anthropicResp.content)) {
|
||||
@@ -288,7 +290,7 @@
|
||||
if (block.type === "text") {
|
||||
textContent += block.text;
|
||||
} else if (block.type === "thinking" && block.thinking) {
|
||||
- // thinking blocks not exposed in chat completions response
|
||||
+ reasoningContent += block.thinking;
|
||||
} else if (block.type === "tool_use") {
|
||||
toolCalls.push({
|
||||
id: block.id,
|
||||
@@ -303,10 +305,12 @@
|
||||
}
|
||||
|
||||
const message: any = { role: "assistant", content: textContent || null };
|
||||
+ if (reasoningContent) message.reasoning_content = reasoningContent;
|
||||
if (toolCalls.length) message.tool_calls = toolCalls;
|
||||
|
||||
const inputTokens = anthropicResp.usage?.input_tokens || 0;
|
||||
const outputTokens = anthropicResp.usage?.output_tokens || 0;
|
||||
+ const reasoningTokens = anthropicResp.usage?.output_tokens_details?.thinking_tokens || 0;
|
||||
|
||||
return {
|
||||
id: `chatcmpl-${uuidv4()}`,
|
||||
@@ -325,6 +329,7 @@
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
anthropicResp.usage?.cache_read_input_tokens || 0,
|
||||
+ reasoningTokens,
|
||||
),
|
||||
};
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue