Add Anthropic prompt caching (direct + via OpenRouter)

Caches the system prompt/tools and growing conversation history via
cache_control breakpoints, cutting cost and latency on repeated turns.
Covers both the regular chat path and the tool-calling loop
(chatWithToolMessages), which has its own request-building code and was
initially missed. Cost calculation now accounts for cache write/read
pricing instead of treating all input tokens as full price. Verified
live: cache reads grow turn-over-turn in oAI.log.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-18 12:43:32 +02:00
parent a793fdacc4
commit 5b99a6f81c
5 changed files with 131 additions and 21 deletions
+29 -3
View File
@@ -198,6 +198,11 @@ class OpenRouterProvider: AIProvider {
}
if let maxTokens = maxTokens { body["max_tokens"] = maxTokens }
if let temperature = temperature { body["temperature"] = temperature }
// Anthropic models require an explicit cache_control opt-in on OpenRouter;
// other providers cache automatically.
if model.hasPrefix("anthropic/") {
body["cache_control"] = ["type": "ephemeral"]
}
var urlRequest = URLRequest(url: url)
urlRequest.httpMethod = "POST"
@@ -388,6 +393,12 @@ class OpenRouterProvider: AIProvider {
ReasoningAPIConfig(effort: $0.effort, exclude: $0.exclude ? true : nil)
}
// Anthropic models require an explicit cache_control opt-in on OpenRouter;
// other providers (OpenAI, DeepSeek, Gemini, Grok, etc.) cache automatically.
let cacheControl: OpenRouterChatRequest.CacheControl? = effectiveModel.hasPrefix("anthropic/")
? .init(type: "ephemeral")
: nil
return OpenRouterChatRequest(
model: effectiveModel,
messages: apiMessages,
@@ -398,7 +409,8 @@ class OpenRouterProvider: AIProvider {
tools: request.tools,
toolChoice: request.tools != nil ? "auto" : nil,
modalities: request.imageGeneration ? ["text", "image"] : nil,
reasoning: reasoningConfig
reasoning: reasoningConfig,
cacheControl: cacheControl
)
}
@@ -416,6 +428,11 @@ class OpenRouterProvider: AIProvider {
let allImages = topLevelImages + blockImages
let images: [Data]? = allImages.isEmpty ? nil : allImages
if let details = apiResponse.usage?.promptTokensDetails,
details.cachedTokens != nil || details.cacheWriteTokens != nil {
Log.api.info("OpenRouter cache usage: model=\(apiResponse.model), created=\(details.cacheWriteTokens ?? 0), read=\(details.cachedTokens ?? 0)")
}
return ChatResponse(
id: apiResponse.id,
model: apiResponse.model,
@@ -426,7 +443,9 @@ class OpenRouterProvider: AIProvider {
ChatResponse.Usage(
promptTokens: usage.promptTokens,
completionTokens: usage.completionTokens,
totalTokens: usage.totalTokens
totalTokens: usage.totalTokens,
cacheCreationInputTokens: usage.promptTokensDetails?.cacheWriteTokens,
cacheReadInputTokens: usage.promptTokensDetails?.cachedTokens
)
},
created: Date(timeIntervalSince1970: TimeInterval(apiResponse.created)),
@@ -446,6 +465,11 @@ class OpenRouterProvider: AIProvider {
let allImages = topLevelImages + blockImages
let images: [Data]? = allImages.isEmpty ? nil : allImages
if let details = apiChunk.usage?.promptTokensDetails,
details.cachedTokens != nil || details.cacheWriteTokens != nil {
Log.api.info("OpenRouter stream cache usage: model=\(apiChunk.model), created=\(details.cacheWriteTokens ?? 0), read=\(details.cachedTokens ?? 0)")
}
return StreamChunk(
id: apiChunk.id,
model: apiChunk.model,
@@ -460,7 +484,9 @@ class OpenRouterProvider: AIProvider {
ChatResponse.Usage(
promptTokens: usage.promptTokens,
completionTokens: usage.completionTokens,
totalTokens: usage.totalTokens
totalTokens: usage.totalTokens,
cacheCreationInputTokens: usage.promptTokensDetails?.cacheWriteTokens,
cacheReadInputTokens: usage.promptTokensDetails?.cachedTokens
)
}
)