Add Anthropic prompt caching (direct + via OpenRouter)
Caches the system prompt/tools and the growing conversation history via cache_control breakpoints, cutting cost and latency on repeated turns. Covers both the regular chat path and the tool-calling loop (chatWithToolMessages), which has its own request-building code and was initially missed. Cost calculation now accounts for cache write/read pricing instead of treating all input tokens as full price. Verified live: cache reads grow turn-over-turn in oAI.log.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -198,6 +198,11 @@ class OpenRouterProvider: AIProvider {
|
||||
}
|
||||
if let maxTokens = maxTokens { body["max_tokens"] = maxTokens }
|
||||
if let temperature = temperature { body["temperature"] = temperature }
|
||||
// Anthropic models require an explicit cache_control opt-in on OpenRouter;
|
||||
// other providers cache automatically.
|
||||
if model.hasPrefix("anthropic/") {
|
||||
body["cache_control"] = ["type": "ephemeral"]
|
||||
}
|
||||
|
||||
var urlRequest = URLRequest(url: url)
|
||||
urlRequest.httpMethod = "POST"
|
||||
@@ -388,6 +393,12 @@ class OpenRouterProvider: AIProvider {
|
||||
ReasoningAPIConfig(effort: $0.effort, exclude: $0.exclude ? true : nil)
|
||||
}
|
||||
|
||||
// Anthropic models require an explicit cache_control opt-in on OpenRouter;
|
||||
// other providers (OpenAI, DeepSeek, Gemini, Grok, etc.) cache automatically.
|
||||
let cacheControl: OpenRouterChatRequest.CacheControl? = effectiveModel.hasPrefix("anthropic/")
|
||||
? .init(type: "ephemeral")
|
||||
: nil
|
||||
|
||||
return OpenRouterChatRequest(
|
||||
model: effectiveModel,
|
||||
messages: apiMessages,
|
||||
@@ -398,7 +409,8 @@ class OpenRouterProvider: AIProvider {
|
||||
tools: request.tools,
|
||||
toolChoice: request.tools != nil ? "auto" : nil,
|
||||
modalities: request.imageGeneration ? ["text", "image"] : nil,
|
||||
reasoning: reasoningConfig
|
||||
reasoning: reasoningConfig,
|
||||
cacheControl: cacheControl
|
||||
)
|
||||
}
|
||||
|
||||
@@ -416,6 +428,11 @@ class OpenRouterProvider: AIProvider {
|
||||
let allImages = topLevelImages + blockImages
|
||||
let images: [Data]? = allImages.isEmpty ? nil : allImages
|
||||
|
||||
if let details = apiResponse.usage?.promptTokensDetails,
|
||||
details.cachedTokens != nil || details.cacheWriteTokens != nil {
|
||||
Log.api.info("OpenRouter cache usage: model=\(apiResponse.model), created=\(details.cacheWriteTokens ?? 0), read=\(details.cachedTokens ?? 0)")
|
||||
}
|
||||
|
||||
return ChatResponse(
|
||||
id: apiResponse.id,
|
||||
model: apiResponse.model,
|
||||
@@ -426,7 +443,9 @@ class OpenRouterProvider: AIProvider {
|
||||
ChatResponse.Usage(
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
totalTokens: usage.totalTokens
|
||||
totalTokens: usage.totalTokens,
|
||||
cacheCreationInputTokens: usage.promptTokensDetails?.cacheWriteTokens,
|
||||
cacheReadInputTokens: usage.promptTokensDetails?.cachedTokens
|
||||
)
|
||||
},
|
||||
created: Date(timeIntervalSince1970: TimeInterval(apiResponse.created)),
|
||||
@@ -446,6 +465,11 @@ class OpenRouterProvider: AIProvider {
|
||||
let allImages = topLevelImages + blockImages
|
||||
let images: [Data]? = allImages.isEmpty ? nil : allImages
|
||||
|
||||
if let details = apiChunk.usage?.promptTokensDetails,
|
||||
details.cachedTokens != nil || details.cacheWriteTokens != nil {
|
||||
Log.api.info("OpenRouter stream cache usage: model=\(apiChunk.model), created=\(details.cacheWriteTokens ?? 0), read=\(details.cachedTokens ?? 0)")
|
||||
}
|
||||
|
||||
return StreamChunk(
|
||||
id: apiChunk.id,
|
||||
model: apiChunk.model,
|
||||
@@ -460,7 +484,9 @@ class OpenRouterProvider: AIProvider {
|
||||
ChatResponse.Usage(
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
totalTokens: usage.totalTokens
|
||||
totalTokens: usage.totalTokens,
|
||||
cacheCreationInputTokens: usage.promptTokensDetails?.cacheWriteTokens,
|
||||
cacheReadInputTokens: usage.promptTokensDetails?.cachedTokens
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user