Add Anthropic prompt caching (direct + via OpenRouter)

Caches the system prompt/tools and the growing conversation history via cache_control breakpoints, cutting cost and latency on repeated turns. Covers both the regular chat path and the tool-calling loop (chatWithToolMessages), which has its own request-building code and was initially missed. Cost calculation now accounts for cache write/read pricing instead of treating all input tokens as full price. Verified live: cache reads grow turn-over-turn in oAI.log.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-18 12:43:32 +02:00
parent a793fdacc4
commit 5b99a6f81c
5 changed files with 131 additions and 21 deletions
@@ -130,11 +130,23 @@ struct ChatResponse: Codable {
        let promptTokens: Int
        let completionTokens: Int
        let totalTokens: Int
+        let cacheCreationInputTokens: Int?
+        let cacheReadInputTokens: Int?
+
+        init(promptTokens: Int, completionTokens: Int, totalTokens: Int, cacheCreationInputTokens: Int? = nil, cacheReadInputTokens: Int? = nil) {
+            self.promptTokens = promptTokens
+            self.completionTokens = completionTokens
+            self.totalTokens = totalTokens
+            self.cacheCreationInputTokens = cacheCreationInputTokens
+            self.cacheReadInputTokens = cacheReadInputTokens
+        }

        enum CodingKeys: String, CodingKey {
            case promptTokens = "prompt_tokens"
            case completionTokens = "completion_tokens"
            case totalTokens = "total_tokens"
+            case cacheCreationInputTokens = "cache_creation_input_tokens"
+            case cacheReadInputTokens = "cache_read_input_tokens"
        }
    }