//
//  AppleFoundationProvider.swift
//  oAI
//
//  Apple Foundation Models provider (on-device Apple Intelligence)
//
//  SPDX-License-Identifier: AGPL-3.0-or-later
//  Copyright (C) 2026 Rune Olsen
//
//  This file is part of oAI.
//
//  oAI is free software: you can redistribute it and/or modify
//  it under the terms of the GNU Affero General Public License as
//  published by the Free Software Foundation, either version 3 of the
//  License, or (at your option) any later version.
//
//  oAI is distributed in the hope that it will be useful, but WITHOUT
//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
//  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
//  Public License for more details.
//
//  You should have received a copy of the GNU Affero General Public
//  License along with oAI. If not, see <https://www.gnu.org/licenses/>.

import Foundation
import FoundationModels
import os

/// `AIProvider` implementation backed by `FoundationModels` (`SystemLanguageModel.default`).
///
/// The provider exposes a single model (`apple-on-device`) and advertises
/// streaming only: vision, tools and online search are all reported as unsupported.
final class AppleFoundationProvider: AIProvider {
    let name = "Apple Intelligence"

    let capabilities = ProviderCapabilities(
        supportsStreaming: true,
        supportsVision: false,
        supportsTools: false,
        supportsOnlineSearch: false,
        maxContextLength: 4096
    )

    // MARK: - Models

    /// Returns the single model entry this provider offers.
    func listModels() async throws -> [ModelInfo] {
        [
            ModelInfo(
                id: "apple-on-device",
                name: "Apple On-Device",
                description: "On-device Apple Intelligence model. Private, free, and works offline. 4K context window.",
                contextLength: 4096,
                pricing: ModelInfo.Pricing(prompt: 0, completion: 0),
                capabilities: ModelInfo.ModelCapabilities(
                    vision: false,
                    tools: false,
                    online: false
                )
            )
        ]
    }

    /// Looks up a model by identifier among `listModels()`.
    ///
    /// - Parameter id: Model identifier to match.
    /// - Returns: The matching model, or `nil` when no listed model has that id.
    func getModel(_ id: String) async throws -> ModelInfo? {
        try await listModels().first { $0.id == id }
    }

    /// Always returns `nil`; this provider reports no credit information.
    func getCredits() async throws -> Credits? {
        nil
    }

    // MARK: - Streaming chat

    /// Streams a response to the last user message in `request`.
    ///
    /// Text chunks are yielded with `finishReason == nil`; a final chunk with
    /// `finishReason == "stop"` and no content is yielded before the stream finishes.
    /// `LanguageModelSession.GenerationError`s are translated by `mapGenerationError(_:)`;
    /// any other error finishes the stream unchanged.
    ///
    /// - Parameter request: The chat request (model id, system prompt, messages).
    /// - Returns: A stream of `StreamChunk` values.
    func streamChat(request: ChatRequest) -> AsyncThrowingStream<StreamChunk, Error> {
        AsyncThrowingStream { continuation in
            let task = Task {
                do {
                    let session = try self.makeSession(for: request)
                    let prompt = self.lastUserMessage(from: request)

                    // Each snapshot's `content` is treated as the full text accumulated so far,
                    // so the chunk to emit is whatever was appended since the previous snapshot.
                    let stream = session.streamResponse(to: prompt)
                    var lastContent = ""

                    for try await snapshot in stream {
                        let current = snapshot.content
                        guard let delta = self.appendedText(in: current, after: lastContent) else {
                            continue
                        }
                        continuation.yield(StreamChunk(
                            id: UUID().uuidString,
                            model: request.model,
                            delta: StreamChunk.Delta(content: delta, role: "assistant"),
                            finishReason: nil,
                            usage: nil
                        ))
                        lastContent = current
                    }

                    continuation.yield(StreamChunk(
                        id: UUID().uuidString,
                        model: request.model,
                        delta: StreamChunk.Delta(content: nil, role: nil),
                        finishReason: "stop",
                        usage: nil
                    ))
                    continuation.finish()
                } catch let genError as LanguageModelSession.GenerationError {
                    continuation.finish(throwing: self.mapGenerationError(genError))
                } catch {
                    continuation.finish(throwing: error)
                }
            }

            // Stop generating once the consumer cancels or drops the stream; without this
            // the unstructured task keeps running with nobody listening.
            continuation.onTermination = { _ in task.cancel() }
        }
    }

    /// Returns the text present in `current` beyond the end of `previous`.
    ///
    /// Lengths are measured in Unicode scalars rather than `Character`s: a scalar that
    /// merges into the preceding grapheme cluster (combining mark, ZWJ sequence, flag pair)
    /// does not increase the `Character` count and would otherwise be dropped.
    ///
    /// - Returns: The appended suffix, or `nil` when `current` is not longer than `previous`.
    /// - Note: Assumes `current` extends `previous`; the contents are not compared.
    private func appendedText(in current: String, after previous: String) -> String? {
        let alreadySent = previous.unicodeScalars.count
        let scalars = current.unicodeScalars
        guard scalars.count > alreadySent else { return nil }
        return String(scalars.dropFirst(alreadySent))
    }

    // MARK: - Non-streaming chat

    /// Sends the last user message and waits for the complete response.
    ///
    /// - Parameter request: The chat request (model id, system prompt, messages).
    /// - Returns: A `ChatResponse` with `finishReason` `"stop"` and no usage data.
    /// - Throws: Availability errors from `makeSession(for:)`, generation errors translated
    ///   by `mapGenerationError(_:)` (matching `streamChat`), or any other error unchanged.
    func chat(request: ChatRequest) async throws -> ChatResponse {
        let session = try makeSession(for: request)
        let prompt = lastUserMessage(from: request)

        do {
            let response: LanguageModelSession.Response<String> = try await session.respond(to: prompt)
            return ChatResponse(
                id: UUID().uuidString,
                model: request.model,
                content: response.content,
                role: "assistant",
                finishReason: "stop",
                usage: nil,
                created: Date()
            )
        } catch let genError as LanguageModelSession.GenerationError {
            throw mapGenerationError(genError)
        }
    }

    // MARK: - Tool messages (not supported in Phase 1)

    /// Always throws: tool calling is not implemented by this provider.
    func chatWithToolMessages(model: String, messages: [[String: Any]], tools: [Tool]?, maxTokens: Int?, temperature: Double?) async throws -> ChatResponse {
        throw ProviderError.unknown("Tool calling requires Apple Foundation Models Phase 3.")
    }

    // MARK: - Session construction

    /// Builds a session whose instructions carry the system prompt and earlier turns.
    ///
    /// Foundation Models sessions don't accept a message array, so the history is
    /// injected inline as formatted text.
    ///
    /// - Throws: The error from `availabilityError(for:)` when the system model is not available.
    private func makeSession(for request: ChatRequest) throws -> LanguageModelSession {
        // Read availability once so the check and the reported reason cannot disagree.
        let availability = SystemLanguageModel.default.availability
        guard case .available = availability else {
            throw availabilityError(for: availability)
        }

        var instructions = request.systemPrompt ?? ""

        // History is everything before the message used as the prompt (the last user
        // message, see `lastUserMessage(from:)`), so the prompt never appears twice.
        // With no user message at all, fall back to dropping the final message.
        let promptIndex = request.messages.lastIndex(where: { $0.role == .user })
        let precedingMessages = promptIndex.map { request.messages.prefix(upTo: $0) }
            ?? request.messages.dropLast()
        let priorMessages = precedingMessages.filter { $0.role != .system }

        if !priorMessages.isEmpty {
            let history = priorMessages
                .map { message -> String in
                    // Every non-user role is labelled "Assistant".
                    let label = message.role == .user ? "User" : "Assistant"
                    return "\(label): \(message.content)"
                }
                .joined(separator: "\n")
            instructions += "\n\nConversation so far:\n\(history)\n\nContinue from here."
        }

        return instructions.isEmpty
            ? LanguageModelSession()
            : LanguageModelSession(instructions: instructions)
    }

    /// Returns the content of the last message with role `.user`, or `""` if there is none.
    private func lastUserMessage(from request: ChatRequest) -> String {
        request.messages.last(where: { $0.role == .user })?.content ?? ""
    }

    // MARK: - Error mapping

    /// Translates a non-available `availability` value into a user-facing `ProviderError`.
    ///
    /// - Parameter availability: The availability value that failed the `.available` check.
    private func availabilityError(for availability: SystemLanguageModel.Availability) -> Error {
        switch availability {
        case .unavailable(.deviceNotEligible):
            return ProviderError.unknown("This Mac doesn't support Apple Intelligence. Apple Silicon is required.")
        case .unavailable(.appleIntelligenceNotEnabled):
            return ProviderError.unknown("Apple Intelligence is not enabled. Open System Settings → Apple Intelligence to turn it on.")
        case .unavailable(.modelNotReady):
            return ProviderError.unknown("Apple Intelligence model is still downloading. Please wait and try again.")
        default:
            return ProviderError.unknown("Apple Intelligence is not available on this device.")
        }
    }

    /// Translates selected generation errors into `ProviderError`s; others pass through unchanged.
    private func mapGenerationError(_ error: LanguageModelSession.GenerationError) -> Error {
        switch error {
        case .exceededContextWindowSize:
            return ProviderError.unknown("Apple Intelligence context limit exceeded (4,096 tokens). Start a new chat or enable Progressive Summarization in Settings → Advanced.")
        case .rateLimited:
            return ProviderError.rateLimitExceeded
        case .guardrailViolation:
            return ProviderError.unknown("Apple Intelligence declined to respond to this message.")
        default:
            return error
        }
    }
}