//
//  PaperlessService.swift
//  oAI
//
//  Paperless-NGX integration: search, read, and upload documents via REST API
//
//  SPDX-License-Identifier: AGPL-3.0-or-later
//  Copyright (C) 2026 Rune Olsen
//
//  This file is part of oAI.
//
//  oAI is free software: you can redistribute it and/or modify
//  it under the terms of the GNU Affero General Public License as
//  published by the Free Software Foundation, either version 3 of the
//  License, or (at your option) any later version.
//
//  oAI is distributed in the hope that it will be useful, but WITHOUT
//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
//  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
//  Public License for more details.
//
//  You should have received a copy of the GNU Affero General Public
//  License along with oAI. If not, see <https://www.gnu.org/licenses/>.

import Foundation
import os

/// Client for a Paperless-NGX server.
///
/// Exposes a set of tool schemas (`getToolSchemas()`), dispatches tool calls
/// (`executeTool(name:arguments:)`), and talks to the server's REST API using
/// the URL and API token read from `SettingsService.shared`.
@Observable
final class PaperlessService {
    static let shared = PaperlessService()

    private let settings = SettingsService.shared
    private let log = Logger(subsystem: "com.oai.oAI", category: "mcp")
    private let readTimeout: TimeInterval = 15
    private let uploadTimeout: TimeInterval = 60

    /// Result of the most recent `testConnection()` call.
    private(set) var isConnected = false

    // In-memory caches for ID → name resolution
    private var tagCache: [Int: String] = [:]
    private var correspondentCache: [Int: String] = [:]
    private var documentTypeCache: [Int: String] = [:]

    private init() {}

    // MARK: - Connection Test

    /// Requests a single document page to verify that the URL and token work.
    ///
    /// Updates `isConnected` and returns a human-readable status on success, or
    /// the underlying error on failure. A 2xx response that is not a JSON object
    /// counts as a failure (see `request(endpoint:queryParams:)`).
    func testConnection() async -> Result<String, Error> {
        do {
            let result = try await request(endpoint: "/api/documents/", queryParams: ["page_size": "1"])
            isConnected = true
            if let count = result["count"] as? Int {
                return .success("Connected (\(count) document\(count == 1 ? "" : "s"))")
            }
            return .success("Connected to Paperless-NGX")
        } catch {
            isConnected = false
            return .failure(error)
        }
    }

    // MARK: - Tool Schemas

    /// Returns the schemas of all Paperless tools handled by `executeTool(name:arguments:)`.
    func getToolSchemas() -> [Tool] {
        return [
            makeTool(
                name: "paperless_search",
                description: "Search for documents in Paperless-NGX by title, content, tags, or any text. Returns document metadata and a preview of OCR-extracted content. Use this to find invoices, contracts, letters, or any stored document.",
                properties: [
                    "query": prop("string", "Search query — can be text from document content, title, correspondent name, or tag"),
                    "page": prop("number", "Page number for pagination (default: 1, each page has 25 results)")
                ],
                required: ["query"]
            ),
            makeTool(
                name: "paperless_get_document",
                description: "Get the full details and complete OCR-extracted text content of a specific Paperless-NGX document by ID. Use after paperless_search to read the full text of a document.",
                properties: [
                    "document_id": prop("number", "The numeric ID of the document to retrieve")
                ],
                required: ["document_id"]
            ),
            makeTool(
                name: "paperless_list_tags",
                description: "List all tags defined in Paperless-NGX with their document counts.",
                properties: [:],
                required: []
            ),
            makeTool(
                name: "paperless_list_correspondents",
                description: "List all correspondents (senders/recipients) defined in Paperless-NGX with their document counts.",
                properties: [:],
                required: []
            ),
            makeTool(
                name: "paperless_list_document_types",
                description: "List all document types defined in Paperless-NGX with their document counts.",
                properties: [:],
                required: []
            ),
            makeTool(
                name: "paperless_upload_document",
                description: "Upload a local file to Paperless-NGX for OCR processing and storage. Supports PDF, PNG, JPEG, TIFF, and other image formats.",
                properties: [
                    "file_path": prop("string", "Absolute path to the local file to upload"),
                    "title": prop("string", "Optional title for the document"),
                    "tag_ids": prop("string", "Optional comma-separated tag IDs to assign (e.g. '1,3,7')")
                ],
                required: ["file_path"]
            )
        ]
    }

    // MARK: - Tool Execution

    /// Runs the named Paperless tool.
    ///
    /// - Parameters:
    ///   - name: One of the tool names declared in `getToolSchemas()`.
    ///   - arguments: JSON object text holding the tool arguments. An empty or
    ///     whitespace-only string is treated as `{}` so that parameterless
    ///     tools can be called without a payload.
    /// - Returns: The tool result, or a dictionary with a single `"error"` key.
    ///   This method never throws; every failure is reported through `"error"`.
    func executeTool(name: String, arguments: String) async -> [String: Any] {
        log.info("Executing Paperless tool: \(name)")

        let trimmedArguments = arguments.trimmingCharacters(in: .whitespacesAndNewlines)
        let payload = Data((trimmedArguments.isEmpty ? "{}" : trimmedArguments).utf8)
        guard let args = (try? JSONSerialization.jsonObject(with: payload)) as? [String: Any] else {
            return ["error": "Invalid arguments JSON"]
        }

        do {
            switch name {
            case "paperless_search":
                guard let query = args["query"] as? String else {
                    return ["error": "Missing required parameter: query"]
                }
                // Missing or unusable page values fall back to the first page.
                let page = max(1, integer(from: args["page"]) ?? 1)
                return try await searchDocuments(query: query, page: page)

            case "paperless_get_document":
                guard let docId = integer(from: args["document_id"]) else {
                    return ["error": "Missing or invalid parameter: document_id (expected integer)"]
                }
                return try await getDocument(id: docId)

            case "paperless_list_tags":
                return try await listTags()

            case "paperless_list_correspondents":
                return try await listCorrespondents()

            case "paperless_list_document_types":
                return try await listDocumentTypes()

            case "paperless_upload_document":
                guard let filePath = args["file_path"] as? String else {
                    return ["error": "Missing required parameter: file_path"]
                }
                let title = args["title"] as? String
                let tagIds = args["tag_ids"] as? String
                return try await uploadDocument(filePath: filePath, title: title, tagIds: tagIds)

            default:
                return ["error": "Unknown Paperless tool: \(name)"]
            }
        } catch let error as PaperlessError {
            // PaperlessError already carries the user-facing wording.
            return ["error": error.errorDescription ?? error.localizedDescription]
        } catch {
            return ["error": "Paperless error: \(error.localizedDescription)"]
        }
    }

    /// Converts a decoded JSON value to `Int` without trapping.
    ///
    /// Accepts integers, doubles (truncated toward zero) and numeric strings.
    /// Returns `nil` for anything else, including non-finite or out-of-range
    /// doubles, which would crash a plain `Int(_:)` conversion.
    private func integer(from value: Any?) -> Int? {
        if let int = value as? Int {
            return int
        }
        if let double = value as? Double {
            return Int(exactly: double.rounded(.towardZero))
        }
        if let string = value as? String {
            return Int(string.trimmingCharacters(in: .whitespaces))
        }
        return nil
    }

    // MARK: - API Operations

    /// Searches documents and returns one page of summarized results.
    private func searchDocuments(query: String, page: Int) async throws -> [String: Any] {
        await prefetchCaches()

        let result = try await request(endpoint: "/api/documents/", queryParams: [
            "query": query,
            "page": String(page)
        ])

        let total = result["count"] as? Int ?? 0
        guard let rawResults = result["results"] as? [[String: Any]] else {
            return ["total": total, "page": page, "results": [[String: Any]]()]
        }

        let formatted = rawResults.map { doc -> [String: Any] in
            var item: [String: Any] = [
                "id": doc["id"] ?? 0,
                "title": doc["title"] ?? "Untitled",
                "created": datePrefix(doc["created"])
            ]
            addResolvedNames(from: doc, to: &item)

            // Content preview capped at 500 chars
            if let content = doc["content"] as? String, !content.isEmpty {
                let preview = content.trimmingCharacters(in: .whitespacesAndNewlines)
                item["content_preview"] = String(preview.prefix(500))
            }
            return item
        }

        return ["total": total, "page": page, "results": formatted]
    }

    /// Fetches one document including its OCR text.
    private func getDocument(id: Int) async throws -> [String: Any] {
        await prefetchCaches()

        let doc = try await request(endpoint: "/api/documents/\(id)/")

        var result: [String: Any] = [
            "id": doc["id"] ?? id,
            "title": doc["title"] ?? "Untitled",
            "created": datePrefix(doc["created"]),
            "added": datePrefix(doc["added"]),
            "modified": datePrefix(doc["modified"])
        ]
        addResolvedNames(from: doc, to: &result)

        // The serial number is accepted as a number or as text so that it is
        // reported regardless of how the server encodes it.
        if let asn = doc["archive_serial_number"] as? Int {
            result["archive_serial_number"] = asn
        } else if let asn = doc["archive_serial_number"] as? String {
            result["archive_serial_number"] = asn
        }

        // Full OCR content capped at 30,000 chars; content_length reports the
        // untruncated (trimmed) length.
        if let content = doc["content"] as? String {
            let trimmed = content.trimmingCharacters(in: .whitespacesAndNewlines)
            result["content"] = String(trimmed.prefix(30_000))
            result["content_length"] = trimmed.count
        }

        return result
    }

    /// Returns the first 10 characters of a string value, or `""` when the
    /// value is not a string. Used to shorten timestamp fields.
    private func datePrefix(_ value: Any?) -> String {
        (value as? String).map { String($0.prefix(10)) } ?? ""
    }

    /// Copies correspondent, document type and tags from `doc` into `target`,
    /// replacing numeric IDs with cached names (or `"ID:<n>"` when unknown).
    private func addResolvedNames(from doc: [String: Any], to target: inout [String: Any]) {
        if let corrId = doc["correspondent"] as? Int {
            target["correspondent"] = correspondentCache[corrId] ?? "ID:\(corrId)"
        }
        if let dtId = doc["document_type"] as? Int {
            target["document_type"] = documentTypeCache[dtId] ?? "ID:\(dtId)"
        }
        if let tagIds = doc["tags"] as? [Int] {
            target["tags"] = tagIds.map { tagCache[$0] ?? "ID:\($0)" }
        }
    }

    private func listTags() async throws -> [String: Any] {
        try await listResource(endpoint: "/api/tags/", key: "tags")
    }

    private func listCorrespondents() async throws -> [String: Any] {
        try await listResource(endpoint: "/api/correspondents/", key: "correspondents")
    }

    private func listDocumentTypes() async throws -> [String: Any] {
        try await listResource(endpoint: "/api/document_types/", key: "document_types")
    }

    /// Fetches up to 250 entries from a list endpoint and returns them under
    /// `key` as `id` / `name` / `count` dictionaries, together with their count.
    private func listResource(endpoint: String, key: String) async throws -> [String: Any] {
        let result = try await request(endpoint: endpoint, queryParams: ["page_size": "250"])
        guard let items = result["results"] as? [[String: Any]] else {
            return ["count": 0, key: [[String: Any]]()]
        }
        let formatted = items.map { item -> [String: Any] in
            ["id": item["id"] ?? 0, "name": item["name"] ?? "Unknown", "count": item["document_count"] ?? 0]
        }
        return ["count": formatted.count, key: formatted]
    }

    /// Uploads a local file as a multipart/form-data POST.
    ///
    /// - Parameters:
    ///   - filePath: Path of the file; `~` is expanded and the path standardized.
    ///   - title: Optional document title; ignored when empty.
    ///   - tagIds: Optional comma-separated tag IDs; non-numeric entries are skipped.
    /// - Returns: A success dictionary, or an `"error"` dictionary when the file
    ///   is missing or unreadable.
    /// - Throws: `PaperlessError` for configuration, authentication and HTTP
    ///   failures; transport errors from `URLSession` are rethrown unchanged.
    private func uploadDocument(filePath: String, title: String?, tagIds: String?) async throws -> [String: Any] {
        let expanded = (filePath as NSString).expandingTildeInPath
        let resolved = (expanded as NSString).standardizingPath

        guard FileManager.default.fileExists(atPath: resolved) else {
            return ["error": "File not found: \(filePath)"]
        }
        guard let fileData = FileManager.default.contents(atPath: resolved) else {
            return ["error": "Cannot read file: \(filePath)"]
        }

        let fileName = (resolved as NSString).lastPathComponent
        let (baseURL, token) = try configuration()
        guard let url = URL(string: baseURL + "/api/documents/post_document/") else {
            throw PaperlessError.notConfigured
        }

        let boundary = "PaperlessBoundary\(UUID().uuidString.replacingOccurrences(of: "-", with: ""))"
        var body = Data()

        func append(_ text: String) {
            body.append(Data(text.utf8))
        }

        func appendField(_ name: String, _ value: String) {
            append("--\(boundary)\r\n")
            append("Content-Disposition: form-data; name=\"\(name)\"\r\n\r\n")
            append("\(value)\r\n")
        }

        if let title, !title.isEmpty {
            appendField("title", title)
        }
        if let tagIds {
            let ids = tagIds.split(separator: ",").compactMap { Int($0.trimmingCharacters(in: .whitespaces)) }
            for id in ids {
                appendField("tags", String(id))
            }
        }

        let mimeType = mimeTypeFor(fileName: fileName)
        append("--\(boundary)\r\n")
        // The file name is escaped so that quotes or line breaks in it cannot
        // terminate the header early.
        append("Content-Disposition: form-data; name=\"document\"; filename=\"\(multipartEscaped(fileName))\"\r\n")
        append("Content-Type: \(mimeType)\r\n\r\n")
        body.append(fileData)
        append("\r\n--\(boundary)--\r\n")

        var urlRequest = URLRequest(url: url, timeoutInterval: uploadTimeout)
        urlRequest.httpMethod = "POST"
        urlRequest.setValue("Token \(token)", forHTTPHeaderField: "Authorization")
        urlRequest.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")
        urlRequest.httpBody = body

        let (data, response) = try await URLSession.shared.data(for: urlRequest)
        guard let httpResponse = response as? HTTPURLResponse else {
            throw PaperlessError.httpError(0, "Invalid response")
        }
        if httpResponse.statusCode == 401 {
            throw PaperlessError.unauthorized
        }
        if (200...299).contains(httpResponse.statusCode) {
            return ["success": true, "message": "Document uploaded successfully. Paperless-NGX will process it shortly."]
        }
        let msg = String(data: data, encoding: .utf8) ?? "Unknown error"
        throw PaperlessError.httpError(httpResponse.statusCode, msg)
    }

    /// Percent-escapes the characters that would break a quoted multipart
    /// header parameter: carriage return, line feed and double quote.
    private func multipartEscaped(_ value: String) -> String {
        value
            .replacingOccurrences(of: "\r", with: "%0D")
            .replacingOccurrences(of: "\n", with: "%0A")
            .replacingOccurrences(of: "\"", with: "%22")
    }

    // MARK: - Cache Prefetch

    /// Fills any empty ID → name cache. Best effort: failures leave the cache
    /// empty, and callers then fall back to `"ID:<n>"` labels.
    ///
    /// A cache that stays empty (failed fetch or no entries on the server) is
    /// requested again on the next call; a filled cache is never refreshed.
    private func prefetchCaches() async {
        if tagCache.isEmpty {
            let names = await fetchNameMap(endpoint: "/api/tags/")
            if !names.isEmpty { tagCache = names }
        }
        if correspondentCache.isEmpty {
            let names = await fetchNameMap(endpoint: "/api/correspondents/")
            if !names.isEmpty { correspondentCache = names }
        }
        if documentTypeCache.isEmpty {
            let names = await fetchNameMap(endpoint: "/api/document_types/")
            if !names.isEmpty { documentTypeCache = names }
        }
    }

    /// Loads up to 250 entries from a list endpoint as an ID → name map.
    /// Returns an empty map (and logs at debug level) when the request fails.
    private func fetchNameMap(endpoint: String) async -> [Int: String] {
        do {
            let result = try await request(endpoint: endpoint, queryParams: ["page_size": "250"])
            guard let items = result["results"] as? [[String: Any]] else {
                return [:]
            }
            return items.reduce(into: [Int: String]()) { names, item in
                if let id = item["id"] as? Int, let name = item["name"] as? String {
                    names[id] = name
                }
            }
        } catch {
            log.debug("Paperless cache prefetch failed for \(endpoint): \(error.localizedDescription)")
            return [:]
        }
    }

    // MARK: - HTTP Client

    /// Reads the server URL and API token from settings.
    ///
    /// Surrounding whitespace and trailing slashes are removed from the URL so
    /// that appending an endpoint path never produces a double slash.
    /// - Throws: `PaperlessError.notConfigured` when either value is missing or empty.
    private func configuration() throws -> (baseURL: String, token: String) {
        guard let token = settings.paperlessAPIToken, !token.isEmpty else {
            throw PaperlessError.notConfigured
        }
        var baseURL = settings.paperlessURL.trimmingCharacters(in: .whitespacesAndNewlines)
        while baseURL.hasSuffix("/") {
            baseURL.removeLast()
        }
        guard !baseURL.isEmpty else {
            throw PaperlessError.notConfigured
        }
        return (baseURL, token)
    }

    /// Performs an authenticated GET and decodes the response as a JSON object.
    ///
    /// - Parameters:
    ///   - endpoint: Path appended to the configured base URL, e.g. `"/api/tags/"`.
    ///   - queryParams: Query parameters; emitted in key order.
    /// - Throws: `PaperlessError.notConfigured`, `.unauthorized` (HTTP 401), or
    ///   `.httpError` for an invalid URL, a non-2xx status, a 2xx body that is
    ///   not a JSON object, or a transport failure (reported with code 0).
    private func request(endpoint: String, queryParams: [String: String] = [:]) async throws -> [String: Any] {
        let (baseURL, token) = try configuration()

        let urlString = baseURL + endpoint
        guard var components = URLComponents(string: urlString) else {
            throw PaperlessError.httpError(0, "Invalid URL: \(urlString)")
        }
        if !queryParams.isEmpty {
            components.queryItems = queryParams
                .sorted { $0.key < $1.key }
                .map { URLQueryItem(name: $0.key, value: $0.value) }
            // URLComponents leaves "+" unescaped, but servers decode it as a
            // space in query strings, so a literal plus must be sent as %2B.
            components.percentEncodedQuery = components.percentEncodedQuery?
                .replacingOccurrences(of: "+", with: "%2B")
        }
        guard let url = components.url else {
            throw PaperlessError.httpError(0, "Invalid URL: \(urlString)")
        }

        var urlRequest = URLRequest(url: url, timeoutInterval: readTimeout)
        urlRequest.httpMethod = "GET"
        urlRequest.setValue("Token \(token)", forHTTPHeaderField: "Authorization")
        urlRequest.setValue("application/json", forHTTPHeaderField: "Accept")

        do {
            let (data, response) = try await URLSession.shared.data(for: urlRequest)
            guard let httpResponse = response as? HTTPURLResponse else {
                throw PaperlessError.httpError(0, "Invalid response")
            }
            if httpResponse.statusCode == 401 {
                throw PaperlessError.unauthorized
            }
            guard (200...299).contains(httpResponse.statusCode) else {
                let msg = String(data: data, encoding: .utf8) ?? "Unknown error"
                throw PaperlessError.httpError(httpResponse.statusCode, msg)
            }
            // A successful status with a non-JSON body usually means the URL
            // points at something other than the Paperless API; report it
            // instead of pretending the server returned an empty object.
            guard let json = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any] else {
                throw PaperlessError.httpError(httpResponse.statusCode, "Response was not a JSON object")
            }
            return json
        } catch let error as PaperlessError {
            throw error
        } catch {
            throw PaperlessError.httpError(0, error.localizedDescription)
        }
    }

    // MARK: - Helpers

    /// Maps a file extension to a MIME type; unknown extensions yield
    /// `application/octet-stream`.
    private func mimeTypeFor(fileName: String) -> String {
        let ext = (fileName as NSString).pathExtension.lowercased()
        switch ext {
        case "pdf": return "application/pdf"
        case "png": return "image/png"
        case "jpg", "jpeg": return "image/jpeg"
        case "tiff", "tif": return "image/tiff"
        case "gif": return "image/gif"
        case "webp": return "image/webp"
        default: return "application/octet-stream"
        }
    }

    /// Builds a function-type `Tool` with an object parameter schema.
    private func makeTool(name: String, description: String, properties: [String: Tool.Function.Parameters.Property], required: [String]) -> Tool {
        Tool(
            type: "function",
            function: Tool.Function(
                name: name,
                description: description,
                parameters: Tool.Function.Parameters(
                    type: "object",
                    properties: properties,
                    required: required
                )
            )
        )
    }

    /// Builds a schema property with the given JSON type and description.
    private func prop(_ type: String, _ description: String) -> Tool.Function.Parameters.Property {
        Tool.Function.Parameters.Property(type: type, description: description, enum: nil)
    }
}

// MARK: - Error Types

/// Failures reported by `PaperlessService`.
enum PaperlessError: LocalizedError {
    /// The server URL or API token is missing.
    case notConfigured
    /// The server answered HTTP 401.
    case unauthorized
    /// Any other failure: status code (0 when no HTTP status applies) and message.
    case httpError(Int, String)

    var errorDescription: String? {
        switch self {
        case .notConfigured:
            return "Paperless-NGX is not configured. Set your URL and API token in Settings > Paperless."
        case .unauthorized:
            return "Invalid API token. Check your Paperless-NGX token in Settings > Paperless."
        case .httpError(let code, let msg):
            return "Paperless-NGX API error \(code): \(msg)"
        }
    }
}