//
// PaperlessService.swift
// oAI
//
// Paperless-NGX integration: search, read, and upload documents via REST API
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Copyright (C) 2026 Rune Olsen
//
// This file is part of oAI.
//
// oAI is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// oAI is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
// Public License for more details.
//
// You should have received a copy of the GNU Affero General Public
// License along with oAI. If not, see <https://www.gnu.org/licenses/>.

import Foundation
|
|
import os
|
|
|
|
/// Client for a Paperless-NGX server.
///
/// Publishes a set of LLM tool schemas (`getToolSchemas()`) and executes the
/// matching tool calls (`executeTool(name:arguments:)`) against the server's
/// REST API using the URL and API token stored in `SettingsService`.
@Observable
final class PaperlessService {
    static let shared = PaperlessService()

    private let settings = SettingsService.shared
    private let log = Logger(subsystem: "com.oai.oAI", category: "mcp")
    /// Timeout, in seconds, for GET requests.
    private let readTimeout: TimeInterval = 15
    /// Timeout, in seconds, for document uploads.
    private let uploadTimeout: TimeInterval = 60

    /// Outcome of the most recent `testConnection()` call.
    private(set) var isConnected = false

    // In-memory caches for ID → name resolution
    private var tagCache: [Int: String] = [:]
    private var correspondentCache: [Int: String] = [:]
    private var documentTypeCache: [Int: String] = [:]
    /// Server URL the caches above were filled from; a change invalidates them.
    private var cacheSourceURL: String?

    private init() {}

    // MARK: - Connection Test

    /// Probes the documents endpoint and records the outcome in `isConnected`.
    ///
    /// - Returns: `.success` with a human-readable status line, or `.failure`
    ///   with the error thrown by the request.
    func testConnection() async -> Result<String, Error> {
        do {
            let result = try await request(endpoint: "/api/documents/", queryParams: ["page_size": "1"])
            isConnected = true
            guard let count = result["count"] as? Int else {
                return .success("Connected to Paperless-NGX")
            }
            return .success("Connected (\(count) document\(count == 1 ? "" : "s"))")
        } catch {
            isConnected = false
            return .failure(error)
        }
    }

    // MARK: - Tool Schemas

    /// Returns the tool definitions handled by `executeTool(name:arguments:)`.
    func getToolSchemas() -> [Tool] {
        return [
            makeTool(
                name: "paperless_search",
                description: "Search for documents in Paperless-NGX by title, content, tags, or any text. Returns document metadata and a preview of OCR-extracted content. Use this to find invoices, contracts, letters, or any stored document.",
                properties: [
                    "query": prop("string", "Search query — can be text from document content, title, correspondent name, or tag"),
                    "page": prop("number", "Page number for pagination (default: 1, each page has 25 results)")
                ],
                required: ["query"]
            ),
            makeTool(
                name: "paperless_get_document",
                description: "Get the full details and complete OCR-extracted text content of a specific Paperless-NGX document by ID. Use after paperless_search to read the full text of a document.",
                properties: [
                    "document_id": prop("number", "The numeric ID of the document to retrieve")
                ],
                required: ["document_id"]
            ),
            makeTool(
                name: "paperless_list_tags",
                description: "List all tags defined in Paperless-NGX with their document counts.",
                properties: [:],
                required: []
            ),
            makeTool(
                name: "paperless_list_correspondents",
                description: "List all correspondents (senders/recipients) defined in Paperless-NGX with their document counts.",
                properties: [:],
                required: []
            ),
            makeTool(
                name: "paperless_list_document_types",
                description: "List all document types defined in Paperless-NGX with their document counts.",
                properties: [:],
                required: []
            ),
            makeTool(
                name: "paperless_upload_document",
                description: "Upload a local file to Paperless-NGX for OCR processing and storage. Supports PDF, PNG, JPEG, TIFF, and other image formats.",
                properties: [
                    "file_path": prop("string", "Absolute path to the local file to upload"),
                    "title": prop("string", "Optional title for the document"),
                    "tag_ids": prop("string", "Optional comma-separated tag IDs to assign (e.g. '1,3,7')")
                ],
                required: ["file_path"]
            )
        ]
    }

    // MARK: - Tool Execution

    /// Runs one Paperless tool call.
    ///
    /// - Parameters:
    ///   - name: Tool name as published by `getToolSchemas()`.
    ///   - arguments: JSON object with the tool arguments. An empty or
    ///     whitespace-only string is treated as `{}`.
    /// - Returns: The tool result, or a dictionary with a single `"error"` key.
    ///   This method never throws; every failure is reported through `"error"`.
    func executeTool(name: String, arguments: String) async -> [String: Any] {
        log.info("Executing Paperless tool: \(name)")

        // Callers may send an empty string for tools without parameters.
        let trimmedArguments = arguments.trimmingCharacters(in: .whitespacesAndNewlines)
        let args: [String: Any]
        if trimmedArguments.isEmpty {
            args = [:]
        } else if let parsed = try? JSONSerialization.jsonObject(with: Data(trimmedArguments.utf8)) as? [String: Any] {
            args = parsed
        } else {
            return ["error": "Invalid arguments JSON"]
        }

        do {
            switch name {
            case "paperless_search":
                guard let query = args["query"] as? String else {
                    return ["error": "Missing required parameter: query"]
                }
                // Missing, unparsable or non-positive page numbers fall back to page 1.
                let page = max(1, intArgument(args["page"]) ?? 1)
                return try await searchDocuments(query: query, page: page)

            case "paperless_get_document":
                guard let docId = intArgument(args["document_id"]) else {
                    return ["error": "Missing or invalid parameter: document_id (expected integer)"]
                }
                return try await getDocument(id: docId)

            case "paperless_list_tags":
                return try await listTags()

            case "paperless_list_correspondents":
                return try await listCorrespondents()

            case "paperless_list_document_types":
                return try await listDocumentTypes()

            case "paperless_upload_document":
                guard let filePath = args["file_path"] as? String else {
                    return ["error": "Missing required parameter: file_path"]
                }
                let title = args["title"] as? String
                let tagIds = args["tag_ids"] as? String
                return try await uploadDocument(filePath: filePath, title: title, tagIds: tagIds)

            default:
                return ["error": "Unknown Paperless tool: \(name)"]
            }
        } catch PaperlessError.notConfigured {
            return ["error": "Paperless-NGX is not configured. Set your URL and API token in Settings > Paperless."]
        } catch PaperlessError.unauthorized {
            return ["error": "Invalid API token. Check your Paperless-NGX token in Settings > Paperless."]
        } catch PaperlessError.httpError(let code, let msg) {
            return ["error": "Paperless-NGX API error \(code): \(msg)"]
        } catch {
            return ["error": "Paperless error: \(error.localizedDescription)"]
        }
    }

    /// Reads an integer tool argument that may arrive as an integer, a
    /// floating-point number (truncated toward zero) or a numeric string.
    ///
    /// - Returns: `nil` when the value is missing or cannot be represented as `Int`.
    private func intArgument(_ value: Any?) -> Int? {
        if let int = value as? Int { return int }
        if let double = value as? Double {
            // `Int(_:)` traps on NaN, infinity and out-of-range values;
            // `Int(exactly:)` returns nil for those instead.
            return Int(exactly: double.rounded(.towardZero))
        }
        if let text = value as? String {
            return Int(text.trimmingCharacters(in: .whitespaces))
        }
        return nil
    }

    // MARK: - API Operations

    /// Searches documents and returns one result page with resolved names and
    /// a content preview per document.
    private func searchDocuments(query: String, page: Int) async throws -> [String: Any] {
        await prefetchCaches()

        let result = try await request(endpoint: "/api/documents/", queryParams: [
            "query": query,
            "page": String(page)
        ])

        let total = result["count"] as? Int ?? 0
        guard let rawResults = result["results"] as? [[String: Any]] else {
            return ["total": total, "page": page, "results": []]
        }

        let formatted = rawResults.map { doc -> [String: Any] in
            var item: [String: Any] = [:]
            item["id"] = doc["id"] ?? 0
            item["title"] = doc["title"] ?? "Untitled"
            item["created"] = dateOnly(doc["created"])
            addResolvedNames(from: doc, to: &item)

            // Content preview capped at 500 chars
            if let content = doc["content"] as? String, !content.isEmpty {
                let preview = content.trimmingCharacters(in: .whitespacesAndNewlines)
                item["content_preview"] = String(preview.prefix(500))
            }
            return item
        }

        return ["total": total, "page": page, "results": formatted]
    }

    /// Fetches a single document with its metadata and OCR text.
    private func getDocument(id: Int) async throws -> [String: Any] {
        await prefetchCaches()

        let doc = try await request(endpoint: "/api/documents/\(id)/")

        var result: [String: Any] = [:]
        result["id"] = doc["id"] ?? id
        result["title"] = doc["title"] ?? "Untitled"
        result["created"] = dateOnly(doc["created"])
        result["added"] = dateOnly(doc["added"])
        result["modified"] = dateOnly(doc["modified"])
        addResolvedNames(from: doc, to: &result)

        // The serial number is expected as a JSON integer; a quoted value is
        // accepted as well so the field is not silently dropped.
        if let asn = doc["archive_serial_number"] as? Int {
            result["archive_serial_number"] = asn
        } else if let asn = doc["archive_serial_number"] as? String {
            result["archive_serial_number"] = asn
        }

        // Full OCR content capped at 30,000 chars; content_length reports the
        // untruncated (trimmed) length.
        if let content = doc["content"] as? String {
            let trimmed = content.trimmingCharacters(in: .whitespacesAndNewlines)
            result["content"] = String(trimmed.prefix(30_000))
            result["content_length"] = trimmed.count
        }

        return result
    }

    /// Keeps the first 10 characters of a timestamp string (the `YYYY-MM-DD`
    /// part of an ISO-8601 value); returns `""` for non-string values.
    private func dateOnly(_ value: Any?) -> String {
        (value as? String).map { String($0.prefix(10)) } ?? ""
    }

    /// Copies correspondent, document type and tags from a raw API document
    /// into `output`, replacing numeric IDs with cached names. IDs missing
    /// from the caches are rendered as `"ID:<n>"`.
    private func addResolvedNames(from doc: [String: Any], to output: inout [String: Any]) {
        if let corrId = doc["correspondent"] as? Int {
            output["correspondent"] = correspondentCache[corrId] ?? "ID:\(corrId)"
        }
        if let dtId = doc["document_type"] as? Int {
            output["document_type"] = documentTypeCache[dtId] ?? "ID:\(dtId)"
        }
        if let tagIds = doc["tags"] as? [Int] {
            output["tags"] = tagIds.map { tagCache[$0] ?? "ID:\($0)" }
        }
    }

    /// Lists tags (first 250) with their document counts.
    private func listTags() async throws -> [String: Any] {
        try await listNamedItems(endpoint: "/api/tags/", resultKey: "tags")
    }

    /// Lists correspondents (first 250) with their document counts.
    private func listCorrespondents() async throws -> [String: Any] {
        try await listNamedItems(endpoint: "/api/correspondents/", resultKey: "correspondents")
    }

    /// Lists document types (first 250) with their document counts.
    private func listDocumentTypes() async throws -> [String: Any] {
        try await listNamedItems(endpoint: "/api/document_types/", resultKey: "document_types")
    }

    /// Shared implementation of the three list tools: fetches up to 250
    /// entries from `endpoint` and returns them under `resultKey` as
    /// `id`/`name`/`count` dictionaries, plus a `"count"` of returned entries.
    private func listNamedItems(endpoint: String, resultKey: String) async throws -> [String: Any] {
        let result = try await request(endpoint: endpoint, queryParams: ["page_size": "250"])
        guard let items = result["results"] as? [[String: Any]] else {
            return ["count": 0, resultKey: []]
        }
        let formatted = items.map { entry -> [String: Any] in
            ["id": entry["id"] ?? 0, "name": entry["name"] ?? "Unknown", "count": entry["document_count"] ?? 0]
        }
        return ["count": formatted.count, resultKey: formatted]
    }

    /// Uploads a local file as a multipart/form-data POST.
    ///
    /// - Parameters:
    ///   - filePath: Path to the file; `~` is expanded and the path standardized.
    ///   - title: Optional document title; ignored when empty.
    ///   - tagIds: Optional comma-separated tag IDs; non-numeric entries are skipped.
    /// - Returns: A success dictionary, or an `"error"` dictionary when the
    ///   file is missing or unreadable.
    /// - Throws: `PaperlessError` for configuration, authentication and HTTP
    ///   failures; transport errors from `URLSession` propagate unchanged.
    private func uploadDocument(filePath: String, title: String?, tagIds: String?) async throws -> [String: Any] {
        let expanded = (filePath as NSString).expandingTildeInPath
        let resolved = (expanded as NSString).standardizingPath

        guard FileManager.default.fileExists(atPath: resolved) else {
            return ["error": "File not found: \(filePath)"]
        }
        guard let fileData = FileManager.default.contents(atPath: resolved) else {
            return ["error": "Cannot read file: \(filePath)"]
        }

        let fileName = (resolved as NSString).lastPathComponent

        let (baseURL, token) = try connectionSettings()
        guard let url = URL(string: baseURL + "/api/documents/post_document/") else {
            throw PaperlessError.notConfigured
        }

        let boundary = "PaperlessBoundary\(UUID().uuidString.replacingOccurrences(of: "-", with: ""))"
        var body = Data()

        func appendField(_ name: String, _ value: String) {
            body.append(Data("--\(boundary)\r\n".utf8))
            body.append(Data("Content-Disposition: form-data; name=\"\(name)\"\r\n\r\n".utf8))
            body.append(Data("\(value)\r\n".utf8))
        }

        if let title = title, !title.isEmpty {
            appendField("title", title)
        }

        if let tagIds = tagIds {
            // One "tags" form field per ID.
            let ids = tagIds.split(separator: ",").compactMap { Int($0.trimmingCharacters(in: .whitespaces)) }
            for id in ids {
                appendField("tags", String(id))
            }
        }

        let mimeType = mimeTypeFor(fileName: fileName)
        // The file name sits inside a quoted header parameter, so quotes and
        // line breaks must be escaped to keep the part header well-formed.
        let headerFileName = escapedForMultipartHeader(fileName)
        body.append(Data("--\(boundary)\r\n".utf8))
        body.append(Data("Content-Disposition: form-data; name=\"document\"; filename=\"\(headerFileName)\"\r\n".utf8))
        body.append(Data("Content-Type: \(mimeType)\r\n\r\n".utf8))
        body.append(fileData)
        body.append(Data("\r\n--\(boundary)--\r\n".utf8))

        var urlRequest = URLRequest(url: url, timeoutInterval: uploadTimeout)
        urlRequest.httpMethod = "POST"
        urlRequest.setValue("Token \(token)", forHTTPHeaderField: "Authorization")
        urlRequest.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")
        urlRequest.httpBody = body

        let (data, response) = try await URLSession.shared.data(for: urlRequest)
        guard let httpResponse = response as? HTTPURLResponse else {
            throw PaperlessError.httpError(0, "Invalid response")
        }

        if httpResponse.statusCode == 401 { throw PaperlessError.unauthorized }

        if (200...299).contains(httpResponse.statusCode) {
            return ["success": true, "message": "Document uploaded successfully. Paperless-NGX will process it shortly."]
        }

        let msg = String(data: data, encoding: .utf8) ?? "Unknown error"
        throw PaperlessError.httpError(httpResponse.statusCode, msg)
    }

    /// Escapes a file name for use inside a quoted multipart header parameter
    /// by percent-encoding `"`, CR and LF (the escaping browsers apply to
    /// form-data file names).
    private func escapedForMultipartHeader(_ name: String) -> String {
        var escaped = ""
        // Iterate scalars: "\r\n" is a single Character in Swift.
        for scalar in name.unicodeScalars {
            switch scalar {
            case "\"": escaped += "%22"
            case "\r": escaped += "%0D"
            case "\n": escaped += "%0A"
            default: escaped.unicodeScalars.append(scalar)
            }
        }
        return escaped
    }

    // MARK: - Cache Prefetch

    /// Fills any empty ID → name cache. Failures are ignored on purpose:
    /// lookups then fall back to `"ID:<n>"` placeholders.
    private func prefetchCaches() async {
        // Names cached from a previously configured server must not be reused.
        let currentURL = settings.paperlessURL
        if cacheSourceURL != currentURL {
            tagCache.removeAll()
            correspondentCache.removeAll()
            documentTypeCache.removeAll()
            cacheSourceURL = currentURL
        }

        if tagCache.isEmpty {
            tagCache = await fetchNameMap(endpoint: "/api/tags/")
        }
        if correspondentCache.isEmpty {
            correspondentCache = await fetchNameMap(endpoint: "/api/correspondents/")
        }
        if documentTypeCache.isEmpty {
            documentTypeCache = await fetchNameMap(endpoint: "/api/document_types/")
        }
    }

    /// Fetches up to 250 entries from `endpoint` and returns their `id` → `name`
    /// mapping, or an empty map when the request fails (best effort).
    private func fetchNameMap(endpoint: String) async -> [Int: String] {
        guard let result = try? await request(endpoint: endpoint, queryParams: ["page_size": "250"]),
              let items = result["results"] as? [[String: Any]] else {
            return [:]
        }
        var names: [Int: String] = [:]
        for item in items {
            if let id = item["id"] as? Int, let name = item["name"] as? String {
                names[id] = name
            }
        }
        return names
    }

    // MARK: - HTTP Client

    /// Reads the server URL and API token from settings.
    ///
    /// Surrounding whitespace and trailing slashes are removed from the URL so
    /// that appending an endpoint path never yields `//api/...`.
    ///
    /// - Throws: `PaperlessError.notConfigured` when either value is missing.
    private func connectionSettings() throws -> (baseURL: String, token: String) {
        guard let token = settings.paperlessAPIToken, !token.isEmpty else {
            throw PaperlessError.notConfigured
        }
        var baseURL = settings.paperlessURL.trimmingCharacters(in: .whitespacesAndNewlines)
        while baseURL.hasSuffix("/") {
            baseURL.removeLast()
        }
        guard !baseURL.isEmpty else { throw PaperlessError.notConfigured }
        return (baseURL, token)
    }

    /// Performs an authenticated GET and decodes the response as a JSON object.
    ///
    /// - Parameters:
    ///   - endpoint: Path appended to the configured base URL, e.g. `/api/tags/`.
    ///   - queryParams: Query parameters to append.
    /// - Throws: `PaperlessError.notConfigured` when URL or token is missing,
    ///   `.unauthorized` on HTTP 401, and `.httpError` for other non-2xx
    ///   statuses, transport failures (code 0) and 2xx responses whose body is
    ///   not a JSON object.
    private func request(endpoint: String, queryParams: [String: String] = [:]) async throws -> [String: Any] {
        let (baseURL, token) = try connectionSettings()

        let urlString = baseURL + endpoint
        guard var components = URLComponents(string: urlString) else {
            throw PaperlessError.httpError(0, "Invalid URL: \(urlString)")
        }
        if !queryParams.isEmpty {
            components.queryItems = queryParams.map { URLQueryItem(name: $0.key, value: $0.value) }
            // URLComponents leaves "+" unescaped, but servers that form-decode
            // the query string read it as a space.
            components.percentEncodedQuery = components.percentEncodedQuery?
                .replacingOccurrences(of: "+", with: "%2B")
        }
        guard let url = components.url else {
            throw PaperlessError.httpError(0, "Invalid URL: \(urlString)")
        }

        var urlRequest = URLRequest(url: url, timeoutInterval: readTimeout)
        urlRequest.httpMethod = "GET"
        urlRequest.setValue("Token \(token)", forHTTPHeaderField: "Authorization")
        urlRequest.setValue("application/json", forHTTPHeaderField: "Accept")

        do {
            let (data, response) = try await URLSession.shared.data(for: urlRequest)

            guard let httpResponse = response as? HTTPURLResponse else {
                throw PaperlessError.httpError(0, "Invalid response")
            }

            if httpResponse.statusCode == 401 { throw PaperlessError.unauthorized }

            guard (200...299).contains(httpResponse.statusCode) else {
                let msg = String(data: data, encoding: .utf8) ?? "Unknown error"
                throw PaperlessError.httpError(httpResponse.statusCode, msg)
            }

            // A successful status with a non-JSON body (for example an HTML
            // page from a wrong URL) must not be reported as a valid result.
            guard let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else {
                throw PaperlessError.httpError(
                    httpResponse.statusCode,
                    "Unexpected response (not a JSON object). Check the Paperless-NGX URL in Settings > Paperless."
                )
            }
            return json
        } catch let error as PaperlessError {
            throw error
        } catch {
            // Wrap transport-level failures so callers only see PaperlessError.
            throw PaperlessError.httpError(0, error.localizedDescription)
        }
    }

    // MARK: - Helpers

    /// Maps a file extension (case-insensitive) to a MIME type, defaulting to
    /// `application/octet-stream` for unknown extensions.
    private func mimeTypeFor(fileName: String) -> String {
        let ext = (fileName as NSString).pathExtension.lowercased()
        switch ext {
        case "pdf": return "application/pdf"
        case "png": return "image/png"
        case "jpg", "jpeg": return "image/jpeg"
        case "tiff", "tif": return "image/tiff"
        case "gif": return "image/gif"
        case "webp": return "image/webp"
        default: return "application/octet-stream"
        }
    }

    /// Builds a `function`-type tool whose parameters form a JSON `object` schema.
    private func makeTool(name: String, description: String, properties: [String: Tool.Function.Parameters.Property], required: [String]) -> Tool {
        Tool(
            type: "function",
            function: Tool.Function(
                name: name,
                description: description,
                parameters: Tool.Function.Parameters(
                    type: "object",
                    properties: properties,
                    required: required
                )
            )
        )
    }

    /// Builds a schema property with the given JSON type and description and no enum constraint.
    private func prop(_ type: String, _ description: String) -> Tool.Function.Parameters.Property {
        Tool.Function.Parameters.Property(type: type, description: description, enum: nil)
    }
}
|
|
|
|
// MARK: - Error Types
|
|
|
|
/// Failures reported by the Paperless-NGX integration.
enum PaperlessError: LocalizedError {
    /// The server URL or the API token is missing from settings.
    case notConfigured
    /// The server answered with HTTP 401.
    case unauthorized
    /// Any other failure: a status code (0 when no HTTP status is available)
    /// and a message.
    case httpError(Int, String)

    /// User-facing description of the failure.
    var errorDescription: String? {
        let message: String
        switch self {
        case let .httpError(statusCode, detail):
            message = "Paperless-NGX API error \(statusCode): \(detail)"
        case .unauthorized:
            message = "Invalid API token. Check your Paperless-NGX token in Settings > Paperless."
        case .notConfigured:
            message = "Paperless-NGX is not configured. Set your URL and API token in Settings > Paperless."
        }
        return message
    }
}
|