Files
oai-swift/oAI/Services/WebSearchService.swift

162 lines
6.4 KiB
Swift

//
// WebSearchService.swift
// oAI
//
// DuckDuckGo web search for non-OpenRouter providers
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Copyright (C) 2026 Rune Olsen
//
// This file is part of oAI.
//
// oAI is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// oAI is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
// Public License for more details.
//
// You should have received a copy of the GNU Affero General Public
// License along with oAI. If not, see <https://www.gnu.org/licenses/>.
import Foundation
import os
/// A single hit extracted from a web search results page.
///
/// The type is a plain, immutable value made only of strings, which is what
/// lets it be declared `Sendable`.
struct SearchResult: Sendable {
/// Display title of the result.
let title: String
/// Address of the result, kept as a plain `String` rather than a `URL`.
let url: String
/// Short text excerpt accompanying the result; may be empty.
let snippet: String
}
/// Keyless web search backed by DuckDuckGo's HTML endpoint.
///
/// The service fetches `https://html.duckduckgo.com/html/`, scrapes the result
/// markup with regular expressions and exposes the hits as `SearchResult`
/// values. Its only stored state is an immutable `URLSession`.
final class WebSearchService: Sendable {
    /// The single instance of the service; the initializer is private.
    nonisolated static let shared = WebSearchService()

    /// Session used for every search request.
    private let session: URLSession

    /// Creates the session with a 10-second per-request timeout.
    nonisolated private init() {
        let config = URLSessionConfiguration.default
        config.timeoutIntervalForRequest = 10
        session = URLSession(configuration: config)
    }

    /// Search DuckDuckGo HTML interface (no API key needed).
    ///
    /// Failures are never thrown: network errors, non-2xx HTTP statuses and
    /// unparseable pages are logged (where applicable) and reported as an
    /// empty array.
    ///
    /// - Parameters:
    ///   - query: Free-text search terms. Any character is accepted; the value
    ///     is fully percent-encoded before being placed in the URL.
    ///   - maxResults: Upper bound on the number of results returned. Values
    ///     of zero or less yield an empty array without touching the network.
    /// - Returns: Up to `maxResults` results in page order.
    nonisolated func search(query: String, maxResults: Int = 5) async -> [SearchResult] {
        Log.search.info("Web search: \(query)")
        guard maxResults > 0 else { return [] }

        // `.urlQueryAllowed` leaves "&", "+" and "=" unescaped, which would
        // split or alter the `q` value (e.g. "AT&T", "C++"). Encoding
        // everything except the RFC 3986 unreserved characters keeps the query
        // intact.
        let unreserved = CharacterSet(
            charactersIn: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
        )
        guard let encoded = query.addingPercentEncoding(withAllowedCharacters: unreserved),
              let url = URL(string: "https://html.duckduckgo.com/html/?q=\(encoded)")
        else { return [] }

        var request = URLRequest(url: url)
        request.httpMethod = "GET"
        request.setValue(
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            forHTTPHeaderField: "User-Agent"
        )

        do {
            let (data, response) = try await session.data(for: request)

            // An error page is not a result page; report it instead of
            // scraping it.
            if let http = response as? HTTPURLResponse, !(200..<300).contains(http.statusCode) {
                Log.search.error("Web search failed: HTTP \(http.statusCode)")
                return []
            }

            // Lossy decoding: one stray byte in the page should not discard
            // every result.
            let html = String(decoding: data, as: UTF8.self)
            return parseResults(from: html, maxResults: maxResults)
        } catch {
            Log.search.error("Web search failed: \(error.localizedDescription)")
            return []
        }
    }

    /// Format search results as markdown for prompt injection.
    ///
    /// - Parameters:
    ///   - results: Results to render, in the order they should appear.
    ///   - maxLength: Character budget for the header plus the rendered
    ///     entries. An entry that would push the text past the budget is
    ///     replaced, together with all following entries, by a one-line
    ///     truncation notice; the notice itself is not counted against the
    ///     budget.
    /// - Returns: The markdown text, or `"No search results found."` when
    ///   `results` is empty.
    nonisolated func formatResults(_ results: [SearchResult], maxLength: Int = 2000) -> String {
        if results.isEmpty { return "No search results found." }

        var formatted = "**Web Search Results:**\n\n"
        for (i, result) in results.enumerated() {
            var entry = "\(i + 1). **\(result.title)**\n"
            entry += " URL: \(result.url)\n"
            if !result.snippet.isEmpty {
                entry += " \(result.snippet)\n"
            }
            entry += "\n"

            if formatted.count + entry.count > maxLength {
                formatted += "... (\(results.count - i) more results truncated)\n"
                break
            }
            formatted += entry
        }
        return formatted.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    // MARK: - HTML Parsing

    /// Extracts up to `maxResults` results from a DuckDuckGo HTML results page.
    ///
    /// Blocks that contain no recognizable title link are skipped and do not
    /// count toward `maxResults`.
    ///
    /// - Parameters:
    ///   - html: Full page markup.
    ///   - maxResults: Upper bound on the number of results; zero or less
    ///     yields an empty array.
    /// - Returns: Parsed results in page order.
    private nonisolated func parseResults(from html: String, maxResults: Int) -> [SearchResult] {
        guard maxResults > 0 else { return [] }

        // Match result blocks: <div class="result results_links ...">.
        // The trailing `\z` alternative lets the final block on the page match
        // even though no further result block follows it.
        let blockPattern = #"<div class="result results_links.*?(?=<div class="result results_links|<div id="links"|\z)"#
        // Title and URL: <a class="result__a" href="...">Title</a>.
        // The link text is captured lazily so inline markup does not prevent a
        // match; tags are stripped afterwards.
        let titlePattern = #"<a[^>]*class="result__a"[^>]*href="([^"]+)"[^>]*>(.*?)</a>"#
        // Snippet: <a class="result__snippet" ...>text</a>, same capture rule.
        let snippetPattern = #"<a[^>]*class="result__snippet"[^>]*>(.*?)</a>"#
        // Real target inside a DDG redirect link.
        let redirectPattern = #"uddg=([^&]+)"#

        // Compile every expression once, outside the per-block loop.
        guard let blockRegex = try? NSRegularExpression(pattern: blockPattern, options: .dotMatchesLineSeparators),
              let titleRegex = try? NSRegularExpression(pattern: titlePattern, options: .dotMatchesLineSeparators),
              let snippetRegex = try? NSRegularExpression(pattern: snippetPattern, options: .dotMatchesLineSeparators),
              let redirectRegex = try? NSRegularExpression(pattern: redirectPattern)
        else { return [] }

        var results: [SearchResult] = []
        let wholeDocument = NSRange(html.startIndex..., in: html)

        for match in blockRegex.matches(in: html, range: wholeDocument) {
            if results.count == maxResults { break }
            guard let blockRange = Range(match.range, in: html) else { continue }
            let block = String(html[blockRange])
            let blockSpan = NSRange(block.startIndex..., in: block)

            guard let titleMatch = titleRegex.firstMatch(in: block, range: blockSpan),
                  let hrefRange = Range(titleMatch.range(at: 1), in: block),
                  let titleRange = Range(titleMatch.range(at: 2), in: block)
            else { continue }

            let title = plainText(fromHTMLFragment: String(block[titleRange]))

            var snippet = ""
            if let snippetMatch = snippetRegex.firstMatch(in: block, range: blockSpan),
               let snippetRange = Range(snippetMatch.range(at: 1), in: block) {
                snippet = plainText(fromHTMLFragment: String(block[snippetRange]))
            }

            // The href is an HTML attribute value, so entities such as
            // `&amp;` must be decoded before it is treated as a URL.
            var resultURL = decodeHTMLEntities(String(block[hrefRange]))

            // Decode DDG redirect URL
            if let redirectMatch = redirectRegex.firstMatch(
                   in: resultURL,
                   range: NSRange(resultURL.startIndex..., in: resultURL)
               ),
               let targetRange = Range(redirectMatch.range(at: 1), in: resultURL),
               let target = String(resultURL[targetRange]).removingPercentEncoding {
                resultURL = target
            }

            // Protocol-relative links ("//host/path") need a scheme to be
            // usable outside a browser.
            if resultURL.hasPrefix("//") {
                resultURL = "https:" + resultURL
            }

            results.append(SearchResult(title: title, url: resultURL, snippet: snippet))
        }
        return results
    }

    /// Converts the inner HTML of an element to plain text.
    ///
    /// Tags are removed before entities are decoded so that escaped markup in
    /// the text (for example `&lt;b&gt;`) survives as literal characters.
    ///
    /// - Parameter fragment: Inner HTML of a link element.
    /// - Returns: The text with tags removed, entities decoded and
    ///   surrounding whitespace and newlines trimmed.
    private nonisolated func plainText(fromHTMLFragment fragment: String) -> String {
        let withoutTags = fragment.replacingOccurrences(
            of: "<[^>]+>",
            with: "",
            options: .regularExpression
        )
        return decodeHTMLEntities(withoutTags).trimmingCharacters(in: .whitespacesAndNewlines)
    }

    /// Replaces a fixed set of HTML entities with their literal characters.
    ///
    /// - Parameter string: Text that may contain entities.
    /// - Returns: The text with the known entities decoded; anything else is
    ///   left untouched.
    private nonisolated func decodeHTMLEntities(_ string: String) -> String {
        // `&amp;` must be handled last: decoding it first would turn an
        // escaped entity such as `&amp;lt;` into `&lt;` and then into `<`.
        let entities: [(String, String)] = [
            ("&lt;", "<"), ("&gt;", ">"),
            ("&quot;", "\""), ("&#39;", "'"), ("&apos;", "'"),
            ("&#x27;", "'"), ("&#x2F;", "/"), ("&nbsp;", " "),
            ("&amp;", "&"),
        ]
        var result = string
        for (entity, char) in entities {
            result = result.replacingOccurrences(of: entity, with: char)
        }
        return result
    }
}