//
// WebSearchService.swift
// oAI
//
// DuckDuckGo web search for non-OpenRouter providers
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// Copyright (C) 2026 Rune Olsen
//
// This file is part of oAI.
//
// oAI is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// oAI is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
// Public License for more details.
//
// You should have received a copy of the GNU Affero General Public
// License along with oAI. If not, see <https://www.gnu.org/licenses/>.

import Foundation
|
|
import os
|
|
|
|
/// One entry extracted from a web search results page.
struct SearchResult: Sendable {
    /// Human-readable title of the result.
    let title: String

    /// Address of the result, kept as a plain string.
    let url: String

    /// Short excerpt shown under the title; empty when none was found.
    let snippet: String
}
|
|
|
|
/// Scrapes DuckDuckGo's HTML search endpoint (no API key needed) and converts
/// the returned page into `SearchResult` values.
///
/// The only stored state is an immutable `URLSession`, configured once in `init`.
final class WebSearchService: Sendable {
    nonisolated static let shared = WebSearchService()

    private let session: URLSession

    nonisolated private init() {
        let config = URLSessionConfiguration.default
        config.timeoutIntervalForRequest = 10
        session = URLSession(configuration: config)
    }

    /// Search DuckDuckGo HTML interface (no API key needed).
    ///
    /// - Parameters:
    ///   - query: Free-form search text; it is percent-encoded before being
    ///     placed in the `q` query parameter.
    ///   - maxResults: Upper bound on the number of results returned. Values
    ///     less than or equal to zero yield an empty array without a request.
    /// - Returns: The parsed results, or an empty array when the URL cannot be
    ///   built, the request fails, or the body is not valid UTF-8.
    nonisolated func search(query: String, maxResults: Int = 5) async -> [SearchResult] {
        Log.search.info("Web search: \(query)")

        guard maxResults > 0 else { return [] }

        // `.urlQueryAllowed` leaves `&`, `+` and `=` untouched, which corrupts the
        // `q` value for queries such as "C++ & Rust". Escaping everything except
        // the RFC 3986 unreserved characters keeps the query intact.
        let unreserved = CharacterSet(
            charactersIn: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
        )
        guard let encoded = query.addingPercentEncoding(withAllowedCharacters: unreserved),
              let url = URL(string: "https://html.duckduckgo.com/html/?q=\(encoded)")
        else { return [] }

        var request = URLRequest(url: url)
        request.httpMethod = "GET"
        request.setValue(
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            forHTTPHeaderField: "User-Agent"
        )

        do {
            let (data, _) = try await session.data(for: request)
            guard let html = String(data: data, encoding: .utf8) else { return [] }
            return parseResults(from: html, maxResults: maxResults)
        } catch {
            Log.search.error("Web search failed: \(error.localizedDescription)")
            return []
        }
    }

    /// Format search results as markdown for prompt injection.
    ///
    /// Entries are appended in order until adding the next one would make the
    /// text longer than `maxLength` characters; at that point a one-line
    /// "more results truncated" note is appended instead and formatting stops.
    /// The note itself is not counted against `maxLength`.
    ///
    /// - Parameters:
    ///   - results: Results to render, in display order.
    ///   - maxLength: Character budget for the header plus the result entries.
    /// - Returns: The markdown text, or `"No search results found."` when
    ///   `results` is empty.
    nonisolated func formatResults(_ results: [SearchResult], maxLength: Int = 2000) -> String {
        guard !results.isEmpty else { return "No search results found." }

        var formatted = "**Web Search Results:**\n\n"

        for (i, result) in results.enumerated() {
            var entry = "\(i + 1). **\(result.title)**\n"
            entry += " URL: \(result.url)\n"
            if !result.snippet.isEmpty {
                entry += " \(result.snippet)\n"
            }
            entry += "\n"

            if formatted.count + entry.count > maxLength {
                formatted += "... (\(results.count - i) more results truncated)\n"
                break
            }

            formatted += entry
        }

        return formatted.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    // MARK: - HTML Parsing

    /// Extracts up to `maxResults` results from a DuckDuckGo HTML results page.
    ///
    /// - Parameters:
    ///   - html: Full page markup.
    ///   - maxResults: Upper bound on the number of results; non-positive values
    ///     return an empty array.
    /// - Returns: Results in page order. Blocks without a title link are skipped.
    private nonisolated func parseResults(from html: String, maxResults: Int) -> [SearchResult] {
        guard maxResults > 0 else { return [] }

        // A block runs from one `<div class="result results_links ...">` to the next
        // one, to `<div id="links"`, or to the end of the document. Without the
        // `\z` alternative the final block on the page is never matched.
        let blockPattern = #"<div class="result results_links.*?(?=<div class="result results_links|<div id="links"|\z)"#
        // Title/URL: <a class="result__a" href="...">Title</a>
        // Snippet:   <a class="result__snippet" ...>text</a>
        // The inner text is captured lazily up to `</a>` so inline markup such as
        // `<b>` does not make the match fail; tags are stripped afterwards.
        let titlePattern = #"<a[^>]*class="result__a"[^>]*href="([^"]+)"[^>]*>(.+?)</a>"#
        let snippetPattern = #"<a[^>]*class="result__snippet"[^>]*>(.+?)</a>"#

        // Compile every expression once per call rather than once per result.
        guard let blockRegex = try? NSRegularExpression(pattern: blockPattern, options: .dotMatchesLineSeparators),
              let titleRegex = try? NSRegularExpression(pattern: titlePattern, options: .dotMatchesLineSeparators),
              let snippetRegex = try? NSRegularExpression(pattern: snippetPattern, options: .dotMatchesLineSeparators)
        else { return [] }

        let wholePage = NSRange(html.startIndex..., in: html)
        var results: [SearchResult] = []

        for block in blockRegex.matches(in: html, range: wholePage) {
            if results.count >= maxResults { break }

            // Search inside the block's range of the original string; this avoids
            // copying each block into its own String.
            guard let titleMatch = titleRegex.firstMatch(in: html, range: block.range),
                  let hrefRange = Range(titleMatch.range(at: 1), in: html),
                  let titleRange = Range(titleMatch.range(at: 2), in: html)
            else { continue }

            let title = plainText(fromHTML: String(html[titleRange]))
            let resultURL = resolveURL(fromHref: String(html[hrefRange]))

            var snippet = ""
            if let snippetMatch = snippetRegex.firstMatch(in: html, range: block.range),
               let snippetRange = Range(snippetMatch.range(at: 1), in: html) {
                snippet = plainText(fromHTML: String(html[snippetRange]))
            }

            results.append(SearchResult(title: title, url: resultURL, snippet: snippet))
        }

        return results
    }

    /// Turns the raw `href` of a result link into the destination URL string.
    ///
    /// The attribute value is entity-decoded first. If it carries a `uddg=`
    /// parameter, the percent-decoded value of that parameter is returned; if
    /// percent-decoding fails the full href is kept. A leading `//` is given an
    /// `https:` scheme.
    private nonisolated func resolveURL(fromHref href: String) -> String {
        var target = decodeHTMLEntities(href)

        if let marker = target.range(of: "uddg=") {
            let value = target[marker.upperBound...].prefix { $0 != "&" }
            if !value.isEmpty, let decoded = String(value).removingPercentEncoding {
                target = decoded
            }
        }

        if target.hasPrefix("//") {
            target = "https:" + target
        }
        return target
    }

    /// Removes tags from an HTML fragment, decodes entities and trims
    /// surrounding whitespace.
    private nonisolated func plainText(fromHTML fragment: String) -> String {
        let withoutTags = fragment.replacingOccurrences(
            of: "<[^>]+>",
            with: "",
            options: .regularExpression
        )
        return decodeHTMLEntities(withoutTags).trimmingCharacters(in: .whitespacesAndNewlines)
    }

    /// Replaces a fixed set of HTML character references with their characters.
    ///
    /// Only the references listed in the table are handled; any other entity is
    /// left as-is.
    private nonisolated func decodeHTMLEntities(_ string: String) -> String {
        // `&amp;` must come last: decoding it first would turn `&amp;lt;` into
        // `&lt;` and then into `<`, i.e. decode the text twice.
        let entities: [(String, String)] = [
            ("&lt;", "<"), ("&gt;", ">"), ("&quot;", "\""),
            ("&#39;", "'"), ("&#x27;", "'"), ("&apos;", "'"),
            ("&#x2F;", "/"), ("&nbsp;", " "),
            ("&amp;", "&"),
        ]
        return entities.reduce(string) { partial, pair in
            partial.replacingOccurrences(of: pair.0, with: pair.1)
        }
    }
}
|