Initial commit
This commit is contained in:
143
oAI/Services/WebSearchService.swift
Normal file
143
oAI/Services/WebSearchService.swift
Normal file
@@ -0,0 +1,143 @@
|
||||
//
|
||||
// WebSearchService.swift
|
||||
// oAI
|
||||
//
|
||||
// DuckDuckGo web search for non-OpenRouter providers
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
/// A single hit returned by a web search.
///
/// All fields are plain strings so the value can be passed freely across
/// concurrency domains (`Sendable`).
struct SearchResult: Sendable {
    /// Human-readable title of the result page.
    let title: String

    /// Destination URL of the result, stored as a string.
    let url: String

    /// Short text excerpt describing the result; empty when none was found.
    let snippet: String
}
|
||||
|
||||
/// Keyless web search backed by DuckDuckGo's HTML-only endpoint.
///
/// The service fetches `https://html.duckduckgo.com/html/`, extracts result
/// blocks from the returned markup with regular expressions, and can render
/// the results as markdown text. All failures are reported as an empty result
/// list; nothing here throws.
final class WebSearchService: Sendable {
    /// Process-wide shared instance.
    nonisolated static let shared = WebSearchService()

    /// Session used for all search requests (10 second per-request timeout).
    private let session: URLSession

    nonisolated private init() {
        let config = URLSessionConfiguration.default
        config.timeoutIntervalForRequest = 10
        session = URLSession(configuration: config)
    }

    /// Search DuckDuckGo HTML interface (no API key needed).
    ///
    /// - Parameters:
    ///   - query: Free-form search text.
    ///   - maxResults: Upper bound on the number of results returned.
    /// - Returns: The parsed results, or an empty array when the URL cannot be
    ///   built, the request fails, the server answers with a non-2xx status,
    ///   or the body is not valid UTF-8.
    nonisolated func search(query: String, maxResults: Int = 5) async -> [SearchResult] {
        Log.search.info("Web search: \(query)")

        // `.urlQueryAllowed` keeps "&", "=", "+" and ";" as-is because they are
        // legal inside a query string. Inside a single parameter *value* they
        // would split the parameter or be read as a space, so escape them too.
        var allowedInValue = CharacterSet.urlQueryAllowed
        allowedInValue.remove(charactersIn: "&=+;")

        guard let encoded = query.addingPercentEncoding(withAllowedCharacters: allowedInValue),
              let url = URL(string: "https://html.duckduckgo.com/html/?q=\(encoded)")
        else { return [] }

        var request = URLRequest(url: url)
        request.httpMethod = "GET"
        request.setValue(
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            forHTTPHeaderField: "User-Agent"
        )

        do {
            let (data, response) = try await session.data(for: request)

            // Do not try to parse error or rate-limit pages as results.
            if let http = response as? HTTPURLResponse, !(200..<300).contains(http.statusCode) {
                Log.search.error("Web search failed: HTTP \(http.statusCode)")
                return []
            }

            guard let html = String(data: data, encoding: .utf8) else { return [] }
            return parseResults(from: html, maxResults: maxResults)
        } catch {
            Log.search.error("Web search failed: \(error.localizedDescription)")
            return []
        }
    }

    /// Format search results as markdown for prompt injection.
    ///
    /// - Parameters:
    ///   - results: Results to render, in display order.
    ///   - maxLength: Character budget for the header plus result entries.
    ///     Entries that would push the text past this budget are replaced by a
    ///     single "more results truncated" line.
    /// - Returns: The markdown text, trimmed of surrounding whitespace, or a
    ///   fixed "no results" message when `results` is empty.
    nonisolated func formatResults(_ results: [SearchResult], maxLength: Int = 2000) -> String {
        guard !results.isEmpty else { return "No search results found." }

        var formatted = "**Web Search Results:**\n\n"

        for (index, result) in results.enumerated() {
            var entry = "\(index + 1). **\(result.title)**\n"
            entry += " URL: \(result.url)\n"
            if !result.snippet.isEmpty {
                entry += " \(result.snippet)\n"
            }
            entry += "\n"

            // Stop before exceeding the budget and say how many were dropped.
            guard formatted.count + entry.count <= maxLength else {
                formatted += "... (\(results.count - index) more results truncated)\n"
                break
            }

            formatted += entry
        }

        return formatted.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    // MARK: - HTML Parsing

    /// Extracts up to `maxResults` search results from a DuckDuckGo HTML page.
    ///
    /// - Parameters:
    ///   - html: Full page markup.
    ///   - maxResults: Upper bound on returned results; values `<= 0` yield `[]`.
    /// - Returns: Results in page order. Blocks without a recognizable title
    ///   link are skipped and do not count toward `maxResults`.
    private nonisolated func parseResults(from html: String, maxResults: Int) -> [SearchResult] {
        // Result block: from one `<div class="result results_links ...">` up to
        // the next one, the links container, or the end of the document. The
        // `\z` alternative keeps the final result on the page from being lost.
        let blockPattern = #"<div class="result results_links.*?(?=<div class="result results_links|<div id="links"|\z)"#
        // <a class="result__a" href="...">Title</a>; the title may contain inline tags.
        let titlePattern = #"<a[^>]*class="result__a"[^>]*href="([^"]+)"[^>]*>(.+?)</a>"#
        // <a class="result__snippet" ...>text</a>; the text may contain inline tags such as <b>.
        let snippetPattern = #"<a[^>]*class="result__snippet"[^>]*>(.+?)</a>"#
        // Real destination carried in the redirect link's `uddg` parameter.
        let uddgPattern = #"uddg=([^&]+)"#

        // Compile every pattern once, outside the per-block loop.
        guard maxResults > 0,
              let blockRegex = try? NSRegularExpression(pattern: blockPattern, options: .dotMatchesLineSeparators),
              let titleRegex = try? NSRegularExpression(pattern: titlePattern, options: .dotMatchesLineSeparators),
              let snippetRegex = try? NSRegularExpression(pattern: snippetPattern, options: .dotMatchesLineSeparators),
              let uddgRegex = try? NSRegularExpression(pattern: uddgPattern)
        else { return [] }

        var results: [SearchResult] = []
        let blocks = blockRegex.matches(in: html, range: NSRange(html.startIndex..., in: html))

        for match in blocks {
            if results.count >= maxResults { break }

            guard let blockRange = Range(match.range, in: html) else { continue }
            let block = String(html[blockRange])

            // Group 0 is the href, group 1 the (possibly tagged) title markup.
            guard let link = firstMatchGroups(titleRegex, in: block), link.count == 2 else { continue }
            let title = plainText(fromHTML: link[1])

            // Attribute values are entity-encoded in the markup ("&amp;" etc.).
            var resultURL = decodeHTMLEntities(link[0])

            var snippet = ""
            if let rawSnippet = firstMatchGroups(snippetRegex, in: block)?.first {
                snippet = plainText(fromHTML: rawSnippet)
            }

            // Decode DDG redirect URL: keep the original href if the parameter
            // cannot be extracted or percent-decoded.
            if resultURL.contains("uddg="),
               let target = firstMatchGroups(uddgRegex, in: resultURL)?.first?.removingPercentEncoding {
                resultURL = target
            }

            results.append(SearchResult(title: title, url: resultURL, snippet: snippet))
        }

        return results
    }

    /// Returns the capture groups (1...) of the first match of `regex` in `text`,
    /// or `nil` when there is no match or a group did not participate.
    private nonisolated func firstMatchGroups(_ regex: NSRegularExpression, in text: String) -> [String]? {
        let fullRange = NSRange(text.startIndex..., in: text)
        guard let match = regex.firstMatch(in: text, range: fullRange) else { return nil }

        var groups: [String] = []
        for index in 1..<match.numberOfRanges {
            guard let range = Range(match.range(at: index), in: text) else { return nil }
            groups.append(String(text[range]))
        }
        return groups
    }

    /// Converts an HTML fragment to display text: removes tags, decodes
    /// entities, and trims surrounding whitespace.
    private nonisolated func plainText(fromHTML fragment: String) -> String {
        // Strip tags before decoding so that an encoded "&lt;b&gt;" in the text
        // is kept as literal characters rather than removed as markup.
        let withoutTags = fragment.replacingOccurrences(of: "<[^>]+>", with: "", options: .regularExpression)
        return decodeHTMLEntities(withoutTags).trimmingCharacters(in: .whitespacesAndNewlines)
    }

    /// Replaces a fixed set of common HTML entities with their characters.
    ///
    /// Entities outside this set are left untouched.
    private nonisolated func decodeHTMLEntities(_ string: String) -> String {
        // "&amp;" must be handled last: decoding it first would turn an escaped
        // entity such as "&amp;lt;" into "&lt;" and then wrongly into "<".
        let entities: [(String, String)] = [
            ("&lt;", "<"), ("&gt;", ">"),
            ("&quot;", "\""), ("&#39;", "'"), ("&#x27;", "'"),
            ("&apos;", "'"), ("&#x2F;", "/"), ("&nbsp;", " "),
            ("&amp;", "&"),
        ]

        var result = string
        for (entity, character) in entities {
            result = result.replacingOccurrences(of: entity, with: character)
        }
        return result
    }
}
|
||||
Reference in New Issue
Block a user