Bugfix: Lingering error in image generation from image gen. models

This commit is contained in:
2026-03-04 11:52:18 +01:00
parent 49f842f119
commit 305abfa85d
6 changed files with 137 additions and 43 deletions

View File

@@ -283,7 +283,7 @@
LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks";
"LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks";
MACOSX_DEPLOYMENT_TARGET = 26.2;
MARKETING_VERSION = 2.3.6;
MARKETING_VERSION = 2.3.7;
PRODUCT_BUNDLE_IDENTIFIER = com.oai.oAI;
PRODUCT_NAME = "$(TARGET_NAME)";
REGISTER_APP_GROUPS = YES;
@@ -327,7 +327,7 @@
LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks";
"LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks";
MACOSX_DEPLOYMENT_TARGET = 26.2;
MARKETING_VERSION = 2.3.6;
MARKETING_VERSION = 2.3.7;
PRODUCT_BUNDLE_IDENTIFIER = com.oai.oAI;
PRODUCT_NAME = "$(TARGET_NAME)";
REGISTER_APP_GROUPS = YES;

View File

@@ -160,6 +160,18 @@ struct OpenRouterChatResponse: Codable {
let content: String?
let toolCalls: [APIToolCall]?
let images: [ImageOutput]?
// Images extracted from content[] blocks (e.g. GPT-5 Image response format)
let contentBlockImages: [ImageOutput]
// One element of an array-form `content` field. Text blocks carry prose in
// `text`; `image_url` blocks carry image data in `imageUrl`.
// NOTE(review): assumes block `type` values are "text" / "image_url" per the
// OpenRouter response format — confirm against the API docs.
private struct ContentBlock: Codable {
let type: String
let text: String?
let imageUrl: ImageOutput.ImageURL?
enum CodingKeys: String, CodingKey {
case type, text
// wire format uses snake_case for this key
case imageUrl = "image_url"
}
}
enum CodingKeys: String, CodingKey {
case role
@@ -167,6 +179,27 @@ struct OpenRouterChatResponse: Codable {
case toolCalls = "tool_calls"
case images
}
/// Custom decoder: `content` may arrive as a plain String (standard chat
/// responses) or as an array of typed content blocks (e.g. image-generating
/// models that interleave text and image_url blocks). Images found inside
/// the block array are collected into `contentBlockImages`; the top-level
/// `images` field is decoded separately.
init(from decoder: Decoder) throws {
let c = try decoder.container(keyedBy: CodingKeys.self)
role = try c.decode(String.self, forKey: .role)
toolCalls = try c.decodeIfPresent([APIToolCall].self, forKey: .toolCalls)
images = try c.decodeIfPresent([ImageOutput].self, forKey: .images)
// content can be a plain String OR an array of content blocks.
// Order matters: try the String form first; if the value is actually an
// array, this decode throws a type mismatch and `try?` yields nil,
// falling through to the block-array form.
if let text = try? c.decodeIfPresent(String.self, forKey: .content) {
content = text
contentBlockImages = []
} else if let blocks = try? c.decodeIfPresent([ContentBlock].self, forKey: .content) {
// Concatenate all text blocks; empty result becomes nil via nonEmptyOrNil.
content = blocks.compactMap { $0.text }.joined().nonEmptyOrNil
// Keep only blocks that are images with a usable URL payload.
contentBlockImages = blocks.compactMap { block in
guard block.type == "image_url", let url = block.imageUrl else { return nil }
return ImageOutput(imageUrl: url)
}
} else {
// content absent, null, or an unrecognized shape — decode nothing
// rather than failing the whole response.
content = nil
contentBlockImages = []
}
}
}
enum CodingKeys: String, CodingKey {

View File

@@ -160,8 +160,17 @@ class OpenRouterProvider: AIProvider {
throw ProviderError.unknown("HTTP \(httpResponse.statusCode)")
}
// Debug: log raw response for image gen models
if request.imageGeneration, let rawStr = String(data: data, encoding: .utf8) {
Log.api.debug("Image gen raw response (first 3000 chars): \(rawStr.prefix(3000))")
}
let apiResponse = try JSONDecoder().decode(OpenRouterChatResponse.self, from: data)
return try convertToChatResponse(apiResponse)
let chatResponse = try convertToChatResponse(apiResponse)
if request.imageGeneration {
Log.api.debug("Image gen decoded: content='\(chatResponse.content)', generatedImages=\(chatResponse.generatedImages?.count ?? 0)")
}
return chatResponse
}
// MARK: - Chat with raw tool messages
@@ -396,7 +405,10 @@ class OpenRouterProvider: AIProvider {
ToolCallInfo(id: tc.id, type: tc.type, functionName: tc.function.name, arguments: tc.function.arguments)
}
let images = choice.message.images.flatMap { decodeImageOutputs($0) }
let topLevelImages = choice.message.images.flatMap { decodeImageOutputs($0) } ?? []
let blockImages = decodeImageOutputs(choice.message.contentBlockImages) ?? []
let allImages = topLevelImages + blockImages
let images: [Data]? = allImages.isEmpty ? nil : allImages
return ChatResponse(
id: apiResponse.id,

View File

@@ -1265,6 +1265,7 @@ Don't narrate future actions ("Let me...") - just use the tools.
private func generateAIResponseWithTools(provider: AIProvider, modelId: String) {
let mcp = MCPService.shared
Log.ui.info("generateAIResponseWithTools: model=\(modelId)")
isGenerating = true
streamingTask?.cancel()
@@ -1351,6 +1352,8 @@ Don't narrate future actions ("Let me...") - just use the tools.
let maxIterations = 10 // Increased from 5 to reduce hitting client-side limit
var finalContent = ""
var finalImages: [Data] = []
var didContinueAfterImages = false // Only inject temp-file continuation once
var totalUsage: ChatResponse.Usage?
var hitIterationLimit = false // Track if we exited due to hitting the limit
@@ -1379,9 +1382,32 @@ Don't narrate future actions ("Let me...") - just use the tools.
let toolCalls = structuredCalls.isEmpty ? textCalls : structuredCalls
guard !toolCalls.isEmpty else {
// No tool calls — this is the final text response
// Strip any unparseable tool call text from display
// No tool calls — this is the final response
finalContent = response.content
if let images = response.generatedImages { finalImages = images }
Log.ui.debug("Tools final response: content='\(response.content.prefix(80))', images=\(response.generatedImages?.count ?? 0)")
// If images were generated and tools are available, save to temp files
// and continue the loop so the model can save them to the requested path.
if !finalImages.isEmpty && !didContinueAfterImages && iteration < maxIterations - 1 {
didContinueAfterImages = true
let timestamp = Int(Date().timeIntervalSince1970)
let tempPaths: [String] = finalImages.enumerated().compactMap { i, imgData in
let path = "/tmp/oai_generated_\(timestamp)_\(i).png"
let ok = FileManager.default.createFile(atPath: path, contents: imgData)
Log.ui.debug("Saved generated image to temp: \(path) ok=\(ok)")
return ok ? path : nil
}
if !tempPaths.isEmpty {
let pathList = tempPaths.joined(separator: ", ")
let assistantContent = response.content.isEmpty ? "[Image generated]" : response.content
apiMessages.append(["role": "assistant", "content": assistantContent])
apiMessages.append(["role": "user", "content": "The image(s) have been generated and temporarily saved to: \(pathList). Please save them to the requested destination(s) using the available tools (bash or MCP write)."])
finalImages = []
finalContent = ""
continue
}
}
break
}
@@ -1491,7 +1517,8 @@ Don't narrate future actions ("Let me...") - just use the tools.
attachments: nil,
responseTime: responseTime,
wasInterrupted: wasCancelled,
modelId: modelId
modelId: modelId,
generatedImages: finalImages.isEmpty ? nil : finalImages
)
messages.append(assistantMessage)
@@ -1935,12 +1962,11 @@ Don't narrate future actions ("Let me...") - just use the tools.
func detectGoodbyePhrase(in text: String) -> Bool {
let lowercased = text.lowercased()
let goodbyePhrases = [
"bye", "goodbye", "bye bye",
"thanks", "thank you", "thx", "ty",
"bye", "goodbye", "bye bye", "good bye",
"that's all", "thats all", "that'll be all",
"done", "i'm done", "we're done",
"i'm done", "we're done",
"see you", "see ya", "catch you later",
"have a good", "have a nice"
"have a good day", "have a nice day"
]
return goodbyePhrases.contains { phrase in

View File

@@ -44,7 +44,7 @@ struct ContentView: View {
#endif
}
}
.frame(minWidth: 640, minHeight: 400)
.frame(minWidth: 860, minHeight: 560)
#if os(macOS)
.onAppear {
NSApplication.shared.windows.forEach { $0.tabbingMode = .disallowed }
@@ -120,24 +120,24 @@ struct ContentView: View {
private var macOSToolbar: some ToolbarContent {
let settings = SettingsService.shared
let showLabels = settings.showToolbarLabels
let scale = iconScale(for: settings.toolbarIconSize)
let iconSize = settings.toolbarIconSize
ToolbarItemGroup(placement: .automatic) {
// New conversation
Button(action: { chatViewModel.newConversation() }) {
ToolbarLabel(title: "New Chat", systemImage: "square.and.pencil", showLabels: showLabels, scale: scale)
ToolbarLabel(title: "New Chat", systemImage: "square.and.pencil", showLabels: showLabels, iconSize: iconSize)
}
.keyboardShortcut("n", modifiers: .command)
.help("New conversation")
Button(action: { chatViewModel.showConversations = true }) {
ToolbarLabel(title: "Conversations", systemImage: "clock.arrow.circlepath", showLabels: showLabels, scale: scale)
ToolbarLabel(title: "Conversations", systemImage: "clock.arrow.circlepath", showLabels: showLabels, iconSize: iconSize)
}
.keyboardShortcut("l", modifiers: .command)
.help("Saved conversations (Cmd+L)")
Button(action: { chatViewModel.showHistory = true }) {
ToolbarLabel(title: "History", systemImage: "list.bullet", showLabels: showLabels, scale: scale)
ToolbarLabel(title: "History", systemImage: "list.bullet", showLabels: showLabels, iconSize: iconSize)
}
.keyboardShortcut("h", modifiers: .command)
.help("Command history (Cmd+H)")
@@ -145,7 +145,7 @@ struct ContentView: View {
Spacer()
Button(action: { chatViewModel.showModelSelector = true }) {
ToolbarLabel(title: "Model", systemImage: "cpu", showLabels: showLabels, scale: scale)
ToolbarLabel(title: "Model", systemImage: "cpu", showLabels: showLabels, iconSize: iconSize)
}
.keyboardShortcut("m", modifiers: .command)
.help("Select AI model (Cmd+M)")
@@ -155,32 +155,32 @@ struct ContentView: View {
chatViewModel.modelInfoTarget = model
}
}) {
ToolbarLabel(title: "Info", systemImage: "info.circle", showLabels: showLabels, scale: scale)
ToolbarLabel(title: "Info", systemImage: "info.circle", showLabels: showLabels, iconSize: iconSize)
}
.keyboardShortcut("i", modifiers: .command)
.help("Model info (Cmd+I)")
.disabled(chatViewModel.selectedModel == nil)
Button(action: { chatViewModel.showStats = true }) {
ToolbarLabel(title: "Stats", systemImage: "chart.bar", showLabels: showLabels, scale: scale)
ToolbarLabel(title: "Stats", systemImage: "chart.bar", showLabels: showLabels, iconSize: iconSize)
}
.help("Session statistics")
Button(action: { chatViewModel.showCredits = true }) {
ToolbarLabel(title: "Credits", systemImage: "creditcard", showLabels: showLabels, scale: scale)
ToolbarLabel(title: "Credits", systemImage: "creditcard", showLabels: showLabels, iconSize: iconSize)
}
.help("Check API credits")
Spacer()
Button(action: { chatViewModel.showSettings = true }) {
ToolbarLabel(title: "Settings", systemImage: "gearshape", showLabels: showLabels, scale: scale)
ToolbarLabel(title: "Settings", systemImage: "gearshape", showLabels: showLabels, iconSize: iconSize)
}
.keyboardShortcut(",", modifiers: .command)
.help("Settings (Cmd+,)")
Button(action: { chatViewModel.showHelp = true }) {
ToolbarLabel(title: "Help", systemImage: "questionmark.circle", showLabels: showLabels, scale: scale)
ToolbarLabel(title: "Help", systemImage: "questionmark.circle", showLabels: showLabels, iconSize: iconSize)
}
.keyboardShortcut("/", modifiers: .command)
.help("Help & commands (Cmd+/)")
@@ -188,14 +188,6 @@ struct ContentView: View {
}
#endif
// Helper function to convert icon size to imageScale
// Maps the user's icon-size setting (in points) to SwiftUI's coarse image scale.
private func iconScale(for size: Double) -> Image.Scale {
    if size <= 18 { return .small }
    if (19...24).contains(size) { return .medium }
    // Anything above 24 (and the unused 18–19 gap) renders large.
    return .large
}
}
// Helper view for toolbar labels
@@ -203,9 +195,32 @@ struct ToolbarLabel: View {
let title: LocalizedStringKey
let systemImage: String
let showLabels: Bool
let scale: Image.Scale
let iconSize: Double
// imageScale for the original range (≤32pt); sizes above 32 are handled with
// an explicit font size instead, since .large is the system ceiling.
private var scale: Image.Scale {
    if iconSize <= 18 { return .small }
    if (19...24).contains(iconSize) { return .medium }
    // Above 24 (and the unused 18–19 gap) falls through to large.
    return .large
}
var body: some View {
if iconSize > 32 {
// Extra-large: explicit font size above the system .large ceiling
// Offset by 16 so slider 34 → 18pt, 36 → 20pt, 38 → 22pt, 40 → 24pt
if showLabels {
Label(title, systemImage: systemImage)
.labelStyle(.titleAndIcon)
.font(.system(size: iconSize - 16))
} else {
Label(title, systemImage: systemImage)
.labelStyle(.iconOnly)
.font(.system(size: iconSize - 16))
}
} else {
// Original behaviour — imageScale keeps existing look intact
if showLabels {
Label(title, systemImage: systemImage)
.labelStyle(.titleAndIcon)
@@ -216,6 +231,7 @@ struct ToolbarLabel: View {
.imageScale(scale)
}
}
}
}
#Preview {

View File

@@ -749,7 +749,7 @@ It's better to admit "I need more information" or "I cannot do that" than to fak
formSection {
row("Icon Size") {
HStack(spacing: 8) {
Slider(value: $settingsService.toolbarIconSize, in: 16...32, step: 2)
Slider(value: $settingsService.toolbarIconSize, in: 16...40, step: 2)
.frame(maxWidth: 200)
Text("\(Int(settingsService.toolbarIconSize)) pt")
.font(.system(size: 13))
@@ -1803,6 +1803,13 @@ It's better to admit "I need more information" or "I cannot do that" than to fak
Toggle("", isOn: $settingsService.paperlessEnabled)
.toggleStyle(.switch)
}
VStack(alignment: .leading, spacing: 2) {
Text("⚠️ Beta — Paperless integration is under active development. Some features may be incomplete or behave unexpectedly.")
.font(.caption)
.foregroundStyle(.secondary)
}
.padding(.horizontal, 12)
.padding(.bottom, 8)
}
}
@@ -2092,13 +2099,13 @@ It's better to admit "I need more information" or "I cannot do that" than to fak
.foregroundStyle(selectedTab == tag ? .blue : .secondary)
if beta {
Text("β")
.font(.system(size: 8, weight: .bold))
.font(.system(size: 9, weight: .heavy))
.foregroundStyle(.white)
.padding(.horizontal, 3)
.padding(.vertical, 1)
.padding(.horizontal, 4)
.padding(.vertical, 2)
.background(Color.orange)
.clipShape(Capsule())
.offset(x: 6, y: -2)
.offset(x: 8, y: -3)
}
}
Text(label)