diff --git a/oAI.xcodeproj/project.pbxproj b/oAI.xcodeproj/project.pbxproj index bcfd403..127be75 100644 --- a/oAI.xcodeproj/project.pbxproj +++ b/oAI.xcodeproj/project.pbxproj @@ -283,7 +283,7 @@ LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks"; "LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks"; MACOSX_DEPLOYMENT_TARGET = 26.2; - MARKETING_VERSION = 2.3.6; + MARKETING_VERSION = 2.3.7; PRODUCT_BUNDLE_IDENTIFIER = com.oai.oAI; PRODUCT_NAME = "$(TARGET_NAME)"; REGISTER_APP_GROUPS = YES; @@ -327,7 +327,7 @@ LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks"; "LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks"; MACOSX_DEPLOYMENT_TARGET = 26.2; - MARKETING_VERSION = 2.3.6; + MARKETING_VERSION = 2.3.7; PRODUCT_BUNDLE_IDENTIFIER = com.oai.oAI; PRODUCT_NAME = "$(TARGET_NAME)"; REGISTER_APP_GROUPS = YES; diff --git a/oAI/Providers/OpenRouterModels.swift b/oAI/Providers/OpenRouterModels.swift index 7ad8504..7701a60 100644 --- a/oAI/Providers/OpenRouterModels.swift +++ b/oAI/Providers/OpenRouterModels.swift @@ -160,6 +160,18 @@ struct OpenRouterChatResponse: Codable { let content: String? let toolCalls: [APIToolCall]? let images: [ImageOutput]? + // Images extracted from content[] blocks (e.g. GPT-5 Image response format) + let contentBlockImages: [ImageOutput] + + private struct ContentBlock: Codable { + let type: String + let text: String? + let imageUrl: ImageOutput.ImageURL? + enum CodingKeys: String, CodingKey { + case type, text + case imageUrl = "image_url" + } + } enum CodingKeys: String, CodingKey { case role @@ -167,6 +179,27 @@ struct OpenRouterChatResponse: Codable { case toolCalls = "tool_calls" case images } + + init(from decoder: Decoder) throws { + let c = try decoder.container(keyedBy: CodingKeys.self) + role = try c.decode(String.self, forKey: .role) + toolCalls = try c.decodeIfPresent([APIToolCall].self, forKey: .toolCalls) + images = try c.decodeIfPresent([ImageOutput].self, forKey: .images) + // content can be a plain String OR an array of content blocks + if let text = try? c.decodeIfPresent(String.self, forKey: .content) { + content = text + contentBlockImages = [] + } else if let blocks = try? c.decodeIfPresent([ContentBlock].self, forKey: .content) { + content = blocks.compactMap { $0.text }.joined().nonEmptyOrNil + contentBlockImages = blocks.compactMap { block in + guard block.type == "image_url", let url = block.imageUrl else { return nil } + return ImageOutput(imageUrl: url) + } + } else { + content = nil + contentBlockImages = [] + } + } } enum CodingKeys: String, CodingKey { diff --git a/oAI/Providers/OpenRouterProvider.swift b/oAI/Providers/OpenRouterProvider.swift index a02906b..071b824 100644 --- a/oAI/Providers/OpenRouterProvider.swift +++ b/oAI/Providers/OpenRouterProvider.swift @@ -160,8 +160,17 @@ class OpenRouterProvider: AIProvider { throw ProviderError.unknown("HTTP \(httpResponse.statusCode)") } + // Debug: log raw response for image gen models + if request.imageGeneration, let rawStr = String(data: data, encoding: .utf8) { + Log.api.debug("Image gen raw response (first 3000 chars): \(rawStr.prefix(3000))") + } + let apiResponse = try JSONDecoder().decode(OpenRouterChatResponse.self, from: data) - return try convertToChatResponse(apiResponse) + let chatResponse = try convertToChatResponse(apiResponse) + if request.imageGeneration { + Log.api.debug("Image gen decoded: content='\(chatResponse.content)', generatedImages=\(chatResponse.generatedImages?.count ?? 0)") + } + return chatResponse } // MARK: - Chat with raw tool messages @@ -396,7 +405,10 @@ class OpenRouterProvider: AIProvider { ToolCallInfo(id: tc.id, type: tc.type, functionName: tc.function.name, arguments: tc.function.arguments) } - let images = choice.message.images.flatMap { decodeImageOutputs($0) } + let topLevelImages = choice.message.images.flatMap { decodeImageOutputs($0) } ?? [] + let blockImages = decodeImageOutputs(choice.message.contentBlockImages) ?? [] + let allImages = topLevelImages + blockImages + let images: [Data]? = allImages.isEmpty ? nil : allImages return ChatResponse( id: apiResponse.id, diff --git a/oAI/ViewModels/ChatViewModel.swift b/oAI/ViewModels/ChatViewModel.swift index 9fc86fe..ee19e11 100644 --- a/oAI/ViewModels/ChatViewModel.swift +++ b/oAI/ViewModels/ChatViewModel.swift @@ -1265,6 +1265,7 @@ Don't narrate future actions ("Let me...") - just use the tools. private func generateAIResponseWithTools(provider: AIProvider, modelId: String) { let mcp = MCPService.shared + Log.ui.info("generateAIResponseWithTools: model=\(modelId)") isGenerating = true streamingTask?.cancel() @@ -1351,6 +1352,8 @@ Don't narrate future actions ("Let me...") - just use the tools. let maxIterations = 10 // Increased from 5 to reduce hitting client-side limit var finalContent = "" + var finalImages: [Data] = [] + var didContinueAfterImages = false // Only inject temp-file continuation once var totalUsage: ChatResponse.Usage? var hitIterationLimit = false // Track if we exited due to hitting the limit @@ -1379,9 +1382,32 @@ Don't narrate future actions ("Let me...") - just use the tools. let toolCalls = structuredCalls.isEmpty ? textCalls : structuredCalls guard !toolCalls.isEmpty else { - // No tool calls — this is the final text response - // Strip any unparseable tool call text from display + // No tool calls — this is the final response finalContent = response.content + if let images = response.generatedImages { finalImages = images } + Log.ui.debug("Tools final response: content='\(response.content.prefix(80))', images=\(response.generatedImages?.count ?? 0)") + + // If images were generated and tools are available, save to temp files + // and continue the loop so the model can save them to the requested path. + if !finalImages.isEmpty && !didContinueAfterImages && iteration < maxIterations - 1 { + didContinueAfterImages = true + let timestamp = Int(Date().timeIntervalSince1970) + let tempPaths: [String] = finalImages.enumerated().compactMap { i, imgData in + let path = "/tmp/oai_generated_\(timestamp)_\(i).png" + let ok = FileManager.default.createFile(atPath: path, contents: imgData) + Log.ui.debug("Saved generated image to temp: \(path) ok=\(ok)") + return ok ? path : nil + } + if !tempPaths.isEmpty { + let pathList = tempPaths.joined(separator: ", ") + let assistantContent = response.content.isEmpty ? "[Image generated]" : response.content + apiMessages.append(["role": "assistant", "content": assistantContent]) + apiMessages.append(["role": "user", "content": "The image(s) have been generated and temporarily saved to: \(pathList). Please save them to the requested destination(s) using the available tools (bash or MCP write)."]) + finalImages = [] + finalContent = "" + continue + } + } break } @@ -1491,7 +1517,8 @@ Don't narrate future actions ("Let me...") - just use the tools. attachments: nil, responseTime: responseTime, wasInterrupted: wasCancelled, - modelId: modelId + modelId: modelId, + generatedImages: finalImages.isEmpty ? nil : finalImages ) messages.append(assistantMessage) @@ -1935,12 +1962,11 @@ Don't narrate future actions ("Let me...") - just use the tools. func detectGoodbyePhrase(in text: String) -> Bool { let lowercased = text.lowercased() let goodbyePhrases = [ - "bye", "goodbye", "bye bye", - "thanks", "thank you", "thx", "ty", + "bye", "goodbye", "bye bye", "good bye", "that's all", "thats all", "that'll be all", - "done", "i'm done", "we're done", + "i'm done", "we're done", "see you", "see ya", "catch you later", - "have a good", "have a nice" + "have a good day", "have a nice day" ] return goodbyePhrases.contains { phrase in diff --git a/oAI/Views/Main/ContentView.swift b/oAI/Views/Main/ContentView.swift index 2de9db0..28dba53 100644 --- a/oAI/Views/Main/ContentView.swift +++ b/oAI/Views/Main/ContentView.swift @@ -44,7 +44,7 @@ struct ContentView: View { #endif } } - .frame(minWidth: 640, minHeight: 400) + .frame(minWidth: 860, minHeight: 560) #if os(macOS) .onAppear { NSApplication.shared.windows.forEach { $0.tabbingMode = .disallowed } @@ -120,24 +120,24 @@ struct ContentView: View { private var macOSToolbar: some ToolbarContent { let settings = SettingsService.shared let showLabels = settings.showToolbarLabels - let scale = iconScale(for: settings.toolbarIconSize) + let iconSize = settings.toolbarIconSize ToolbarItemGroup(placement: .automatic) { // New conversation Button(action: { chatViewModel.newConversation() }) { - ToolbarLabel(title: "New Chat", systemImage: "square.and.pencil", showLabels: showLabels, scale: scale) + ToolbarLabel(title: "New Chat", systemImage: "square.and.pencil", showLabels: showLabels, iconSize: iconSize) } .keyboardShortcut("n", modifiers: .command) .help("New conversation") Button(action: { chatViewModel.showConversations = true }) { - ToolbarLabel(title: "Conversations", systemImage: "clock.arrow.circlepath", showLabels: showLabels, scale: scale) + ToolbarLabel(title: "Conversations", systemImage: "clock.arrow.circlepath", showLabels: showLabels, iconSize: iconSize) } .keyboardShortcut("l", modifiers: .command) .help("Saved conversations (Cmd+L)") Button(action: { chatViewModel.showHistory = true }) { - ToolbarLabel(title: "History", systemImage: "list.bullet", showLabels: showLabels, scale: scale) + ToolbarLabel(title: "History", systemImage: "list.bullet", showLabels: showLabels, iconSize: iconSize) } .keyboardShortcut("h", modifiers: .command) .help("Command history (Cmd+H)") @@ -145,7 +145,7 @@ struct ContentView: View { Spacer() Button(action: { chatViewModel.showModelSelector = true }) { - ToolbarLabel(title: "Model", systemImage: "cpu", showLabels: showLabels, scale: scale) + ToolbarLabel(title: "Model", systemImage: "cpu", showLabels: showLabels, iconSize: iconSize) } .keyboardShortcut("m", modifiers: .command) .help("Select AI model (Cmd+M)") @@ -155,32 +155,32 @@ struct ContentView: View { chatViewModel.modelInfoTarget = model } }) { - ToolbarLabel(title: "Info", systemImage: "info.circle", showLabels: showLabels, scale: scale) + ToolbarLabel(title: "Info", systemImage: "info.circle", showLabels: showLabels, iconSize: iconSize) } .keyboardShortcut("i", modifiers: .command) .help("Model info (Cmd+I)") .disabled(chatViewModel.selectedModel == nil) Button(action: { chatViewModel.showStats = true }) { - ToolbarLabel(title: "Stats", systemImage: "chart.bar", showLabels: showLabels, scale: scale) + ToolbarLabel(title: "Stats", systemImage: "chart.bar", showLabels: showLabels, iconSize: iconSize) } .help("Session statistics") Button(action: { chatViewModel.showCredits = true }) { - ToolbarLabel(title: "Credits", systemImage: "creditcard", showLabels: showLabels, scale: scale) + ToolbarLabel(title: "Credits", systemImage: "creditcard", showLabels: showLabels, iconSize: iconSize) } .help("Check API credits") Spacer() Button(action: { chatViewModel.showSettings = true }) { - ToolbarLabel(title: "Settings", systemImage: "gearshape", showLabels: showLabels, scale: scale) + ToolbarLabel(title: "Settings", systemImage: "gearshape", showLabels: showLabels, iconSize: iconSize) } .keyboardShortcut(",", modifiers: .command) .help("Settings (Cmd+,)") Button(action: { chatViewModel.showHelp = true }) { - ToolbarLabel(title: "Help", systemImage: "questionmark.circle", showLabels: showLabels, scale: scale) + ToolbarLabel(title: "Help", systemImage: "questionmark.circle", showLabels: showLabels, iconSize: iconSize) } .keyboardShortcut("/", modifiers: .command) .help("Help & commands (Cmd+/)") @@ -188,14 +188,6 @@ struct ContentView: View { } #endif - // Helper function to convert icon size to imageScale - private func iconScale(for size: Double) -> Image.Scale { - switch size { - case ...18: return .small - case 19...24: return .medium - default: return .large - } - } } // Helper view for toolbar labels @@ -203,17 +195,41 @@ struct ToolbarLabel: View { let title: LocalizedStringKey let systemImage: String let showLabels: Bool - let scale: Image.Scale + let iconSize: Double + + // imageScale for the original range (≤32); explicit font size for the new extra-large range (>32) + private var scale: Image.Scale { + switch iconSize { + case ...18: return .small + case 19...24: return .medium + default: return .large + } + } var body: some View { - if showLabels { - Label(title, systemImage: systemImage) - .labelStyle(.titleAndIcon) - .imageScale(scale) + if iconSize > 32 { + // Extra-large: explicit font size above the system .large ceiling + // Offset by 16 so slider 34→18pt, 36→20pt, 38→22pt, 40→24pt + if showLabels { + Label(title, systemImage: systemImage) + .labelStyle(.titleAndIcon) + .font(.system(size: iconSize - 16)) + } else { + Label(title, systemImage: systemImage) + .labelStyle(.iconOnly) + .font(.system(size: iconSize - 16)) + } } else { - Label(title, systemImage: systemImage) - .labelStyle(.iconOnly) - .imageScale(scale) + // Original behaviour — imageScale keeps existing look intact + if showLabels { + Label(title, systemImage: systemImage) + .labelStyle(.titleAndIcon) + .imageScale(scale) + } else { + Label(title, systemImage: systemImage) + .labelStyle(.iconOnly) + .imageScale(scale) + } } } } diff --git a/oAI/Views/Screens/SettingsView.swift b/oAI/Views/Screens/SettingsView.swift index 8587285..e682810 100644 --- a/oAI/Views/Screens/SettingsView.swift +++ b/oAI/Views/Screens/SettingsView.swift @@ -749,7 +749,7 @@ It's better to admit "I need more information" or "I cannot do that" than to fak formSection { row("Icon Size") { HStack(spacing: 8) { - Slider(value: $settingsService.toolbarIconSize, in: 16...32, step: 2) + Slider(value: $settingsService.toolbarIconSize, in: 16...40, step: 2) .frame(maxWidth: 200) Text("\(Int(settingsService.toolbarIconSize)) pt") .font(.system(size: 13)) @@ -1803,6 +1803,13 @@ It's better to admit "I need more information" or "I cannot do that" than to fak Toggle("", isOn: $settingsService.paperlessEnabled) .toggleStyle(.switch) } + VStack(alignment: .leading, spacing: 2) { + Text("⚠️ Beta — Paperless integration is under active development. Some features may be incomplete or behave unexpectedly.") + .font(.caption) + .foregroundStyle(.secondary) + } + .padding(.horizontal, 12) + .padding(.bottom, 8) } } @@ -2092,13 +2099,13 @@ It's better to admit "I need more information" or "I cannot do that" than to fak .foregroundStyle(selectedTab == tag ? .blue : .secondary) if beta { Text("β") - .font(.system(size: 8, weight: .bold)) + .font(.system(size: 9, weight: .heavy)) .foregroundStyle(.white) - .padding(.horizontal, 3) - .padding(.vertical, 1) + .padding(.horizontal, 4) + .padding(.vertical, 2) .background(Color.orange) .clipShape(Capsule()) - .offset(x: 6, y: -2) + .offset(x: 8, y: -3) } } Text(label)