2.1 #2

Merged
rune merged 10 commits from 2.1 into main 2026-02-03 09:02:44 +01:00
Showing only changes of commit 9a76ce2c1f - Show all commits

82
oai.py
View File

@@ -4532,12 +4532,24 @@ def chat():
current_index = len(session_history) - 1 current_index = len(session_history) - 1
if conversation_memory_enabled: if conversation_memory_enabled:
memory_start_index = 0 memory_start_index = 0
# Recalculate session totals from loaded message history
total_input_tokens = 0 total_input_tokens = 0
total_output_tokens = 0 total_output_tokens = 0
total_cost = 0.0 total_cost = 0.0
message_count = 0 message_count = 0
for msg in session_history:
# Handle both old format (no cost data) and new format (with cost data)
total_input_tokens += msg.get('prompt_tokens', 0)
total_output_tokens += msg.get('completion_tokens', 0)
total_cost += msg.get('msg_cost', 0.0)
message_count += 1
console.print(f"[bold green]Conversation '{conversation_name}' loaded with {len(session_history)} messages.[/]") console.print(f"[bold green]Conversation '{conversation_name}' loaded with {len(session_history)} messages.[/]")
- app_logger.info(f"Conversation '{conversation_name}' loaded with {len(session_history)} messages")
+ if total_cost > 0:
console.print(f"[dim cyan]Restored session totals: {total_input_tokens + total_output_tokens} tokens, ${total_cost:.4f} cost[/]")
app_logger.info(f"Conversation '{conversation_name}' loaded with {len(session_history)} messages, restored cost: ${total_cost:.4f}")
continue continue
elif user_input.lower().startswith("/delete"): elif user_input.lower().startswith("/delete"):
@@ -4680,8 +4692,8 @@ def chat():
total_output_tokens = 0 total_output_tokens = 0
total_cost = 0.0 total_cost = 0.0
message_count = 0 message_count = 0
- console.print("[bold green]Conversation context reset.[/]")
+ console.print("[bold green]Conversation context reset. Totals cleared.[/]")
- app_logger.info("Conversation context reset by user")
+ app_logger.info("Conversation context reset by user - all totals reset to 0")
continue continue
elif user_input.lower().startswith("/info"): elif user_input.lower().startswith("/info"):
@@ -5776,10 +5788,13 @@ All queries are read-only. INSERT/UPDATE/DELETE are not allowed."""
stream_interrupted = False stream_interrupted = False
if is_streaming: if is_streaming:
# Store the last chunk to get usage data after streaming completes
last_chunk = None
try: try:
with Live("", console=console, refresh_per_second=10, auto_refresh=True) as live: with Live("", console=console, refresh_per_second=10, auto_refresh=True) as live:
try: try:
for chunk in response: for chunk in response:
last_chunk = chunk # Keep track of last chunk for usage data
if hasattr(chunk, 'error') and chunk.error: if hasattr(chunk, 'error') and chunk.error:
console.print(f"\n[bold red]Stream error: {chunk.error.message}[/]") console.print(f"\n[bold red]Stream error: {chunk.error.message}[/]")
app_logger.error(f"Stream error: {chunk.error.message}") app_logger.error(f"Stream error: {chunk.error.message}")
@@ -5835,6 +5850,14 @@ All queries are read-only. INSERT/UPDATE/DELETE are not allowed."""
pass pass
continue # Now it's safe to continue continue # Now it's safe to continue
# For streaming, try to get usage from last chunk
if last_chunk and hasattr(last_chunk, 'usage'):
response.usage = last_chunk.usage
app_logger.debug("Extracted usage data from last streaming chunk")
elif last_chunk:
app_logger.warning("Last streaming chunk has no usage data")
else: else:
full_response = response.choices[0].message.content if response.choices else "" full_response = response.choices[0].message.content if response.choices else ""
# Clear any processing messages before showing response # Clear any processing messages before showing response
@@ -5847,13 +5870,54 @@ All queries are read-only. INSERT/UPDATE/DELETE are not allowed."""
console.print() console.print()
console.print(Panel(md, title="[bold green]AI Response[/]", title_align="left", border_style="green")) console.print(Panel(md, title="[bold green]AI Response[/]", title_align="left", border_style="green"))
- session_history.append({'prompt': user_input, 'response': full_response})
+ # Extract usage data BEFORE appending to history
current_index = len(session_history) - 1
usage = getattr(response, 'usage', None) usage = getattr(response, 'usage', None)
input_tokens = usage.input_tokens if usage and hasattr(usage, 'input_tokens') else 0
- output_tokens = usage.output_tokens if usage and hasattr(usage, 'output_tokens') else 0
+ # DEBUG: Log what OpenRouter actually returns
- msg_cost = usage.total_cost_usd if usage and hasattr(usage, 'total_cost_usd') else estimate_cost(input_tokens, output_tokens)
+ if usage:
app_logger.debug(f"Usage object type: {type(usage)}")
app_logger.debug(f"Usage attributes: {dir(usage)}")
if hasattr(usage, '__dict__'):
app_logger.debug(f"Usage dict: {usage.__dict__}")
else:
app_logger.warning("No usage object in response!")
# Try both attribute naming conventions (OpenAI standard vs Anthropic)
input_tokens = 0
output_tokens = 0
if usage:
# Try prompt_tokens/completion_tokens (OpenAI/OpenRouter standard)
if hasattr(usage, 'prompt_tokens'):
input_tokens = usage.prompt_tokens or 0
elif hasattr(usage, 'input_tokens'):
input_tokens = usage.input_tokens or 0
if hasattr(usage, 'completion_tokens'):
output_tokens = usage.completion_tokens or 0
elif hasattr(usage, 'output_tokens'):
output_tokens = usage.output_tokens or 0
app_logger.debug(f"Extracted tokens: input={input_tokens}, output={output_tokens}")
# Get cost from API or estimate
msg_cost = 0.0
if usage and hasattr(usage, 'total_cost_usd') and usage.total_cost_usd:
msg_cost = float(usage.total_cost_usd)
app_logger.debug(f"Using API cost: ${msg_cost:.6f}")
else:
msg_cost = estimate_cost(input_tokens, output_tokens)
app_logger.debug(f"Estimated cost: ${msg_cost:.6f} (from {input_tokens} input + {output_tokens} output tokens)")
# NOW append to history with cost data
session_history.append({
'prompt': user_input,
'response': full_response,
'msg_cost': msg_cost,
'prompt_tokens': input_tokens,
'completion_tokens': output_tokens
})
current_index = len(session_history) - 1
total_input_tokens += input_tokens total_input_tokens += input_tokens
total_output_tokens += output_tokens total_output_tokens += output_tokens