Fixed bug in cost display and handling
This commit is contained in:
82
oai.py
82
oai.py
@@ -4532,12 +4532,24 @@ def chat():
|
|||||||
current_index = len(session_history) - 1
|
current_index = len(session_history) - 1
|
||||||
if conversation_memory_enabled:
|
if conversation_memory_enabled:
|
||||||
memory_start_index = 0
|
memory_start_index = 0
|
||||||
|
|
||||||
|
# Recalculate session totals from loaded message history
|
||||||
total_input_tokens = 0
|
total_input_tokens = 0
|
||||||
total_output_tokens = 0
|
total_output_tokens = 0
|
||||||
total_cost = 0.0
|
total_cost = 0.0
|
||||||
message_count = 0
|
message_count = 0
|
||||||
|
|
||||||
|
for msg in session_history:
|
||||||
|
# Handle both old format (no cost data) and new format (with cost data)
|
||||||
|
total_input_tokens += msg.get('prompt_tokens', 0)
|
||||||
|
total_output_tokens += msg.get('completion_tokens', 0)
|
||||||
|
total_cost += msg.get('msg_cost', 0.0)
|
||||||
|
message_count += 1
|
||||||
|
|
||||||
console.print(f"[bold green]Conversation '{conversation_name}' loaded with {len(session_history)} messages.[/]")
|
console.print(f"[bold green]Conversation '{conversation_name}' loaded with {len(session_history)} messages.[/]")
|
||||||
app_logger.info(f"Conversation '{conversation_name}' loaded with {len(session_history)} messages")
|
if total_cost > 0:
|
||||||
|
console.print(f"[dim cyan]Restored session totals: {total_input_tokens + total_output_tokens} tokens, ${total_cost:.4f} cost[/]")
|
||||||
|
app_logger.info(f"Conversation '{conversation_name}' loaded with {len(session_history)} messages, restored cost: ${total_cost:.4f}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
elif user_input.lower().startswith("/delete"):
|
elif user_input.lower().startswith("/delete"):
|
||||||
@@ -4680,8 +4692,8 @@ def chat():
|
|||||||
total_output_tokens = 0
|
total_output_tokens = 0
|
||||||
total_cost = 0.0
|
total_cost = 0.0
|
||||||
message_count = 0
|
message_count = 0
|
||||||
console.print("[bold green]Conversation context reset.[/]")
|
console.print("[bold green]Conversation context reset. Totals cleared.[/]")
|
||||||
app_logger.info("Conversation context reset by user")
|
app_logger.info("Conversation context reset by user - all totals reset to 0")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
elif user_input.lower().startswith("/info"):
|
elif user_input.lower().startswith("/info"):
|
||||||
@@ -5776,10 +5788,13 @@ All queries are read-only. INSERT/UPDATE/DELETE are not allowed."""
|
|||||||
stream_interrupted = False
|
stream_interrupted = False
|
||||||
|
|
||||||
if is_streaming:
|
if is_streaming:
|
||||||
|
# Store the last chunk to get usage data after streaming completes
|
||||||
|
last_chunk = None
|
||||||
try:
|
try:
|
||||||
with Live("", console=console, refresh_per_second=10, auto_refresh=True) as live:
|
with Live("", console=console, refresh_per_second=10, auto_refresh=True) as live:
|
||||||
try:
|
try:
|
||||||
for chunk in response:
|
for chunk in response:
|
||||||
|
last_chunk = chunk # Keep track of last chunk for usage data
|
||||||
if hasattr(chunk, 'error') and chunk.error:
|
if hasattr(chunk, 'error') and chunk.error:
|
||||||
console.print(f"\n[bold red]Stream error: {chunk.error.message}[/]")
|
console.print(f"\n[bold red]Stream error: {chunk.error.message}[/]")
|
||||||
app_logger.error(f"Stream error: {chunk.error.message}")
|
app_logger.error(f"Stream error: {chunk.error.message}")
|
||||||
@@ -5835,6 +5850,14 @@ All queries are read-only. INSERT/UPDATE/DELETE are not allowed."""
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
continue # Now it's safe to continue
|
continue # Now it's safe to continue
|
||||||
|
|
||||||
|
# For streaming, try to get usage from last chunk
|
||||||
|
if last_chunk and hasattr(last_chunk, 'usage'):
|
||||||
|
response.usage = last_chunk.usage
|
||||||
|
app_logger.debug("Extracted usage data from last streaming chunk")
|
||||||
|
elif last_chunk:
|
||||||
|
app_logger.warning("Last streaming chunk has no usage data")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
full_response = response.choices[0].message.content if response.choices else ""
|
full_response = response.choices[0].message.content if response.choices else ""
|
||||||
# Clear any processing messages before showing response
|
# Clear any processing messages before showing response
|
||||||
@@ -5847,13 +5870,54 @@ All queries are read-only. INSERT/UPDATE/DELETE are not allowed."""
|
|||||||
console.print()
|
console.print()
|
||||||
console.print(Panel(md, title="[bold green]AI Response[/]", title_align="left", border_style="green"))
|
console.print(Panel(md, title="[bold green]AI Response[/]", title_align="left", border_style="green"))
|
||||||
|
|
||||||
session_history.append({'prompt': user_input, 'response': full_response})
|
# Extract usage data BEFORE appending to history
|
||||||
current_index = len(session_history) - 1
|
|
||||||
|
|
||||||
usage = getattr(response, 'usage', None)
|
usage = getattr(response, 'usage', None)
|
||||||
input_tokens = usage.input_tokens if usage and hasattr(usage, 'input_tokens') else 0
|
|
||||||
output_tokens = usage.output_tokens if usage and hasattr(usage, 'output_tokens') else 0
|
# DEBUG: Log what OpenRouter actually returns
|
||||||
msg_cost = usage.total_cost_usd if usage and hasattr(usage, 'total_cost_usd') else estimate_cost(input_tokens, output_tokens)
|
if usage:
|
||||||
|
app_logger.debug(f"Usage object type: {type(usage)}")
|
||||||
|
app_logger.debug(f"Usage attributes: {dir(usage)}")
|
||||||
|
if hasattr(usage, '__dict__'):
|
||||||
|
app_logger.debug(f"Usage dict: {usage.__dict__}")
|
||||||
|
else:
|
||||||
|
app_logger.warning("No usage object in response!")
|
||||||
|
|
||||||
|
# Try both attribute naming conventions (OpenAI standard vs Anthropic)
|
||||||
|
input_tokens = 0
|
||||||
|
output_tokens = 0
|
||||||
|
|
||||||
|
if usage:
|
||||||
|
# Try prompt_tokens/completion_tokens (OpenAI/OpenRouter standard)
|
||||||
|
if hasattr(usage, 'prompt_tokens'):
|
||||||
|
input_tokens = usage.prompt_tokens or 0
|
||||||
|
elif hasattr(usage, 'input_tokens'):
|
||||||
|
input_tokens = usage.input_tokens or 0
|
||||||
|
|
||||||
|
if hasattr(usage, 'completion_tokens'):
|
||||||
|
output_tokens = usage.completion_tokens or 0
|
||||||
|
elif hasattr(usage, 'output_tokens'):
|
||||||
|
output_tokens = usage.output_tokens or 0
|
||||||
|
|
||||||
|
app_logger.debug(f"Extracted tokens: input={input_tokens}, output={output_tokens}")
|
||||||
|
|
||||||
|
# Get cost from API or estimate
|
||||||
|
msg_cost = 0.0
|
||||||
|
if usage and hasattr(usage, 'total_cost_usd') and usage.total_cost_usd:
|
||||||
|
msg_cost = float(usage.total_cost_usd)
|
||||||
|
app_logger.debug(f"Using API cost: ${msg_cost:.6f}")
|
||||||
|
else:
|
||||||
|
msg_cost = estimate_cost(input_tokens, output_tokens)
|
||||||
|
app_logger.debug(f"Estimated cost: ${msg_cost:.6f} (from {input_tokens} input + {output_tokens} output tokens)")
|
||||||
|
|
||||||
|
# NOW append to history with cost data
|
||||||
|
session_history.append({
|
||||||
|
'prompt': user_input,
|
||||||
|
'response': full_response,
|
||||||
|
'msg_cost': msg_cost,
|
||||||
|
'prompt_tokens': input_tokens,
|
||||||
|
'completion_tokens': output_tokens
|
||||||
|
})
|
||||||
|
current_index = len(session_history) - 1
|
||||||
|
|
||||||
total_input_tokens += input_tokens
|
total_input_tokens += input_tokens
|
||||||
total_output_tokens += output_tokens
|
total_output_tokens += output_tokens
|
||||||
|
|||||||
Reference in New Issue
Block a user