Subtraction.
Note that in this function, uncached is computed internally by:

    f"uncached: {(uncached := total_prompt_tokens - cached_prompt_tokens)}",

The Responses API uses renamed fields versus Chat Completions, so for a Responses "usage" object the function operates on remapped values, and the line above is effectively computing:

    input_tokens - cached_tokens
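
A minimal sketch of that remapping, using the same dict comprehension the function applies below:

    usage = {"input_tokens": 1289, "input_tokens_details": {"cached_tokens": 0}}
    normalized = {
        key.replace("input_", "prompt_").replace("output_", "completion_"): value
        for key, value in usage.items()
    }
    # -> {'prompt_tokens': 1289, 'prompt_tokens_details': {'cached_tokens': 0}}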
def pretty_usage_table(usage_data: dict, one_line=False) -> str:
    '''Returns a printable multi-line table string with usage, or optionally a single line.
    Compatible with Responses or Chat Completions API; prints only useful information.

    Chat Completions API received usage example:
    {
       "completion_tokens": 75,
       "prompt_tokens": 1289,
       "total_tokens": 1364,
       "completion_tokens_details": {
          "audio_tokens": 0,               # portion billed at the higher rate of audio
          "reasoning_tokens": 64,          # portion billed that was unseen reasoning
          "accepted_prediction_tokens": 0, # informational, portion of "prediction" input matched
          "rejected_prediction_tokens": 0  # billed unmatched prediction input; not exclusive of `accepted`, and together they can total more than sent
       },
       "prompt_tokens_details": {
          "audio_tokens": 0,               # portion billed at the higher rate of audio
          "cached_tokens": 1152            # portion discounted by matching prior context window
       }
    }

    Responses API received usage example:
    {
       "input_tokens": 1289,
       "input_tokens_details": {
          "cached_tokens": 0               # portion discounted by matching prior context window
       },
       "output_tokens": 685,
       "output_tokens_details": {
          "reasoning_tokens": 640          # portion billed that was unseen reasoning
       },
       "total_tokens": 1974
    }
    '''
    import json
    print(f"\nreceived usage:\n{json.dumps(usage_data, indent=3)}\n")  # can comment out after debugging

    # process any usage object input to chat completions form
    normalized_usage = {
        key.replace("input_", "prompt_").replace("output_", "completion_"): value
        for key, value in usage_data.items()
    }

    # Totals and detail breakdowns
    total_prompt_tokens = normalized_usage.get("prompt_tokens", 0)
    total_completion_tokens = normalized_usage.get("completion_tokens", 0)
    prompt_detail = normalized_usage.get("prompt_tokens_details", {})
    completion_detail = normalized_usage.get("completion_tokens_details", {})
    cached_prompt_tokens = prompt_detail.get("cached_tokens", 0)
    audio_prompt_tokens = prompt_detail.get("audio_tokens", 0)
    reasoning_completion_tokens = completion_detail.get("reasoning_tokens", 0)
    audio_completion_tokens = completion_detail.get("audio_tokens", 0)

    # Prepare columns with intermediate assignments via walrus
    prompt_column: list[str] = [
        f"input tokens: {total_prompt_tokens}",
        f"uncached: {(uncached := total_prompt_tokens - cached_prompt_tokens)}",
        f"cached: {cached_prompt_tokens}",
    ]
    completion_column: list[str] = [
        f"output tokens: {total_completion_tokens}",
        f"non-reasoning: {(nonreasoning := total_completion_tokens - reasoning_completion_tokens)}",
        f"reasoning: {reasoning_completion_tokens}",
    ]

    # Include audio breakdown if present
    if audio_prompt_tokens or audio_completion_tokens:
        prompt_column.append(f"non-audio: {total_prompt_tokens - audio_prompt_tokens}")
        prompt_column.append(f"audio: {audio_prompt_tokens}")
        completion_column.append(f"non-audio: {total_completion_tokens - audio_completion_tokens}")
        completion_column.append(f"audio: {audio_completion_tokens}")

    # Determine column widths
    prompt_width = max(len(cell) for cell in prompt_column)
    completion_width = max(len(cell) for cell in completion_column)

    # Build table lines
    table_lines: list[str] = []
    table_lines.append(f"| {'-' * prompt_width} | {'-' * completion_width} |")
    table_lines.append(f"| {prompt_column[0].ljust(prompt_width)} | {completion_column[0].ljust(completion_width)} |")
    table_lines.append(f"| {'-' * prompt_width} | {'-' * completion_width} |")
    for left_cell, right_cell in zip(prompt_column[1:], completion_column[1:]):
        table_lines.append(f"| {left_cell.ljust(prompt_width)} | {right_cell.ljust(completion_width)} |")

    # One-line summary uses the earlier walrus assignments
    prompt_audio_str = f", audio {audio_prompt_tokens}" if audio_prompt_tokens else ""
    completion_audio_str = f", audio {audio_completion_tokens}" if audio_completion_tokens else ""
    single_line = (
        f"input: {total_prompt_tokens} (uncached {uncached}, cached {cached_prompt_tokens}{prompt_audio_str}); "
        f"output: {total_completion_tokens} (non-reasoning {nonreasoning}, "
        f"reasoning {reasoning_completion_tokens}{completion_audio_str})"
    )
    return "\n" + single_line if one_line else "\n" + "\n".join(table_lines)
Near the top, I added a print line so you can see the usage input arriving at the function. The function returns a string ready to print; its docstring also doubles as documentation of both APIs' usage formats.
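
For example, a sketch of calling it on a response from the OpenAI Python SDK (assumes a `response` object from a prior API call; `.model_dump()` converts the SDK's Pydantic usage object into the plain dict the function expects):

    usage = response.usage.model_dump()  # Pydantic usage object -> plain dict
    print(pretty_usage_table(usage))

With the Chat Completions usage from the docstring, the table form looks like: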
| ------------------ | ----------------- |
| input tokens: 1289 | output tokens: 75 |
| ------------------ | ----------------- |
| uncached: 137 | non-reasoning: 11 |
| cached: 1152 | reasoning: 64 |
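
Passing one_line=True instead returns the compact summary; for the same input it reads:

    input: 1289 (uncached 137, cached 1152); output: 75 (non-reasoning 11, reasoning 64)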
You could simply write a new function that adds the extra computed values to "usage" wherever they might be useful. GPT-5 Thinking in ChatGPT, I discovered, is too idiotic and non-functional to follow instructions to do so, even given this function as API documentation along with the exact input and output needed; o3 is also bad, creating output incompatible with the input. Here's Claude Sonnet, zero-shot, enhancing your usage dict for you:
def enhanced_usage_dict(usage: dict) -> dict:
    """Add calculated fields to OpenAI usage objects from Chat Completions or Responses API.

    Accepts a usage dict in either API format and returns a copy with additional
    calculated fields added to the details objects.

    Args:
        usage: Usage dict extracted from an API response (use .model_dump() if using the OpenAI SDK)

    Returns:
        Enhanced usage dict with additional calculated fields

    Raises:
        ValueError: If input is not a dict

    Chat Completions API usage example:
    {
       "completion_tokens": 75,
       "prompt_tokens": 1289,
       "total_tokens": 1364,
       "completion_tokens_details": {
          "audio_tokens": 0,
          "reasoning_tokens": 64,
          "accepted_prediction_tokens": 0,
          "rejected_prediction_tokens": 0
       },
       "prompt_tokens_details": {
          "audio_tokens": 0,
          "cached_tokens": 1152
       }
    }

    Responses API usage example:
    {
       "input_tokens": 1289,
       "input_tokens_details": {
          "cached_tokens": 0
       },
       "output_tokens": 685,
       "output_tokens_details": {
          "reasoning_tokens": 640
       },
       "total_tokens": 1974
    }
    """
    import copy

    # Validate input type
    if not isinstance(usage, dict):
        raise ValueError("dict input required; try `response.usage.model_dump()`")

    # Deep copy so the nested details dicts of the original are not mutated
    enhanced = copy.deepcopy(usage)

    # Detect API format and pick the matching field names
    is_responses_api = "input_tokens" in usage or "output_tokens" in usage
    if is_responses_api:
        total_input = enhanced.get("input_tokens", 0)
        total_output = enhanced.get("output_tokens", 0)
        input_details_key = "input_tokens_details"
        output_details_key = "output_tokens_details"
    else:
        total_input = enhanced.get("prompt_tokens", 0)
        total_output = enhanced.get("completion_tokens", 0)
        input_details_key = "prompt_tokens_details"
        output_details_key = "completion_tokens_details"

    # Ensure details dicts exist
    if input_details_key not in enhanced:
        enhanced[input_details_key] = {}
    if output_details_key not in enhanced:
        enhanced[output_details_key] = {}
    input_details = enhanced[input_details_key]
    output_details = enhanced[output_details_key]

    # Calculate and add input/prompt token breakdowns
    cached_tokens = input_details.get("cached_tokens", 0)
    audio_input_tokens = input_details.get("audio_tokens", 0)
    input_details["uncached_tokens"] = total_input - cached_tokens
    if audio_input_tokens > 0:
        input_details["non_audio_tokens"] = total_input - audio_input_tokens

    # Calculate and add output/completion token breakdowns
    reasoning_tokens = output_details.get("reasoning_tokens", 0)
    audio_output_tokens = output_details.get("audio_tokens", 0)
    output_details["non_reasoning_tokens"] = total_output - reasoning_tokens
    if audio_output_tokens > 0:
        output_details["non_audio_tokens"] = total_output - audio_output_tokens

    return enhanced
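
A usage sketch (again assuming a `response` object from the SDK, per the docstring's `.model_dump()` hint):

    import json

    enhanced = enhanced_usage_dict(response.usage.model_dump())
    print(json.dumps(enhanced, indent=3))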
Then you are provided a dict with additional fields:
{
   "input_tokens": 1289,
   "input_tokens_details": {
      "cached_tokens": 0,
      "uncached_tokens": 1289      # NEW: 1289 - 0
   },
   "output_tokens": 685,
   "output_tokens_details": {
      "reasoning_tokens": 640,
      "non_reasoning_tokens": 45   # NEW: 685 - 640
   },
   "total_tokens": 1974
}
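
The Chat Completions form gains the analogous fields, computed the same way from the docstring's example values (1289 prompt tokens with 1152 cached; 75 completion tokens with 64 reasoning):

   "prompt_tokens_details": {
      "audio_tokens": 0,
      "cached_tokens": 1152,
      "uncached_tokens": 137       # NEW: 1289 - 1152
   },
   "completion_tokens_details": {
      "audio_tokens": 0,
      "reasoning_tokens": 64,
      "accepted_prediction_tokens": 0,
      "rejected_prediction_tokens": 0,
      "non_reasoning_tokens": 11   # NEW: 75 - 64
   }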