Cancelling Stream Does Not Show Usage

When we cancel a stream, OpenAI does not tell us how much we were charged. See this example code:

import asyncio
import base64
from pathlib import Path

from openai import AsyncOpenAI

from utils.storage_utils import get_mime_type

# Number of streamed text deltas to print before the response is cancelled.
MAX_CHUNKS = 20


async def main():
    """Stream a background response, cancel it early, and print the reported usage.

    Reads ``test.pdf`` from the directory containing this script, sends it
    inline as a base64 data URL together with a summarisation prompt, and
    prints the streamed text deltas. After ``MAX_CHUNKS`` deltas the response
    is cancelled. Status and usage are printed from three places so they can
    be compared: the cancel call, the ``response.completed`` event (only if
    the stream finishes before the cancel threshold), and a final retrieve
    call.
    """
    client = AsyncOpenAI()
    pdf_path = Path(__file__).parent / "test.pdf"
    pdf_bytes = pdf_path.read_bytes()
    filename = pdf_path.name

    # The file is embedded in the request as a data URL rather than uploaded
    # separately.
    file_content = [
        {
            "type": "input_file",
            "filename": filename,
            "file_data": f"data:{get_mime_type(filename)};base64,{base64.b64encode(pdf_bytes).decode()}",
        }
    ]

    stream = await client.responses.create(
        model="gpt-5-mini",
        input=[{"role": "user", "content": file_content + [{"type": "input_text", "text": "Can you summarise this file?"}]}],
        stream=True,
        background=True,
    )

    response_id = None
    chunk_count = 0

    try:
        async for event in stream:
            if event.type == "response.created":
                # Remember the ID so the response can be cancelled and
                # retrieved later.
                response_id = event.response.id
                print(f"Response ID: {response_id}\n")

            elif event.type == "response.output_text.delta":
                print(event.delta, end="", flush=True)
                chunk_count += 1

                if chunk_count >= MAX_CHUNKS:
                    print(f"\n\n--- cancelling after {chunk_count} chunks ---\n")
                    cancelled = await client.responses.cancel(response_id)
                    print(f"[cancel]    status: {cancelled.status}")
                    print(f"[cancel]    usage:  {cancelled.usage}")
                    break

            elif event.type == "response.completed":
                print(f"\n[stream]    status: {event.response.status}")
                print(f"[stream]    usage:  {event.response.usage}")
    finally:
        # Close the stream even if iteration or the cancel call raises, so the
        # underlying connection is not left open.
        await stream.close()

    if response_id:
        # Fetch the response once more to see what usage the API reports
        # after the stream has ended.
        result = await client.responses.retrieve(response_id)
        print(f"\n[retrieve]  status: {result.status}")
        print(f"[retrieve]  usage:  {result.usage}")

# Run the reproduction only when executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())

The output is:

Response ID: resp_*

Here’s a concise summary of the acta (hearing minutes):

- Court and process
 

--- cancelling after 20 chunks ---

[cancel]    status: cancelled
[cancel]    usage:  None

[retrieve]  status: cancelled
[retrieve]  usage:  None
1 Like

Hey Joao_Abrantes,

This is expected behavior with streaming responses.

Usage is included on the final stream event/chunk. If the stream is cancelled or interrupted before completion, that final usage payload is not emitted, so usage may be missing in the streamed output. Billing/usage is still recorded on our backend; this only affects what is returned in the incomplete stream response. Apologies for the confusion.

1 Like

Why can’t you improve this? Obviously people still need to know the cost of cancelled streams, so this situation is not ideal…