Your own RESTful requests: less bloat; less help.
Most of all: you don’t have to learn someone else’s code before you can build; the doing is the learning.
Here’s a code sample where OpenAI’s SDK does a bit of the work for you in the case of structured outputs: pass a Pydantic BaseModel
as your response format (instead of hand-writing a JSON schema where not one comma can be misplaced), and the SDK will convert it and send it for you:
from pydantic import BaseModel
from openai import OpenAI


class ExpertResponse(BaseModel):
    topic: str
    chat_title_four_words: str
    output_fulfillment_intro: str
    complete_answer: str


client = OpenAI()

response = client.responses.parse(
    model="o3-mini",
    input="Expert responds: baboons are closely related to what?",
    text_format=ExpertResponse,
    max_output_tokens=2048,
    store=False,
    stream=False,  # True is incompatible with parse()
)  # will raise if unparsable

if False:  # True: iterates over multiple items returned by reasoning models
    for index, out_item in enumerate(response.output):
        # Check if the output item has a 'content' attribute
        if hasattr(out_item, "content"):
            found = False
            for element in out_item.content:
                if hasattr(element, "text"):
                    if not found:
                        print(f"content found in output item index {index}:")
                        found = True
                    print(element.text)

for index, out_item in enumerate(response.output):
    if getattr(out_item, "type", None) == "message":
        content_elements = getattr(out_item, "content", [])
        for element in content_elements:
            text = getattr(element, "text", None)
            if text:
                print(f"\n[Output item {index}]:\n{text}")

# print("parsed:\n", response.output_parsed)
print(
    "-" * 40,
    f"\n{response.output_parsed.topic} - "
    f"{response.output_parsed.chat_title_four_words}\n"
    f"{response.output_parsed.output_fulfillment_intro}\n"
    f"{response.output_parsed.complete_answer}\n"
)
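Curious what the SDK converts that BaseModel into before sending? Pydantic itself can show you the raw schema (a quick sketch; the SDK additionally applies strict-mode adjustments, such as forbidding extra properties, before attaching it to the request):

import json

from pydantic import BaseModel


class ExpertResponse(BaseModel):
    topic: str
    chat_title_four_words: str
    output_fulfillment_intro: str
    complete_answer: str


# The JSON schema Pydantic derives from the model - the starting point for
# the response format the SDK sends on your behalf.
print(json.dumps(ExpertResponse.model_json_schema(), indent=2))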
I don’t have a “Quickstart” handy for using a library like requests or httpx, because one immediately goes into streaming, asyncio, handling and gathering SSE events, tool iterations, etc.
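The non-streaming case, at least, stays small. Here’s a sketch with requests, assuming the response shape the SDK objects mirror: an "output" list of message items whose content parts carry "text" fields.

import os

import requests

resp = requests.post(
    "https://api.openai.com/v1/responses",
    headers={
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
        "Content-Type": "application/json",
    },
    json={
        "model": "gpt-4o-mini",
        "input": [{"role": "user", "content": "Say hello in five words."}],
    },
)
resp.raise_for_status()

# Walk the "output" list and print any message text parts.
for item in resp.json().get("output", []):
    if item.get("type") == "message":
        for part in item.get("content", []):
            if part.get("text"):
                print(part["text"])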
Can O3 turn hundreds of lines of code and some dumps of SSE-streamed events into a minimal no-SDK streaming example?
Sure can. Here you go!
"""
Minimal, self‑contained example that shows how to call the *Responses* endpoint
with httpx, stream the Server‑Sent Events, and print the running “delta” text
segments that come back.
Only three things are required:
1. A valid OPENAI_API_KEY in your environment.
2. `pip install httpx`.
3. An `input_messages` list built exactly the same way you would for Chat
Completions (role/content pairs), e.g.:
input_messages = [
{"role": "user", "content": "Say hello in five words."}
]
"""
import os
import json
import httpx
def call_responses(input_messages: list[dict]) -> None:
url = "https://api.openai.com/v1/responses"
headers = {
"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
"Content-Type": "application/json",
}
# The bare‑minimum request body.
body = {
"model": "gpt-4o-mini",
"input": input_messages,
"stream": True, # get an event stream back
}
# OpenAI still speaks HTTP/1.1 & HTTP/2, so the regular httpx client works.
with httpx.Client(timeout=None) as client:
with client.stream("POST", url, headers=headers, json=body) as resp:
resp.raise_for_status()
event_lines: list[str] = []
for line in resp.iter_lines(): # SSE = text lines
if line == "": # blank line → event boundary
if not event_lines:
continue # ignore extra blanks
# very small & permissive event parser
event_type = None
data_json = None
for l in event_lines:
if l.startswith("event:"):
event_type = l[len("event:"):].strip()
elif l.startswith("data:"):
data_json = json.loads(l[len("data:"):].strip())
event_lines.clear()
# We only care about streaming deltas
if event_type == "response.output_text.delta":
print(data_json["delta"], end="", flush=True)
# When the assistant is finished you will also receive the
# response.output_text.done and response.completed events.
else:
event_lines.append(line)
print() # newline after the final token
# ---------------------------------------------------------------------------
if __name__ == "__main__":
input_messages = [
{"role": "user", "content": "Give me a catchy startup name."}
]
call_responses(input_messages)
“Performance” depends on whether you want to build a high-throughput backend with uvloop and IP-address pinning to NICs and CPU core threads…
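If you do head that way, here’s a minimal async variant of the streaming call above (a sketch assuming the same endpoint and event payloads; each SSE data payload repeats the event name in a "type" field you can filter on, and uvloop/NIC pinning are left out to keep it dependency-free):

import asyncio
import json
import os

import httpx


async def stream_response(input_messages: list[dict]) -> None:
    headers = {
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
        "Content-Type": "application/json",
    }
    body = {"model": "gpt-4o-mini", "input": input_messages, "stream": True}

    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "POST",
            "https://api.openai.com/v1/responses",
            headers=headers,
            json=body,
        ) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                # Skip the "event:" lines; the JSON payload carries the same
                # event name in its "type" field.
                if line.startswith("data:"):
                    event = json.loads(line[len("data:"):].strip())
                    if event.get("type") == "response.output_text.delta":
                        print(event["delta"], end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(stream_response(
        [{"role": "user", "content": "Give me a catchy startup name."}]
    ))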