Another thing to note: if you use the Responses endpoint, the answer comes back in an output array whose first item is an (empty) reasoning summary and whose second item is the actual message content. So naive parsing of the JSON response can also be a reason you receive nothing, even on “hello”.
It’s a challenge to show both “API example” and “good practice” at once, but I poked away at this a bit for you - Python code to chat with o3 at the console.
__doc__ = "Chat Completions Python demo chatbot; no SDK, no stream, no async"  # explicit module docstring (set by assignment since prose precedes the code)
import logging
import os
import sys
import time

# `pip install httpx` if you don't have it
import httpx
logger = logging.getLogger(__name__)  # module-level logger, named after the module per convention
def send_chat_request(
    conversation_messages: list[dict[str, str]],
    model: str = "gpt-4o-mini",
    max_tokens: int | None = 4000,
    *,
    timeout: float = 900.0,
) -> tuple[httpx.Response, float]:
    """
    Call the OpenAI chat-completions endpoint with the supplied message list.

    The OPENAI_API_KEY environment variable supplies the credential.

    Parameters
    ----------
    conversation_messages : list[dict[str, str]]
        A list of dicts, each with 'role' and 'content' keys.
    model : str, optional
        Model name to request (default "gpt-4o-mini").
    max_tokens : int or None, optional
        Completion-token cap. None omits the cap from the request entirely
        (useful for reasoning models, which spend tokens on hidden reasoning).
    timeout : float, optional (keyword-only)
        Per-request timeout in seconds.

    Returns
    -------
    tuple[httpx.Response, float]
        Response object and elapsed time in seconds.
        (you can easily add response headers with rate limits)

    Raises
    ------
    ValueError
        If OPENAI_API_KEY environment variable is unset.
    httpx.HTTPStatusError
        If the response has an HTTP error status, like 429 = no credits.
    httpx.RequestError
        If a network error occurs.
    """
    api_url = "https://api.openai.com/v1/chat/completions"
    api_key = os.environ.get("OPENAI_API_KEY")  # don't hard-code
    if not api_key:
        raise ValueError("ERROR: Set the OPENAI_API_KEY environment variable.")
    headers = {"Authorization": f"Bearer {api_key}"}
    payload: dict[str, object] = {
        "model": model,
        "messages": conversation_messages,
    }
    # Omit the cap instead of sending JSON null when the caller passes None
    if max_tokens is not None:
        payload["max_completion_tokens"] = max_tokens
    start_time = time.monotonic()  # monotonic clock: immune to wall-clock jumps
    try:
        response = httpx.post(
            api_url,
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        return response, time.monotonic() - start_time
    except httpx.HTTPStatusError as err:
        # Lazy %-args: formatting is skipped when the level is disabled
        logger.error("HTTP Err %s: %s", err.response.status_code, err.response.text)
        raise
    except httpx.RequestError as err:
        logger.error("Request Error: %s", err)
        raise
# Chat application pattern as script, where exit/break gives you ai_response
MODEL_NAME = "o3"         # start with "gpt-4o-mini"
MAX_TOKENS = None         # Reasoning models need high value or None
MAX_HISTORY_LENGTH = 20   # 20 == 10 user inputs
SYSTEM_PROMPT = """
You are a helpful AI assistant, employing your expertise and vast world knowledge.
With internal planning, you fulfill every input truthfully, accurately, and robustly.
""".strip()
# "o"-series reasoning models take instructions under the "developer" role
system_message = {
    "role": "developer" if MODEL_NAME.startswith("o") else "system",
    "content": SYSTEM_PROMPT
}
conversation_history: list[dict[str, str]] = []
ai_response: httpx.Response | None = None
print(f"Type your prompt to {MODEL_NAME}. Enter “exit” to quit.", end="\n\n")
# A chatbot session sends repeatedly, growing a message context list
while True:
    user_input = input("prompt> ").strip()
    if user_input.lower() == "exit":
        print("\nExiting. Inspect `ai_response` in a REPL for full details if desired.")
        break
    user_message = {"role": "user", "content": user_input}
    # Trim to the newest turns so the request body stays bounded
    recent_history = conversation_history[-MAX_HISTORY_LENGTH:]
    messages = [system_message, *recent_history, user_message]
    # Here, send_chat_request is purposefully allowed to raise traceback
    ai_response, ai_duration = send_chat_request(
        messages,
        model=MODEL_NAME,
        max_tokens=MAX_TOKENS,
    )
    # Parse out stuff we want and expect: just text content from assistant.
    # Decode the JSON body once, not once per field.
    try:
        response_data = ai_response.json()
        assistant_reply = response_data["choices"][0]["message"]["content"]
        ai_usage = response_data["usage"]
    except (KeyError, IndexError, ValueError) as parse_err:
        print(f"Failed to parse response – {parse_err}", file=sys.stderr)
        continue
    # Add to a conversation history only after success (or could retry it)
    conversation_history.append(user_message)
    conversation_history.append({"role": "assistant", "content": assistant_reply})
    print("assistant>", assistant_reply)
    print(f"total time: {ai_duration:.1f}s")
    print(ai_usage)