The main fault is that you have set the max_output_tokens value far too low. It needs to be more like 10000, out of a possible 128000.
“gpt-5” (along with o4-mini, o3, etc) are reasoning AI models. They produce internal tokens of thought that you do not receive, which are also billed as output. The maximum token setting is a budget of the maximum expense you will spend, seen or unseen, and it will terminate the AI text generation if hit.
Perhaps you need some parameter guidance - what you can send to each of the two “chat” endpoints, how parameters must be dropped based on model and model capabilities (along with your "ID verified" status), and especially where the placement of a similar parameter differs between endpoints.
'''Reference-quality API migration guide: Chat Completions <-> Responses
Extensively demonstrates parameters accepted or denied per endpoint or model
(comments are intentional for API documentation and alternate code use)'''
import os, json, httpx
# Parameters the developer wants, shared by both endpoints before
# per-endpoint filtering below. Inline notes mark where a field is
# rejected: "reasoning: no" = reasoning models (gpt-5, o3, o4-mini)
# refuse it; "responses: no" = the Responses endpoint refuses it.
common_body = dict(
    model="gpt-5-mini",
    temperature=1.0,  # reasoning: no
    top_p=0.5,  # reasoning: no
    service_tier="priority",  # "flex": only gpt-5, o3, or o4-mini
    store=False,
    prompt_cache_key=None,
    safety_identifier=None,
    parallel_tool_calls=False,
    tools=[],  # functions: different shape between endpoints
    tool_choice="auto",
    stream=False,
    stream_options=dict(
        include_usage=True,  # responses: no
        include_obfuscation=False,
    ),
    stop=[],  # responses: no, reasoning: no
    frequency_penalty=0.01,  # responses: no, reasoning: no
    presence_penalty=0.01,  # responses: no, reasoning: no
    n=1,  # responses: no
    logit_bias={99999: -2},  # responses: no, reasoning: no
    prediction=None,  # responses: no, reasoning: no (gpt-4o only parameter)
    modalities=["text"],  # responses: no, reasoning: no (+"audio")
    audio=dict(format="mp3", voice="cedar"),  # responses: no, reasoning: no
)
# Knobs whose placement in the request body depends on the endpoint
# (top level on Chat Completions vs. nested objects on Responses).
max_completion_tokens = 4000
verbosity = "medium"  # other than "medium": gpt-5 reasoning only
top_logprobs = 0  # 0 = disabled
reasoning_effort = "low"
response_format = {"type": "text"}
instructions = "A concise assistant provides brief answers."
user_message = "Ping!"
# Features that exist only on the Responses API
reasoning_summary = "auto"  # "auto" | "detailed" | None
include_encrypted_content = True
# --- Model capability gates -------------------------------------------
# gpt-5* are reasoning models EXCEPT the gpt-5-chat* variants.
is_gpt5 = (
    common_body["model"].startswith("gpt-5")
    and not common_body["model"].startswith("gpt-5-chat")
)
# o3*/o4* are also reasoning models (billed internal thought tokens;
# sampler-style controls are rejected).
is_reasoning = is_gpt5 or common_body["model"].startswith(("o3", "o4"))
if is_reasoning:
    # Strip every parameter reasoning models refuse; pop(..., None) so
    # keys that were never present are tolerated.
    for unsupported in (
        "temperature",
        "top_p",
        "frequency_penalty",
        "presence_penalty",
        "logit_bias",
        "stop",
        "modalities",
        "logprobs",
        "top_logprobs",
        "audio",
        "prediction",
    ):
        common_body.pop(unsupported, None)
if not common_body.get("stream", False):
    # stream_options is only meaningful when streaming. Fix: supply a
    # default so this cannot raise KeyError when the key is absent
    # (the original bare pop() crashed on a body built without it,
    # inconsistent with every other pop in this section).
    common_body.pop("stream_options", None)
# Chat Completions request: start from the shared body, then layer on
# the endpoint-specific placements of the developer knobs.
chatcompletions_body = dict(common_body)
chatcompletions_body["max_completion_tokens"] = max_completion_tokens
if is_reasoning:
    chatcompletions_body["reasoning_effort"] = reasoning_effort
chatcompletions_body["response_format"] = response_format
if is_gpt5:
    chatcompletions_body["verbosity"] = verbosity
chatcompletions_body["logprobs"] = bool(top_logprobs)  # 0 = False
if top_logprobs:  # 0 = don't send
    chatcompletions_body["top_logprobs"] = top_logprobs
# System/user turns travel in the flat "messages" list on this endpoint.
chatcompletions_body["messages"] = [
    {"role": "system", "content": instructions},
    {"role": "user", "content": user_message},
]
# Responses request: shared body plus the Responses-side placements of
# the same knobs (reasoning.*, text.format, text.verbosity, input).
responses_body = dict(common_body)
responses_body["max_output_tokens"] = max_completion_tokens
if is_reasoning:
    responses_body["reasoning"] = {
        "effort": reasoning_effort,
        "summary": reasoning_summary,
    }
included = []
if is_reasoning and include_encrypted_content:
    included.append("reasoning.encrypted_content")
# .. other include types not demonstrated
responses_body["include"] = included
text_obj = {"format": response_format}
if is_gpt5:
    text_obj["verbosity"] = verbosity
responses_body["text"] = text_obj
responses_body["top_logprobs"] = top_logprobs  # 0 = disabled, or 1-20
responses_body["instructions"] = instructions
responses_body["input"] = [
    {"type": "message", "role": "user", "content": user_message}
]
# --- Strip Chat-Completions-only parameters from the Responses body ----
# None of these are implemented on the Responses endpoint.
for cc_only in (
    "frequency_penalty",
    "presence_penalty",
    "logit_bias",
    "modalities",
    "audio",
    "n",
    "stop",
    "prediction",
):
    responses_body.pop(cc_only, None)
# Moved elsewhere or unnecessary on Responses, thus untolerated.
# BUG FIX: responses_body holds the SAME stream_options dict object as
# chatcompletions_body (the shallow **/dict() copy shares nested dicts),
# so the original in-place .pop("include_usage") also deleted the key
# from the Chat Completions request whenever streaming was enabled.
# Rebuild a filtered copy instead of mutating the shared dict.
if "stream_options" in responses_body:
    responses_body["stream_options"] = {
        k: v
        for k, v in responses_body["stream_options"].items()
        if k != "include_usage"
    }
for relocated in (
    "max_tokens",
    "max_completion_tokens",
    "response_format",
    "messages",
    "verbosity",
    "reasoning_effort",
    "web_search_options",
):
    responses_body.pop(relocated, None)
# - API call formation
# Only the bearer token is needed; httpx's json= kwarg sets Content-Type.
api_key = os.getenv("OPENAI_API_KEY")
headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"}
# --- Responses API call
resp: httpx.Response | None = None
rheaders: dict[str, str] | None = None
try:
    with httpx.Client(timeout=900) as http:
        resp = http.post(
            "https://api.openai.com/v1/responses",
            headers=headers,
            json=responses_body,
        )
        resp.raise_for_status()
        rheaders = dict(resp.headers)
except httpx.HTTPStatusError as exc:
    print(f"Request failed: {exc}")
    if exc.response is not None:
        try:
            # Surface OpenAI's error payload before re-raising.
            print("Error response body:\n", exc.response.text)
            rheaders = dict(exc.response.headers)
        except Exception:
            pass  # diagnostics are best-effort only
    raise
except httpx.RequestError as exc:
    print(f"Request error: {exc}")
    raise
response = resp.json()
# Flatten every assistant message's output_text parts into one string,
# mirroring the SDK's response.output_text convenience attribute.
text_parts: list[str] = []
for item in response.get("output", []):
    if item.get("type") != "message" or item.get("role") != "assistant":
        continue
    for part in item.get("content", []):
        if part.get("type") == "output_text" and "text" in part:
            text_parts.append(part["text"])
response["output_text"] = "".join(text_parts)
# print(json.dumps(response.get("output"), indent=2))
print(response.get("output_text"))
# --- Chat Completions API call
resp = None
rheaders = None
try:
    with httpx.Client(timeout=600) as http:
        resp = http.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=chatcompletions_body,
        )
        resp.raise_for_status()
        rheaders = dict(resp.headers)
except httpx.HTTPStatusError as exc:
    print(f"Request failed: {exc}")
    if exc.response is not None:
        try:
            # Surface OpenAI's error payload before re-raising.
            print("Error response body:\n", exc.response.text)
            rheaders = dict(exc.response.headers)
        except Exception:
            pass  # diagnostics are best-effort only
    raise
except httpx.RequestError as exc:
    print(f"Request error: {exc}")
    raise
ccresponse = resp.json()
# On this endpoint the reply text lives at choices[0].message.content.
ccresponse["output_text"] = ccresponse["choices"][0]["message"]["content"]
# print(json.dumps(ccresponse.get("choices"), indent=2))
print(ccresponse.get("output_text"))
# reminder of SDK module usage
#import openai
#openai_client = openai.Client()
#r = openai_client.responses.create(**responses_body)
#r = openai_client.chat.completions.create(**chatcompletions_body)
This will run, and both a Responses and a Chat Completions call should succeed and print your response, for a wide variety of parameter options.
The API Reference will let you get descriptions for each of the possible parameters.
Not even shown: structured output or tools. Not shown: streaming, async. Logprobs behave differently between models and endpoints also.