I think I got a clue on what is going on: there seems to be a limit on the max size for individual input elements on gpt-5.x family when using truncation:auto.
Since usually we don’t send a prompt that huge, but a collection of individual turn elements on a big conversation, this issue hasn’t been noticed very often.
That limit seems to be around 70k tokens per item, but if you break the payload down into smaller elements it does go through as expected.
Here is a batch of tests breaking down a 700k payload into input items with different lengths:
Model: gpt-5.2
TOTAL_CHARS: 700,000
STEP_ITEM_CHARS: ['700,000', '400,000', '350,000', '300,000']
step | item_chars | items | payload_chars | single_item_tokens | disabled_tokens | auto_tokens | diff | status
-----+------------+-------+---------------+--------------------+-----------------+-------------+--------+----------------
1 | 700,000 | 1 | 700,000 | 136,406 | 136,406 | 72,002 | 64,404 | auto < disabled
2 | 400,000 | 2 | 700,000 | 77,951 | 136,414 | 130,465 | 5,949 | auto < disabled
3 | 350,000 | 2 | 700,000 | 68,208 | 136,414 | 136,414 | 0 | same
4 | 300,000 | 3 | 700,000 | 58,465 | 136,420 | 136,420 | 0 | same
Notice that when each individual item is under 350k chars (about 70k tokens), it does not truncate and works as expected, and you can still send a 700k-char payload in 2 items.
So, a workaround is to break the prompt down into 2 or more elements like this whenever a single one exceeds 350k chars:
{
"model": "gpt-5.2",
"input": [
{
"role": "user",
"content": [
{
"type": "input_text",
"text": "This is the first user turn."
}
]
},
{
"role": "user",
"content": [
{
"type": "input_text",
"text": "This is the second user turn."
}
]
}
],
"truncation": "auto"
}
Or perhaps a file input if you are sending a book or similar, but I haven’t tested it.
Code
# Model under test.
MODEL: str = "gpt-5.2"
# Fixed total payload size per step, in characters.
TOTAL_CHARS: int = 700_000
# Characters per individual input item for each step.
STEP_ITEM_CHARS: list[int] = [700_000, 400_000, 350_000, 300_000]
def make_text(item_index: int, target_chars: int) -> str:
    """Build a deterministic synthetic string of exactly ``target_chars`` characters.

    The string is a per-item prefix followed by a repeated filler sentence,
    sliced down to the target length. When ``target_chars`` is smaller than
    the prefix, the prefix itself is truncated (matching the original loop's
    behavior).

    Args:
        item_index: Index embedded in the prefix so items are distinguishable.
        target_chars: Exact length of the returned string (non-negative).

    Returns:
        A string of length ``target_chars``.
    """
    prefix = f"[item={item_index}] "
    base_sentence = (
        "This is a synthetic test payload for the OpenAI Responses API. "
        "We are sending many separate input items instead of one large string. "
        "The purpose is to test whether truncation='auto' changes the counted "
        "input token total or appears to exclude earlier items from the input. "
    )
    # Compute the number of whole filler repetitions up front instead of
    # appending in a loop: repeated `+=` on a growing string is quadratic
    # and this function is called with targets in the hundreds of thousands.
    needed = max(0, target_chars - len(prefix))
    repeats = -(-needed // len(base_sentence))  # ceiling division
    return (prefix + base_sentence * repeats)[:target_chars]
def make_single_input_item(item_chars: int) -> list[dict]:
    """Wrap one synthetic text of ``item_chars`` characters as a single user turn."""
    message = {
        "role": "user",
        "content": [{"type": "input_text", "text": make_text(0, item_chars)}],
    }
    return [message]
def make_input_items(total_chars: int, item_chars: int) -> list[dict]:
    """
    Fill a total character budget using as many input items as fit.

    Each item is a user message whose text is at most ``item_chars`` long;
    the last item may be smaller to exactly fill the budget.

    Args:
        total_chars: Total number of characters across all items.
        item_chars: Maximum characters per individual item; must be positive.

    Returns:
        A list of Responses-API user message dicts.

    Raises:
        ValueError: If ``item_chars`` is not positive. Without this guard the
            loop below would never decrement ``remaining`` and spin forever.
    """
    if item_chars <= 0:
        raise ValueError("item_chars must be positive")
    items = []
    remaining = total_chars
    i = 0
    while remaining > 0:
        current_size = min(item_chars, remaining)
        text = make_text(i, current_size)
        items.append(
            {
                "role": "user",
                "content": [
                    {
                        "type": "input_text",
                        "text": text,
                    }
                ],
            }
        )
        remaining -= len(text)
        i += 1
    return items
def approx_char_count(input_items: list[dict]) -> int:
    """Total number of characters across every input_text part in the items."""
    total = 0
    for item in input_items:
        for part in item["content"]:
            if part["type"] == "input_text":
                total += len(part["text"])
    return total
def count_tokens(model: str, input_items: list[dict], truncation: str):
    """Ask the Responses API to count input tokens for the given payload.

    NOTE(review): ``client`` is not defined anywhere in this snippet — it is
    presumably an already-constructed OpenAI client (e.g. ``client = OpenAI()``);
    confirm before running.

    Args:
        model: Model name, e.g. "gpt-5.2".
        input_items: List of Responses-API input message dicts.
        truncation: Truncation mode passed through to the API
            (this script uses "auto" and "disabled").

    Returns:
        The API response object; ``safe_count`` reads its ``input_tokens``.
    """
    return client.responses.input_tokens.count(
        model=model,
        input=input_items,
        truncation=truncation,
    )
def safe_count(model: str, input_items: list[dict], truncation: str):
    """Count tokens without raising.

    Returns a ``(input_tokens, error_message)`` pair: on success the second
    element is None, on any failure the first element is None and the second
    holds the stringified exception.
    """
    try:
        response = count_tokens(model, input_items, truncation)
    except Exception as exc:  # best-effort: keep the error text for the report
        return None, str(exc)
    return response.input_tokens, None
def fmt_num(value):
    """Format an integer with thousands separators, or '-' when value is None."""
    if value is None:
        return "-"
    return f"{value:,}"
def fmt_text(value, max_len=38):
    """Clip a string to at most max_len characters, ending clipped text with '...'.

    None is rendered as '-'.
    """
    if value is None:
        return "-"
    if len(value) <= max_len:
        return value
    return value[: max_len - 3] + "..."
def print_table(rows):
    """Render the per-step result rows as an aligned ASCII table on stdout."""
    headers = [
        "step",
        "item_chars",
        "items",
        "payload_chars",
        "single_item_tokens",
        "disabled_tokens",
        "auto_tokens",
        "diff",
        "status",
    ]
    data = [
        [
            str(row["step"]),
            f'{row["item_chars"]:,}',
            f'{row["items"]:,}',
            f'{row["payload_chars"]:,}',
            fmt_num(row["single_item_tokens"]),
            fmt_num(row["disabled_tokens"]),
            fmt_num(row["auto_tokens"]),
            fmt_num(row["diff"]),
            row["status"],
        ]
        for row in rows
    ]
    # Each column is as wide as its widest cell, header included.
    widths = [len(h) for h in headers]
    for cells in data:
        widths = [max(w, len(cell)) for w, cell in zip(widths, cells)]

    def render_row(cells):
        return " | ".join(cell.ljust(w) for cell, w in zip(cells, widths))

    print(render_row(headers))
    print("-+-".join("-" * w for w in widths))
    for cells in data:
        print(render_row(cells))
def run_step(step_index: int, total_chars: int, item_chars: int):
    """Run one measurement step and return its result row.

    Performs three token counts: a single item of ``item_chars`` (truncation
    disabled), the full multi-item payload with truncation disabled, and the
    same payload with truncation auto. The status field summarizes how the
    disabled and auto counts compare, or which of them failed.
    """
    single_item = make_single_input_item(item_chars)
    payload_items = make_input_items(total_chars=total_chars, item_chars=item_chars)

    single_item_tokens, single_item_err = safe_count(MODEL, single_item, "disabled")
    disabled_tokens, disabled_err = safe_count(MODEL, payload_items, "disabled")
    auto_tokens, auto_err = safe_count(MODEL, payload_items, "auto")

    diff = None
    if disabled_tokens is not None and auto_tokens is not None:
        diff = disabled_tokens - auto_tokens
        if diff > 0:
            status = "auto < disabled"
        elif diff == 0:
            status = "same"
        else:
            status = "auto > disabled"
    elif disabled_err and auto_tokens is not None:
        status = "disabled failed"
    elif auto_err and disabled_tokens is not None:
        status = "auto failed"
    elif disabled_err and auto_err:
        status = "both failed"
    else:
        status = "ok"

    return {
        "step": step_index + 1,
        "item_chars": item_chars,
        "items": len(payload_items),
        "payload_chars": approx_char_count(payload_items),
        "single_item_tokens": single_item_tokens,
        "disabled_tokens": disabled_tokens,
        "auto_tokens": auto_tokens,
        "diff": diff,
        "status": status,
        "single_item_error": single_item_err,
        "disabled_error": disabled_err,
        "auto_error": auto_err,
    }
if __name__ == "__main__":
    print(f"Model: {MODEL}")
    print(f"TOTAL_CHARS: {TOTAL_CHARS:,}")
    print(f"STEP_ITEM_CHARS: {[f'{x:,}' for x in STEP_ITEM_CHARS]}")
    print()

    # One result row per configured item size.
    rows = [run_step(idx, TOTAL_CHARS, chars) for idx, chars in enumerate(STEP_ITEM_CHARS)]
    print_table(rows)

    print("\nErrors:")
    error_fields = [
        ("single_item_error", "single-item count failed"),
        ("disabled_error", "disabled failed"),
        ("auto_error", "auto failed"),
    ]
    for row in rows:
        for key, label in error_fields:
            if row[key]:
                print(f"Step {row['step']} {label}: {row[key]}")