I’m exploring how well gpt-5, gpt-5-mini and gpt-5-nano can understand PDF inputs. No matter what I try, I can’t get gpt-5 or gpt-5-mini to recognise the PDF `input_file` content item in the message. Oddly, gpt-5-nano sees it about one time in five.
This is broadly consistent with my finding with gpt-4.1 two months ago in this forum question:
Can anyone indicate what I might be doing wrong (see code snippet below)? Or shed some light on what the actual capabilities of these models are for direct PDF inputs?
Many thanks
Stephen
def build_user_content(
    self,
    input_text: str | None = None,
    input_files: bytes | list[bytes] | None = None,
) -> tuple[dict[str, Any], str]:
    """
    Assemble the user message for an OpenAI request, plus a digest of its inputs.

    Every entry of ``input_files`` (a single ``bytes`` value is treated as a
    one-element list) becomes one content part whose payload is a base64
    ``data:`` URL:

    * jpg / jpeg / webp / png / gif -> an ``input_image`` part
    * pdf                           -> an ``input_file`` part, named
      ``<file digest>.<extension>``

    A non-empty ``input_text`` is appended after the files as an
    ``input_text`` part.

    Returns:
        ``(message, digest)`` where ``message`` is
        ``{"role": "user", "content": [...parts...]}`` and ``digest`` is the
        xxh3-64 hex digest of all file bytes followed by the encoded text,
        concatenated in that order with no separators.

    Raises:
        ValueError: if a file's type cannot be guessed, or it is neither a
            supported image type nor a PDF.
    """
    image_extensions = ('jpg', 'jpeg', 'webp', 'png', 'gif')

    # Normalise the files argument; anything that is neither bytes nor a
    # list contributes no parts and nothing to the digest.
    if isinstance(input_files, bytes):
        files = [input_files]
    elif isinstance(input_files, list):
        files = input_files
    else:
        files = []

    parts: list[dict[str, Any]] = []
    hashed: list[bytes] = list(files)

    for raw in files:
        file_digest = calculate_file_digest(raw)
        kind = filetype.guess(raw)  # type: ignore
        if not kind:
            raise ValueError(f"Cannot determine file type for file with digest {file_digest}")

        extension = kind.extension
        encoded = base64.b64encode(raw).decode()
        data_url = f"data:{kind.mime};base64,{encoded}"

        if extension in image_extensions:
            parts.append({"type": "input_image", "image_url": data_url, "detail": "auto"})
            continue
        if extension == 'pdf':
            parts.append(
                {
                    "type": "input_file",
                    "filename": f"{file_digest}.{extension}",
                    "file_data": data_url,
                }
            )
            continue
        raise ValueError(f"Unsupported file type: {extension} for file with digest {file_digest}")

    # Text goes last, both in the message content and in the digest input.
    if input_text:
        hashed.append(input_text.encode())
        parts.append({"type": "input_text", "text": input_text})

    message: dict[str, Any] = {"role": "user", "content": parts}
    return message, xxhash.xxh3_64_hexdigest(b''.join(hashed))