This is what I previously documented:
# FILES: -- Messages example for Chat Completions, demonstrating PDF `file` content parts. --
# - PDFs are extracted text + page image, provided to the model in-context.
# - Only the "user" role may include `file` parts.
# - Provide one content part per PDF.
# - Use exclusively one of `file_id` OR base64 `file_data` inside each `file` object.
# Example Chat Completions `messages` payload exercising PDF `file` parts.
file_messages = [
    {
        "role": "system",
        "content": "You are a meticulous research assistant.",
    },
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Summarize the attached PDFs and list three key findings with page references.",
            },
            # First PDF: referenced by the ID of a previously uploaded file.
            {
                "type": "file",
                "file": {"file_id": "file-abc123def4567890"},
            },
            # Second PDF: sent inline as a base64 data URI, no upload step.
            {
                "type": "file",
                "file": {
                    "filename": "product-brochure.pdf",
                    "file_data": "data:application/pdf;base64,JVBERi0xLjQKJcTl8uXr...",
                },
            },
        ],
    },
]
I tried it with a fresh upload, and the request succeeds as before (maybe a little slower with file_id).
openai.__version__
'1.101.0'
Ensure:
- upload
"purpose": "user_data"
- uploaded by the same project ID (api key) making the API call (data scoping)
- not sending alternate organization or project headers in request
- don’t use SDK releases that are out-of-date or known to be broken
Below is a function that creates a “content” part for any type of attachment you want — I tried every modality Chat Completions supports, and all worked:
from pathlib import Path
import base64
from openai import OpenAI
def make_content_part(item: str, *, kind: str | None = None, detail: str = "auto") -> dict[str, object]:
"""
Create a single chat content part for Chat Completions:
- text => {"type": "text", "text": "..."}
- PDF file => {"type": "file", "file": {"file_id": "..."}}
or {"type": "file", "file": {"filename": "...", "file_data": "data:application/pdf;base64,..."}} # local only
- audio (mp3/wav)=> {"type": "input_audio", "input_audio": {"data": "...", "format": "mp3|wav"}}
- image => {"type": "image_url", "image_url": {"url": "<http(s) or data: URI>", "detail": "low|high|auto"}}
Classification precedence (default):
1) file id: length and startswith "file-" or "file_"
2) image URL: http(s) with no whitespace OR data:image/*;base64,...
3) local file: existing path:
- .pdf -> PDF "file"
- .mp3/.wav -> input_audio
- else -> image as data URI (JPEG if no extension)
4) fallback -> plain text
'kind' is a hint ("text" | "file" | "image" | "audio"), used if it can be satisfied without violating the spec.
'detail' is passed only for image content and defaults to "auto".
"""
def _is_http_url(s: str) -> bool:
return s.startswith(("http://", "https://")) and not any(ch.isspace() for ch in s)
def _is_data_image_uri(s: str) -> bool:
# Minimal detection for spec: only image data URIs are treated as images
return s.startswith("data:image/")
def _image_mime_for_ext(ext: str) -> str:
e = ext.lower().lstrip(".")
if e in ("", "jpg", "jpeg"):
return "image/jpeg"
if e == "png":
return "image/png"
if e == "gif":
return "image/gif"
if e == "webp":
return "image/webp"
if e in ("tif", "tiff"):
return "image/tiff"
if e == "bmp":
return "image/bmp"
if e == "svg":
return "image/svg+xml"
return f"image/{e}"
def _as_image_url(url: str) -> dict[str, object]:
return {"type": "image_url", "image_url": {"url": url, "detail": detail}}
s = item
is_file_id = (len(s) in range(20, 40)) and s.startswith(("file-", "file_")) # len(fileID) currently 27
is_http = _is_http_url(s)
is_data_img = _is_data_image_uri(s)
p = Path(s)
exists = p.exists() and p.is_file()
ext = p.suffix.lower().lstrip(".") if exists else ""
# Try the hint first, but only if satisfiable per spec; otherwise fall through to auto.
if kind:
k = kind.lower().strip()
if k in ("text", "input_text"):
return {"type": "text", "text": s}
if k in ("file", "pdf"):
if is_file_id:
return {"type": "file", "file": {"file_id": s}}
if exists and ext == "pdf":
b64 = base64.b64encode(p.read_bytes()).decode()
return {"type": "file", "file": {"filename": p.name, "file_data": f"data:application/pdf;base64,{b64}"}}
# else: cannot satisfy 'file' hint here; fall through to auto
if k in ("audio", "input_audio"):
if exists and ext in ("mp3", "wav"):
b64 = base64.b64encode(p.read_bytes()).decode()
return {"type": "input_audio", "input_audio": {"data": b64, "format": ext}}
# else: fall through to auto
if k in ("image", "image_url"):
if is_http or is_data_img:
return _as_image_url(s)
if exists:
raw = p.read_bytes()
b64 = base64.b64encode(raw).decode()
mime = _image_mime_for_ext(ext)
return _as_image_url(f"data:{mime};base64,{b64}")
# else: fall through to auto
# Default auto-classification (spec precedence)
if is_file_id:
return {"type": "file", "file": {"file_id": s}}
if is_http or is_data_img:
return _as_image_url(s)
if exists:
raw = p.read_bytes()
b64 = base64.b64encode(raw).decode()
if ext == "pdf":
return {"type": "file", "file": {"filename": p.name, "file_data": f"data:application/pdf;base64,{b64}"}}
if ext in ("mp3", "wav"):
return {"type": "input_audio", "input_audio": {"data": b64, "format": ext}}
mime = _image_mime_for_ext(ext)
return _as_image_url(f"data:{mime};base64,{b64}")
return {"type": "text", "text": s}
# -- Procedural demo --
# Candidate attachments (local filenames or a file ID). Each assignment below
# overwrites the previous one, so only the LAST line is actually used — swap
# in any of the others to try a different modality.
attached_files = ["241207-164148-ballad.mp3"]
attached_files = ["catcube.png"]
attached_files = ["learners_paper.pdf"]
attached_files = ["file-AHNwnQacW8EbfQpudJMYPm"]
# Convert every attachment into a Chat Completions content part.
content_parts = list(map(make_content_part, attached_files))
# Assemble the chat.completions `messages` array: a system prompt plus one
# user turn whose content is a leading text part followed by the attachments.
user_content: list[dict[str, object]] = [
    {"type": "text", "text": "What is being shown in the following content?"},
]
user_content.extend(content_parts)
messages = [
    {"role": "system", "content": "You are a meticulous research assistant."},
    {"role": "user", "content": user_content},
]
# Send request
# NOTE(review): duplicate import — OpenAI is already imported above; harmless.
from openai import OpenAI
# Picks up OPENAI_API_KEY (and project/org settings) from the environment.
client = OpenAI()
response = client.chat.completions.create(
    model="gpt-4.1-mini",  # gpt-4o-audio-preview for audio types
    messages=messages,
    max_completion_tokens=2500,  # cap on generated completion tokens
    service_tier="priority",  # presumably the paid priority tier — verify account access
)
# Print only the assistant's text reply.
print(response.choices[0].message.content)
(I distilled down the original function a bit with gpt-5 style mangling.)