Why is there no file_id for the file generated by the OpenAI API in my message object?

I am trying to ask GPT to highlight some information in a given PDF and return the PDF with highlights to me.

I first tried it in the ChatGPT web UI, using GPT-4o, and everything worked as expected: I could download the file generated by ChatGPT, and the information was highlighted.

But when I tried the same prompt through the API, it returned a similar message telling me that it had generated a file and that I could download it from sandbox:/file/hlt-patient-case.pdf.

I searched for how to get the content of that file and found some posts saying that I should be able to get the file_id from the message object, and that it would normally be included in content[0].text.annotations.

Unfortunately, the annotations list in my message is empty. Did I do something wrong, or is this a bug in the API or the SDK?

This is my code:

# Create the assistant. Note that file_search is the only tool enabled here.
assistant = openai.beta.assistants.create(
    name=assistant_name,
    model=model,
    description='An assistant for PDF process',
    instructions=assistant_instruction,
    tools=[
        {
            'type': 'file_search',
        }
    ]
)

# Upload the source PDF and register it in a vector store, both through the
# `gpt` helper module.
pdf_path = base_path / '64_559.pdf'
file = gpt.create_file_from_path(pdf_path.name, pdf_path)
vs = gpt.create_vector_store(pdf_path.name, [file])

# Start a thread with a single user message. The PDF is attached to the
# message for file_search, and the vector store is passed as a thread-level
# file_search resource.
thread = openai.beta.threads.create(
    messages=[
        {
            'role': 'user',
            'content': prompt,
            'attachments': [
                {
                    'file_id': file.id, 'tools': [ { 'type': 'file_search' } ]
                }
            ]
        }
    ],
    tool_resources={
        'file_search': {'vector_store_ids': [vs.id]}
    }
)

# Run the assistant on the thread.
# NOTE(review): create_and_poll presumably waits until the run reaches a
# terminal state -- confirm against the SDK documentation.
run = openai.beta.threads.runs.create_and_poll(
    thread_id=thread.id,
    assistant_id=assistant.id,
)

# Collect the messages that belong to this run and take the first one returned.
result_messages = list(openai.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))

result = result_messages[0]

And the result message is

Message(
  id='msg_eus22HVAlLrpMxGMA4jG7SO2', 
  assistant_id='asst_3ui4KC0d6K8vHN6B1NxoQs2b', 
  attachments=[], 
  completed_at=None, 
  content=[
    TextContentBlock(
      text=Text(
        annotations=[], 
        value='... the answer...\n\n[Download processed PDF](sandbox:/file/hlt-patient-case.pdf)\n\n...'), 
      type='text')], 
  created_at=1718242911, 
  incomplete_at=None, 
  incomplete_details=None, 
  metadata={}, 
  object='thread.message', 
  role='assistant', 
  run_id='run_yIAFJYSnnisT9xDP9wfWMUSV', 
  status=None, 
  thread_id='thread_4XYf1m1df3m9YiQrIehcfwQA')

I also upgraded the SDK to 1.34.0, the latest version I could install with pip, and tried starting the run in streaming mode, but no luck.


The `gpt` name in the code is a module that wraps some of the APIs so that I can extend them in the future.
Here are the functions used in the code above:

# gpt.py

def create_file_from_content(file_name: str, content: str|bytes, file_purpose: str = 'assistants') -> openai.types.file_object.FileObject:
    """Upload in-memory content to OpenAI under the given file name.

    Args:
        file_name: Name to give the uploaded file.
        content: The file payload, passed through unchanged.
        file_purpose: Value forwarded as the upload's ``purpose``.

    Returns:
        Whatever ``openai.files.create`` returns for the upload.
    """
    # The SDK receives the file as a (name, payload) tuple.
    return openai.files.create(file=(file_name, content), purpose=file_purpose)


def create_file_from_path(file_name: str, path: str | pathlib.Path, file_purpose: str = 'assistants') -> FileObject:
    """Read a local file in binary mode and upload its bytes to OpenAI.

    Args:
        file_name: Name to give the uploaded file (independent of ``path``).
        path: Location of the local file to read.
        file_purpose: Value forwarded as the upload's ``purpose``.

    Returns:
        The result of ``create_file_from_content`` for the bytes read.
    """
    with open(path, mode='rb') as stream:
        # The whole file is read into memory before the upload call is made.
        return create_file_from_content(file_name, stream.read(), file_purpose)


def create_file_from_url(file_name: str, url: str, file_purpose: str = 'assistants', *, timeout: float | None = None) -> FileObject:
    """Download a file over HTTP and upload its bytes to OpenAI.

    Args:
        file_name: Name to give the uploaded file.
        url: Address to download the content from.
        file_purpose: Value forwarded as the upload's ``purpose``.
        timeout: Optional timeout in seconds passed to ``requests.get``.
            The default ``None`` keeps the previous behaviour of waiting
            indefinitely.

    Returns:
        The result of ``create_file_from_content`` for the downloaded bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    resp = requests.get(url, timeout=timeout)
    # Without this check the body of an error page (404, 500, ...) would be
    # uploaded as if it were the requested file.
    resp.raise_for_status()
    return create_file_from_content(file_name, resp.content, file_purpose)


def get_file(file_name: str, file_purpose: str = 'assistants') -> Optional[FileObject]:
    """Find the first listed file whose ``filename`` equals *file_name*.

    Args:
        file_name: Exact file name to look for.
        file_purpose: Purpose passed to ``openai.files.list`` as a filter.

    Returns:
        The first matching file object, or ``None`` when nothing matches.
    """
    for candidate in openai.files.list(purpose=file_purpose):
        if candidate.filename == file_name:
            # Stop at the first hit; later entries are not inspected.
            return candidate
    return None


def delete_file(file_name: str, file_purpose: str = 'assistants') -> Optional[FileDeleted]:
    """Delete the file found by ``get_file`` for the given name, if any.

    Args:
        file_name: Exact file name to look up.
        file_purpose: Purpose forwarded to ``get_file``.

    Returns:
        The result of ``openai.files.delete`` when a file was found,
        otherwise ``None``.
    """
    existing = get_file(file_name, file_purpose)
    if not existing:
        # Nothing to delete.
        return None
    return openai.files.delete(existing.id)


def create_vector_store(name: str, files: list[FileObject]):
    """Create a vector store containing the given uploaded files.

    Args:
        name: Name for the new vector store.
        files: Uploaded file objects; only their ``id`` attributes are used.

    Returns:
        Whatever ``openai.beta.vector_stores.create`` returns.
    """
    file_ids = [uploaded.id for uploaded in files]
    return openai.beta.vector_stores.create(name=name, file_ids=file_ids)

I found the problem myself.
After I added code_interpreter to the tools, I could get the file information, including the file_id, from both the annotations and the attachments.

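My guess is that file_search does not actually support generating files: it only retrieves content from the uploaded documents, whereas code_interpreter is the tool that can run code and write new files.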

Here is the code (only the changed parts):

# Create the assistant with code_interpreter enabled in addition to
# file_search. Per the finding described above, adding code_interpreter is the
# change that makes the generated file's file_id appear in the result message.
assistant = openai.beta.assistants.create(
    name=assistant_name,
    model=model,
    description='An assistant for PDF process',
    instructions=assistant_instruction,
    tools=[
        {
            'type': 'file_search',
        },
        {
            'type': 'code_interpreter',
        }
    ]
)

# Start the thread with the PDF attached for both tools. The vector store is
# still passed as the thread-level file_search resource.
thread = openai.beta.threads.create(
    messages=[
        {
            'role': 'user',
            'content': prompt,
            'attachments': [
                {
                    'file_id': file.id, 'tools': [
                        { 'type': 'file_search' },
                        { 'type': 'code_interpreter' },
                    ]
                }
            ]
        }
    ],
    tool_resources={
        'file_search': {'vector_store_ids': [vs.id]}
    }
)

# after run
# Collect the messages that belong to this run and take the first one returned.
result_messages = list(openai.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))

result = result_messages[0]

# Gather annotations from every text block instead of assuming that
# content[0] is the text block.
annotations = [
    annotation
    for block in result.content
    if block.type == 'text'
    for annotation in block.text.annotations
]

# Because file_search is enabled as well, the list may also contain
# file_citation annotations, which have no `file_path` attribute. Pick the
# generated file explicitly rather than relying on it being the first entry.
file_path_annotations = [a for a in annotations if a.type == 'file_path']
if not file_path_annotations:
    raise RuntimeError(
        'The assistant message contains no file_path annotation, '
        'so there is no generated file to download.'
    )
file_id = file_path_annotations[0].file_path.file_id
file_name = base_path / 'processed.pdf'

# Download the generated file's bytes and store them locally.
out_pdf = openai.files.content(file_id).read()

with open(file_name, 'wb') as f:
    f.write(out_pdf)