I am trying to ask GPT to highlight some information in a given PDF and return the PDF with highlights to me.
I first tried it on the ChatGPT Web UI, using GPT-4o, and everything works as expected: I can download the file generated by ChatGPT and the information is highlighted.
But when I tried the same prompt via the API, it returned a similar message telling me it had generated a file that I could download from sandbox:/file/hlt-patient-case.pdf.
I searched for solutions for getting the content of the file, and found some posts saying that I should be able to get the file_id
from the message object; normally, it should be included in content[0].text.annotations
.
But unfortunately, the annotations
list in my message is empty. So, did I do something wrong, or is this a bug in the API or SDK?
This is my code:
# Create an assistant that can search the attached PDF.
assistant = openai.beta.assistants.create(
    name=assistant_name,
    model=model,
    description='An assistant for PDF process',
    instructions=assistant_instruction,
    tools=[{'type': 'file_search'}],
)

# Upload the source PDF and index it into a vector store for file_search.
pdf_path = base_path / '64_559.pdf'
file = gpt.create_file_from_path(pdf_path.name, pdf_path)
vs = gpt.create_vector_store(pdf_path.name, [file])

# Start a thread whose first user message carries both the prompt and the
# PDF as a file_search attachment, with the vector store wired in.
thread = openai.beta.threads.create(
    messages=[
        {
            'role': 'user',
            'content': prompt,
            'attachments': [
                {
                    'file_id': file.id,
                    'tools': [{'type': 'file_search'}],
                },
            ],
        },
    ],
    tool_resources={'file_search': {'vector_store_ids': [vs.id]}},
)

# Run the assistant on the thread and block until the run finishes.
run = openai.beta.threads.runs.create_and_poll(
    thread_id=thread.id,
    assistant_id=assistant.id,
)

# Fetch the messages produced by this run; the first entry is the newest.
result_messages = list(
    openai.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)
)
result = result_messages[0]
And the result message is
Message(
id='msg_eus22HVAlLrpMxGMA4jG7SO2',
assistant_id='asst_3ui4KC0d6K8vHN6B1NxoQs2b',
attachments=[],
completed_at=None,
content=[
TextContentBlock(
text=Text(
annotations=[],
value='... the answer...\n\n[Download processed PDF](sandbox:/file/hlt-patient-case.pdf)\n\n...'),
type='text')],
created_at=1718242911,
incomplete_at=None,
incomplete_details=None,
metadata={},
object='thread.message',
role='assistant',
run_id='run_yIAFJYSnnisT9xDP9wfWMUSV',
status=None,
thread_id='thread_4XYf1m1df3m9YiQrIehcfwQA')
I also upgraded the SDK to 1.34.0, the latest version I could install with pip, and tried starting the run
in streaming mode. But no luck.
The gpt
module in the code wraps some of the APIs so I can extend them in the future.
Here are the functions used in the code above:
# gpt.py
def create_file_from_content(file_name: str, content: str | bytes, file_purpose: str = 'assistants') -> openai.types.file_object.FileObject:
    """Upload raw content to the OpenAI Files API and return the file object.

    The SDK accepts a ``(filename, content)`` tuple as the file payload, so
    no temporary file on disk is needed.
    """
    return openai.files.create(file=(file_name, content), purpose=file_purpose)
def create_file_from_path(file_name: str, path: str | pathlib.Path, file_purpose: str = 'assistants') -> FileObject:
    """Upload the file at *path* to the OpenAI Files API.

    Args:
        file_name: Name to register the upload under (need not match *path*).
        path: Location of the file on disk; str or Path accepted.
        file_purpose: OpenAI file purpose, defaults to 'assistants'.

    Returns:
        The created FileObject.
    """
    # Path.read_bytes() opens, reads, and closes the file in one idiomatic
    # call; wrapping in Path() also normalizes a plain-str argument.
    content = pathlib.Path(path).read_bytes()
    return create_file_from_content(file_name, content, file_purpose)
def create_file_from_url(file_name: str, url: str, file_purpose: str = 'assistants') -> FileObject:
    """Download *url* and upload its body to the OpenAI Files API.

    Args:
        file_name: Name to register the upload under.
        url: HTTP(S) location to download.
        file_purpose: OpenAI file purpose, defaults to 'assistants'.

    Returns:
        The created FileObject.

    Raises:
        requests.HTTPError: if the download returns an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    # A timeout prevents the call from hanging forever on a stalled server;
    # raise_for_status() fails fast instead of silently uploading an HTML
    # error page as if it were the requested file.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    return create_file_from_content(file_name, resp.content, file_purpose)
def get_file(file_name: str, file_purpose: str = 'assistants') -> Optional[FileObject]:
    """Return the first uploaded file whose filename matches, or None.

    Scans the account's file list filtered by *file_purpose*.
    """
    for candidate in openai.files.list(purpose=file_purpose):
        if candidate.filename == file_name:
            return candidate
    return None
def delete_file(file_name: str, file_purpose: str = 'assistants') -> Optional[FileDeleted]:
    """Delete the uploaded file named *file_name*.

    Returns the deletion result, or None when no matching file exists.
    """
    target = get_file(file_name, file_purpose)
    if target is None:
        return None
    return openai.files.delete(target.id)
def create_vector_store(name: str, files: list[FileObject]):
    """Create a vector store named *name* containing the given uploaded files."""
    ids = [uploaded.id for uploaded in files]
    return openai.beta.vector_stores.create(name=name, file_ids=ids)