Assistant Implementation Concern

I don’t know what is happening, but implementation 1 works and 2 is getting a bad request error.

Implementation 1:

import os
import time
from datetime import datetime
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()

try:
    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

    # Register a retrieval-enabled assistant that is backed by one uploaded file.
    assistant = client.beta.assistants.create(
        model='gpt-4-1106-preview',
        name="Machine Learning FAQ's Assistant",
        instructions='As an AI researcher with expertise in labor law and the TRAIN law of the Philippines, your role is to provide comprehensive and precise responses to inquiries. Draw upon the information contained within the uploaded documents to inform your answers.',
        tools=[{'type': 'retrieval'}],
        file_ids=['file-R2D9wZymehervuVT4u14mXLr'],
    )

    # A fresh thread holding the single user question.
    thread = client.beta.threads.create()
    message = client.beta.threads.messages.create(
        content='Can you tell me about the document you have?',
        role='user',
        thread_id=thread.id,
    )

    def wait_on_run_retrieve(run, thread):
        """Poll the run until it is no longer queued or in progress."""
        still_pending = ('queued', 'in_progress')
        while run.status in still_pending:
            run = client.beta.threads.runs.retrieve(
                run_id=run.id,
                thread_id=thread.id,
            )
            time.sleep(0.5)
        return run

    # Time the whole run: from submitting it until polling sees it finish.
    start_time = datetime.now()
    run = client.beta.threads.runs.create(
        assistant_id=assistant.id,
        thread_id=thread.id,
    )
    run = wait_on_run_retrieve(run=run, thread=thread)
    end_time = datetime.now()
    duration = (end_time - start_time).total_seconds()
    print(f'AI response time: {duration} seconds')

    messages = client.beta.threads.messages.list(thread_id=thread.id)

    # Walk the returned page back to front and print every text message.
    for message in messages.data[::-1]:
        leading_part = message.content[0]
        if hasattr(leading_part, 'text'):
            print(f'{message.role} : {leading_part.text.value}')  # type: ignore

except Exception as exc:
    print(f'An error occurred: {exc}')

Implementation 2: Error:

django-1    |     raise self._make_status_error_from_response(err.response) from None
django-1    | openai.BadRequestError: Error code: 400 - {'error': {'message': 'Files [file-R2D9wZymehervuVT4u14mXLr] are invalid', 'type': 'invalid_request', 'param': None, 'code': None}}
import os
import time
from datetime import datetime
from openai import OpenAI

class AssistantResearcher:
    """
    A class to interact with the OpenAI API to create assistants and ask questions.
    """

    # Run states in which the run has not finished yet and must be polled again.
    _PENDING_STATUSES = ('queued', 'in_progress')

    def __init__(self, api_key):
        """
        Build the OpenAI client used by every other method.
        """
        self.client = OpenAI(api_key=api_key)

    def create_assistant(self, name, instructions, model, file_ids):
        """
        Create an assistant with the given parameters.

        The assistant is always created with the 'retrieval' tool enabled and
        the given file IDs attached. Returns whatever the API client returns.
        """
        return self.client.beta.assistants.create(
            name=name,
            instructions=instructions,
            tools=[{'type': 'retrieval'}],
            model=model,
            file_ids=file_ids
        )

    def ask_question(self, assistant_id, question, thread_id=None):
        """
        Ask a question to the assistant and return the response.
        If a thread_id is provided, it reuses the thread instead of creating a new one.

        Returns the text of the assistant message produced by this run, or
        None when the run produced no text message.
        """
        if thread_id is None:
            thread = self.client.beta.threads.create()
            thread_id = thread.id
        else:
            thread = self.client.beta.threads.retrieve(thread_id=thread_id)

        self.client.beta.threads.messages.create(
            thread_id=thread_id,
            role='user',
            content=question,
        )
        start_time = datetime.now()
        run = self.client.beta.threads.runs.create(
            thread_id=thread_id,
            assistant_id=assistant_id
        )
        run = self.wait_on_run_retrieve(run=run, thread=thread)
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()
        print(f'AI response time: {duration} seconds')
        # Restrict the lookup to this run so that a reused thread never hands
        # back an answer that belongs to an earlier question.
        return self.get_assistant_response(thread_id=thread_id, run_id=run.id)

    def wait_on_run_retrieve(self, run, thread, poll_interval=0.5):
        """
        Wait for the run to complete and retrieve the result.

        Polls while the run is 'queued' or 'in_progress', pausing
        poll_interval seconds before each poll, and returns the last
        retrieved run object.
        """
        while run.status in self._PENDING_STATUSES:
            # Sleep first: no delay is added once a finished status is seen.
            time.sleep(poll_interval)
            run = self.client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id,
            )
        return run

    def get_assistant_response(self, thread_id, run_id=None):
        """
        Retrieve the assistant's response from the thread.

        Returns the text of the most recent assistant message, or None if
        there is none. When run_id is given, only messages whose run_id
        matches are considered.
        """
        # Ask for newest-first explicitly so the first match is the latest answer.
        messages = self.client.beta.threads.messages.list(
            thread_id=thread_id,
            order='desc',
        )
        for message in messages.data:
            if message.role != 'assistant':
                continue
            if run_id is not None and getattr(message, 'run_id', None) != run_id:
                continue
            # Skip messages with no content parts or a non-text first part.
            if message.content and hasattr(message.content[0], 'text'):
                return message.content[0].text.value  # type: ignore
        return None

class TestAssistant:
    """
    Demonstration harness showing how AssistantResearcher is meant to be driven.
    """
    @staticmethod
    def example_usage():
        """
        Create a test assistant, ask it a single question and print the answer.
        """
        # The key is read from the environment; it is None when the variable is unset.
        researcher = AssistantResearcher(api_key=os.environ.get("OPENAI_API_KEY"))
        created = researcher.create_assistant(
            name="Test Assistant",
            instructions="Answer the following questions accurately.",
            model="gpt-4-1106-preview",
            file_ids=['file-R2D9wZymehervuVT4u14mXLr'],
        )
        answer = researcher.ask_question(
            assistant_id=created.id,
            question="Can you tell me about the document you have?",
        )
        print(f"Assistant's response: {answer}")

I ran the second one partly as a file and partly piecemeal, and it works with just the lines within TestAssistant. It also works when invoked through a class object:

# example_usage is declared as a staticmethod; here it is called through an instance.
tester = TestAssistant()
tester.example_usage()

Only thing is I left the file list empty – and you got a file error.

The API has been finicky about particular file types, such as JSON. I would start with a .txt file. Maybe one that says the capital was moved to Nice in December…

Hahaha, even ChatGPT is confused when I try to get help.

django-1 | File "/usr/local/lib/python3.11/site-packages/openai/_base_client.py", line 930, in _request
django-1 | raise self._make_status_error_from_response(err.response) from None
django-1 | openai.BadRequestError: Error code: 400 - {'error': {'message': 'Files [file-R2D9wZymehervuVT4u14mXLr] are invalid', 'type': 'invalid_request', 'param': None, 'code': None}}
django-1 | ERROR [2024-01-11 02:43:21,892] [9f19075204aa42e78fd9cdaa5bb96b1a] django.server: "GET /bots/retrieval-qa?q=Can%20you%20tell%20me%20about%20the%20document%20you%20have? HTTP/1.1" 500 125069

Mistake one: don’t ask a bot.

But in case your error message was too long to be understood (and I had to chow through your 90+ lines of code):

Files [file-R2D9wZymehervuVT4u14mXLr] are invalid

1 Like

Thank you for your time. I appreciate it. I don’t know why the file is invalid, because it works in my first implementation. Implementation 2 works if file_ids is empty.

I suggest approaching this function as shown below; it works for me. I previously experienced a similar error.

    def wait_on_run_retrieve(self, run, thread, poll_interval=0.5):
        """
        Wait for the run to complete and retrieve the result.

        The run is always fetched once up front, so the status checked is the
        retrieved one rather than the status on the `run` object passed in.
        While that status is 'queued' or 'in_progress', the method pauses
        poll_interval seconds and fetches the run again. Returns the last
        retrieved run object.
        """
        def fetch():
            # Single place that knows how to look the run up on its thread.
            return self.client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id,
            )

        retrieve = fetch()
        while retrieve.status in ('queued', 'in_progress'):
            # Sleep before re-polling: no delay is added once the run is done.
            time.sleep(poll_interval)
            retrieve = fetch()
        return retrieve

Are you immediately uploading the files as part of the code that creates the assistant? It may take some processing time after upload for files to become available to retrieval methods.

Throw a time.sleep(30) in before your assistant creation and see.

No, I’m referencing the files from the OpenAI Files tab, copying the file’s ID.

1 Like

You might do a “list files” yourself from the API, and then send only from the verified pattern match string of the file you want.

1 Like

This solved the error: the files shown in the tab in the UI are different from the ones returned by https://platform.openai.com/docs/api-reference/files/list