Assistant Implementation Concern

chrisgelosulit · January 11, 2024, 1:40am

I don’t know what is happening: implementation 1 works, but implementation 2 fails with a Bad Request (400) error.

Implementation 1:

import os
import time
from datetime import datetime
from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a .env file (if any) into the process environment
# before the API key is read below.
load_dotenv()

try:
    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

    # Assistant with the retrieval tool enabled and one attached file.
    assistant = client.beta.assistants.create(
        name="Machine Learning FAQ's Assistant",
        instructions='As an AI researcher with expertise in labor law and the TRAIN law of the Philippines, your role is to provide comprehensive and precise responses to inquiries. Draw upon the information contained within the uploaded documents to inform your answers.',
        tools=[{'type': 'retrieval'}],
        model='gpt-4-1106-preview',
        file_ids=['file-R2D9wZymehervuVT4u14mXLr']
    )

    thread = client.beta.threads.create()

    # Post the user's question; the returned message object is not needed.
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role='user',
        content='Can you tell me about the document you have?',
    )

    def wait_on_run_retrieve(run, thread):
        """
        Poll the run until it leaves the 'queued'/'in_progress' states.

        Returns the most recently fetched run object (or the one passed in
        if it was already in another state).
        """
        pending_statuses = ('queued', 'in_progress')
        while run.status in pending_statuses:
            # Sleep *before* re-fetching: once a fetch reports a finished
            # run we return immediately instead of waiting one more
            # interval, which would inflate the timing printed below.
            time.sleep(0.5)
            run = client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id,
            )
        return run

    start_time = datetime.now()  # Start timing the response generation
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id
    )
    run = wait_on_run_retrieve(run=run, thread=thread)
    end_time = datetime.now()  # End timing after the run is complete
    duration = (end_time - start_time).total_seconds()  # Calculate the duration
    print(f'AI response time: {duration} seconds')  # Output the duration

    # The loop above also exits for states other than 'completed'; say so
    # rather than silently printing a thread with no assistant reply.
    if run.status != 'completed':
        print(f'Run ended with status: {run.status}')

    messages = client.beta.threads.messages.list(
        thread_id=thread.id,
    )

    # Reverse the listing before printing the conversation.
    for message in reversed(messages.data):
        # Skip messages with no content parts instead of failing on [0].
        if message.content and hasattr(message.content[0], 'text'):
            print(message.role + ' : ' + message.content[0].text.value)  # type: ignore

except Exception as e:
    # Top-level boundary of the script: report the failure and exit normally.
    print(f'An error occurred: {e}')

Implementation 2: Error:

django-1    |     raise self._make_status_error_from_response(err.response) from None
django-1    | openai.BadRequestError: Error code: 400 - {'error': {'message': 'Files [file-R2D9wZymehervuVT4u14mXLr] are invalid', 'type': 'invalid_request', 'param': None, 'code': None}}

import os
import time
from datetime import datetime
from openai import OpenAI

class AssistantResearcher:
    """
    A class to interact with the OpenAI API to create assistants and ask questions.
    """

    # Run states in which the run has not finished and must be polled again.
    PENDING_RUN_STATUSES = ('queued', 'in_progress')

    def __init__(self, api_key):
        """
        Create the OpenAI client that every other method uses.
        """
        self.client = OpenAI(api_key=api_key)

    def create_assistant(self, name, instructions, model, file_ids):
        """
        Create an assistant with the retrieval tool enabled and return it.

        All arguments are forwarded to the API unchanged.
        NOTE(review): the API answers 400 "Files [...] are invalid" for file
        IDs it cannot resolve; checking the IDs against the Files "list"
        endpoint first is one way to verify them - confirm for your account.
        """
        return self.client.beta.assistants.create(
            name=name,
            instructions=instructions,
            tools=[{'type': 'retrieval'}],
            model=model,
            file_ids=file_ids
        )

    def ask_question(self, assistant_id, question, thread_id=None):
        """
        Ask a question to the assistant and return the response.
        If a thread_id is provided, it reuses the thread instead of creating a new one.

        Returns the text of the assistant message produced by this run, or
        None when the run produced no assistant text.
        """
        if thread_id is None:
            thread = self.client.beta.threads.create()
            thread_id = thread.id
        else:
            # wait_on_run_retrieve expects a thread object, so fetch it.
            thread = self.client.beta.threads.retrieve(thread_id=thread_id)

        self.client.beta.threads.messages.create(
            thread_id=thread_id,
            role='user',
            content=question,
        )
        # Monotonic clock: the measured interval cannot be skewed by
        # wall-clock adjustments while the run is in flight.
        start_time = time.monotonic()
        run = self.client.beta.threads.runs.create(
            thread_id=thread_id,
            assistant_id=assistant_id
        )
        run = self.wait_on_run_retrieve(run=run, thread=thread)
        duration = time.monotonic() - start_time
        print(f'AI response time: {duration} seconds')
        # Restrict the lookup to this run so that, on a reused thread, an
        # answer from an earlier question is never returned by mistake.
        return self.get_assistant_response(thread_id=thread_id, run_id=run.id)

    def wait_on_run_retrieve(self, run, thread, poll_interval=0.5):
        """
        Wait for the run to complete and retrieve the result.

        Re-fetches the run every poll_interval seconds while its status is
        'queued' or 'in_progress' and returns the last run object fetched
        (or the one passed in if it was already in another state).
        """
        while run.status in self.PENDING_RUN_STATUSES:
            # Sleep before fetching so we return as soon as a fetch reports
            # a finished run, without one extra trailing pause.
            time.sleep(poll_interval)
            run = self.client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id,
            )
        return run

    def get_assistant_response(self, thread_id, run_id=None):
        """
        Retrieve the assistant's response from the thread.

        Returns the text of the newest assistant message, or None if there
        is none. When run_id is given, only messages whose run_id matches
        are considered.
        """
        # Ask for newest-first explicitly and scan in that order, so the
        # latest answer wins on threads holding several exchanges.
        messages = self.client.beta.threads.messages.list(
            thread_id=thread_id,
            order='desc',
        )
        for message in messages.data:
            if message.role != 'assistant':
                continue
            if run_id is not None and message.run_id != run_id:
                continue
            # A message may have no content parts, or non-text parts first.
            for part in message.content:
                if hasattr(part, 'text'):
                    return part.text.value  # type: ignore
        return None

class TestAssistant:
    """
    Demo harness showing how AssistantResearcher is driven end to end.
    """
    @staticmethod
    def example_usage():
        """
        Create an assistant, ask it a single question and print the reply.
        """
        # Key comes from the environment; None is passed through if unset.
        researcher = AssistantResearcher(api_key=os.environ.get("OPENAI_API_KEY"))
        created = researcher.create_assistant(
            file_ids=['file-R2D9wZymehervuVT4u14mXLr'],
            model="gpt-4-1106-preview",
            instructions="Answer the following questions accurately.",
            name="Test Assistant",
        )
        # No thread_id is given, so ask_question starts a fresh thread.
        response = researcher.ask_question(
            question="Can you tell me about the document you have?",
            assistant_id=created.id,
        )
        print(f"Assistant's response: {response}")

_j · January 11, 2024, 2:13am

I ran the second one partly as a file and partly piecemeal, and it works using just the lines within TestAssistant. It also works when invoked through a class instance:

# Instantiate the demo class and run its example through the instance.
demo = TestAssistant()
demo.example_usage()

The only difference is that I left the file list empty – and the error you got is a file error.

The API has been finicky about particular file types, such as JSON. I would start with a .txt file. Maybe one that says the capital was moved to Nice in December…

chrisgelosulit · January 11, 2024, 2:44am

Hahaha even chatGPT is confused when I try to get help.

django-1 | File “/usr/local/lib/python3.11/site-packages/openai/_base_client.py”, line 930, in _request
django-1 | raise self._make_status_error_from_response(err.response) from None
django-1 | openai.BadRequestError: Error code: 400 - {‘error’: {‘message’: ‘Files [file-R2D9wZymehervuVT4u14mXLr] are invalid’, ‘type’: ‘invalid_request’, ‘param’: None, ‘code’: None}}
django-1 | ERROR [2024-01-11 02:43:21,892] [9f19075204aa42e78fd9cdaa5bb96b1a] django.server: “GET /bots/retrieval-qa?q=Can%20you%20tell%20me%20about%20the%20document%20you%20have? HTTP/1.1” 500 125069

_j · January 11, 2024, 2:57am

Mistake one: don’t ask a bot.

But in case your error message was too long to be understood (and I had to chow through your 90+ lines of code):

Files [file-R2D9wZymehervuVT4u14mXLr] are invalid

chrisgelosulit · January 11, 2024, 3:35am

Thank you for your time. I appreciate it. I don’t know why it is invalid, because it works in my first implementation. Implementation 2 works if file_ids is empty.

jonah_mytzuchi · January 11, 2024, 3:38am

chrisgelosulit:

    def wait_on_run_retrieve(self, run, thread):
        """
        Wait for the run to complete and retrieve the result.

        Re-fetches the run while its status is 'queued' or 'in_progress',
        pausing 0.5 seconds after each fetch, and returns the last run object
        fetched (or the one passed in if it was already in another state).
        """
        # Keep polling while the run is in one of the two not-yet-finished states.
        while run.status == 'queued' or run.status == 'in_progress':
            # Rebind run to a fresh copy so the loop condition sees the new status.
            run = self.client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id,
            )
            # Pause between polls.
            time.sleep(0.5)
        return run

I suggest writing this function as shown below; it works for me. I previously ran into a similar error.

    def wait_on_run_retrieve(self, run, thread, poll_interval=0.5):
        """
        Wait for the run to complete and retrieve the result.

        Always fetches a fresh copy of the run first instead of trusting the
        status on the object passed in, then re-fetches every poll_interval
        seconds while the status is 'queued' or 'in_progress'. Returns the
        last run object fetched.
        """
        pending_statuses = ('queued', 'in_progress')
        retrieve = self.client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id,
        )
        while retrieve.status in pending_statuses:
            # Sleep before the next fetch: this avoids two back-to-back
            # fetches at the start and an extra pause after the run is done.
            time.sleep(poll_interval)
            retrieve = self.client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id,
            )
        return retrieve

_j · January 11, 2024, 3:44am

Are you immediately uploading the files as part of the code that creates the assistant? It may take some processing time after upload for files to become available to retrieval methods.

Throw a time.sleep(30) in before your assistant creation and see.

chrisgelosulit · January 11, 2024, 3:47am

No, I’m referencing the files from the OpenAI Files tab, copying each file’s File ID.

_j · January 11, 2024, 3:54am

You might do a “list files” yourself from the API, and then send only from the verified pattern match string of the file you want.

chrisgelosulit · January 11, 2024, 4:09am

This solved the error: the files shown in the Files tab of the UI are different from the ones returned by this endpoint: https://platform.openai.com/docs/api-reference/files/list

Topic		Replies	Views
Need help with Assistant (uploading file and getting response back) API assistants-api	6	2214	February 16, 2024
Assistant sometimes reply with: files you've uploaded are not accessible with the myfiles_browser tool API gpt-4 , api	12	7712	March 19, 2024
Assistant API - Error with files API	20	6879	October 9, 2024
The Assistants API cannot access files for some reason API	10	3738	February 6, 2025
No response from Assistant api API gpt-4 , assistants-api	2	806	May 24, 2024

Assistant Implementation Concern

Files [file-R2D9wZymehervuVT4u14mXLr] are invalid

Related topics