Strange thread behaviour with assistant

I swapped out the standard direct-output path in my kruel.ai system for an Assistants endpoint that uses a continued thread per user, with my AI memory system providing the data.

What I noticed is that everything starts out fine, but then all of a sudden it just starts returning the same last response for no reason. When I look at the thread in the viewer I can see that it is posting a new response every time, so I'm not sure why the response coming back to my code seems to be an older one.

Here are the relevant parts of the code to explain the logic I am trying:

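For reference, these functions assume some module-level setup roughly like this (the interval and timeout values here are just what I'm testing with, not anything tuned):

import time
import asyncio

# Backoff settings for polling the run status (placeholder values)
INITIAL_POLLING_INTERVAL = 1.0   # seconds before the first re-check
MAX_POLLING_INTERVAL = 8.0       # cap for the exponential backoff
TIMEOUT = 120.0                  # give up waiting on the run after this long

# In-memory map of user -> thread ID so each user keeps one continued thread
user_threads = {}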
async def handle_assistant_response(assistant_id, super_prompt, user, async_client):
    runic_logging_debug(f"🧠🧠🧠 Handling assistant response for user '{user}' with super prompt '{super_prompt}'")

    # Ensure a thread exists for the user or create one
    thread_id = await ensure_thread_exists(async_client, user)

    # Send user's message to the thread and create a run
    try:
        message_response = await async_client.beta.threads.messages.create(
            thread_id=thread_id,
            role="user",
            content=super_prompt  # Ensure super_prompt is sent
        )
        runic_logging_debug(f"Message sent to thread {thread_id}, response: {message_response}")

        run = await trigger_assistant(async_client, thread_id, assistant_id)
        runic_logging_debug(f"Triggered assistant {assistant_id} for thread {thread_id}, run ID: {run.id}")
    except Exception as e:
        runic_logging_debug(f"Error in handling response: {e}")
        return "Sorry, there was an error in processing your request."

    # Adaptive polling for assistant's response
    start_time = time.time()
    current_polling_interval = INITIAL_POLLING_INTERVAL
    last_notification_time = start_time

    while True:
        elapsed_time = time.time() - start_time
        if elapsed_time > TIMEOUT:
            runic_logging_debug("Assistant response timed out.")
            return "Assistant response timed out."

        # Send a direct message to the user every 10 seconds
        if time.time() - last_notification_time >= 10.0:
            runic_logging_debug("🧠🧠🧠...THINKING/RESEARCHING...🧠🧠🧠")
            last_notification_time = time.time()

        run_status = await async_client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
        runic_logging_debug(f"Polling assistant run status: {run_status.status}")

        if run_status.status == "completed":
            # List the thread's messages oldest-first and take the newest assistant one
            messages = await async_client.beta.threads.messages.list(thread_id=thread_id, order="asc")
            assistant_responses = [msg for msg in messages.data if msg.role == "assistant"]
            if assistant_responses:
                response = assistant_responses[-1].content[0].text.value
                runic_logging_debug(f"Assistant response: {response}")
                runic_logging_debug(f"Assistant: {assistant_id}, User: {user}, Super Prompt: {super_prompt}, Response: {response}")
                return response
            break
        elif run_status.status == "failed":
            runic_logging_debug(f"Run failed: {run_status.last_error}")
            return "Assistant run failed."

        # Exponential backoff between status checks, capped at MAX_POLLING_INTERVAL
        await asyncio.sleep(current_polling_interval)
        current_polling_interval = min(MAX_POLLING_INTERVAL, current_polling_interval * 2)

    runic_logging_debug("No response from assistant.")
    return "No response received from assistant."

async def ensure_thread_exists(async_client, user_id):
    if user_id not in user_threads:
        new_thread = await async_client.beta.threads.create()
        user_threads[user_id] = new_thread.id
        runic_logging_debug(f"Created new thread for user '{user_id}' with thread ID '{new_thread.id}'")
    return user_threads[user_id]

async def trigger_assistant(async_client, thread_id, assistant_id):
    run = await async_client.beta.threads.runs.create(
        thread_id=thread_id,
        assistant_id=assistant_id,
    )
    return run

async def generate_ai_response(async_client, super_prompt):
    """
    Generate an AI response based on the super prompt, instructing the model to
    clearly communicate its capabilities and limitations.
    """
    runic_logging_debug("generate_ai_response entered")

    enhanced_prompt = super_prompt + (
        "\n\nWhen responding, clearly respond with the persona and information available "
        "and avoid implying that further action will be taken beyond providing insights "
        "based on pre-existing data. If additional details are needed, encourage the user "
        "to specify their query further."
    )

    try:
        response = await async_client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=[{"role": "system", "content": enhanced_prompt}],
            max_tokens=500
        )
        if response.choices:
            return response.choices[0].message.content.strip()
        else:
            return "I currently don't have further details. Please specify your question if you're looking for specific information."
    except Exception as e:
        return f"Error generating response: {e}"

What am I missing? I'm sure I will figure it out eventually, but I thought I would ask to save some time 🙂

Without the assistant, my direct output works perfectly. My rationale for changing to an assistant is that I could have not only the kruel.ai memory system but also a thread endpoint with its own contextual memory for responses beyond what I provide. It would also give me a fast way to swap models for testing different levels of AI intelligence.
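If I understand the API right, that model swap should just be an update on the assistant itself rather than a code change, something like:

# Hypothetical model swap for testing; assumes assistants.update accepts a model field
updated = await async_client.beta.assistants.update(
    assistant_id,
    model="gpt-4-turbo-preview",  # or whichever model I'm benchmarking
)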

Thanks.