Strange thread behaviour with assistant

I had my standard direct-output path working for my system, and swapped in an Assistants endpoint that uses a persistent thread per user, with my AI memory system providing the data.

What I noticed is that everything starts out fine, but then all of a sudden it just starts repeating the same last response for no apparent reason. I am using the thread viewer and can see that it generates a new response every time, so I am not sure why the response coming back appears to be an older one.
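
To illustrate the symptom, here is a minimal, self-contained sketch (plain Python, no API calls; the `Msg` type and the page size are my own stand-ins, not anything from my real code) of how picking the last element of an ascending, paginated message list can return a stale reply once a thread outgrows one page:

```python
from dataclasses import dataclass

@dataclass
class Msg:
    role: str
    text: str

# A thread that has grown past a hypothetical page size of 4.
thread = [Msg("user", f"q{i}") if i % 2 == 0 else Msg("assistant", f"a{i}")
          for i in range(10)]

PAGE_SIZE = 4  # stand-in for a list endpoint's default page limit

# Ascending order returns the OLDEST messages first, so the first page
# never contains the newest reply once the thread is long enough.
first_page_asc = thread[:PAGE_SIZE]
picked = [m for m in first_page_asc if m.role == "assistant"][-1]

print(picked.text)   # picks "a3", while the thread's newest reply is "a9"
```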

Here are the relevant parts of the logic I am trying:

```python
async def handle_assistant_response(assistant_id, super_prompt, user, async_client):
    runic_logging_debug(f":brain::brain::brain: Handling assistant response for user '{user}' with super prompt '{super_prompt}'")

    try:
        # Ensure a thread exists for the user or create one
        thread_id = await ensure_thread_exists(async_client, user)

        # Send the user's message to the thread and create a run
        message_response = await async_client.beta.threads.messages.create(
            thread_id=thread_id,
            role="user",
            content=super_prompt  # Ensure super_prompt is sent
        )
        runic_logging_debug(f"Message sent to thread {thread_id}, response: {message_response}")

        run = await trigger_assistant(async_client, thread_id, assistant_id)
        runic_logging_debug(f"Triggered assistant {assistant_id} for thread {thread_id}, run ID: {run.id}")
    except Exception as e:
        runic_logging_debug(f"Error in handling response: {e}")
        return "Sorry, there was an error in processing your request."

    # Adaptive polling for the assistant's response
    start_time = time.time()
    current_polling_interval = INITIAL_POLLING_INTERVAL
    last_notification_time = start_time

    while True:
        elapsed_time = time.time() - start_time
        if elapsed_time > TIMEOUT:
            runic_logging_debug("Assistant response timed out.")
            return "Assistant response timed out."

        # Send a direct message to the user every 10 seconds
        if time.time() - last_notification_time >= 10.0:
            last_notification_time = time.time()

        run_status = await async_client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
        runic_logging_debug(f"Polling assistant run status: {run_status.status}")

        if run_status.status == "completed":
            messages = await async_client.beta.threads.messages.list(thread_id=thread_id, order="asc")
            assistant_responses = [msg for msg in messages.data if msg.role == "assistant"]
            if assistant_responses:
                response = assistant_responses[-1].content[0].text.value
                runic_logging_debug(f"Assistant response: {response}")
                runic_logging_debug(f"Assistant: {assistant_id}, User: {user}, Super Prompt: {super_prompt}, Response: {response}")
                return response
        elif run_status.status == "failed":
            runic_logging_debug(f"Run failed: {run_status.last_error}")
            return "Assistant run failed."

        await asyncio.sleep(current_polling_interval)
        current_polling_interval = min(MAX_POLLING_INTERVAL, current_polling_interval * 2)

    runic_logging_debug("No response from assistant.")
    return "No response received from assistant."
```
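The adaptive-polling schedule I am using (double the interval each pass, capped at a maximum) can be exercised in isolation; the `INITIAL_POLLING_INTERVAL` and `MAX_POLLING_INTERVAL` values below are illustrative, not the ones from my config:

```python
INITIAL_POLLING_INTERVAL = 0.5   # illustrative values, not my real config
MAX_POLLING_INTERVAL = 8.0

def backoff_schedule(timeout: float):
    """Yield the sequence of sleep intervals the polling loop would use."""
    elapsed, interval = 0.0, INITIAL_POLLING_INTERVAL
    while elapsed < timeout:
        yield interval
        elapsed += interval
        # Same doubling-with-cap rule as the loop above
        interval = min(MAX_POLLING_INTERVAL, interval * 2)

print(list(backoff_schedule(30.0)))
# intervals ramp 0.5, 1.0, 2.0, 4.0, then hold at 8.0 until timeout
```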

```python
async def ensure_thread_exists(async_client, user_id):
    if user_id not in user_threads:
        new_thread = await async_client.beta.threads.create()
        user_threads[user_id] = new_thread.id
        runic_logging_debug(f"Created new thread for user '{user_id}' with thread ID '{new_thread.id}'")
    return user_threads[user_id]
```
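For context, `user_threads` is just a module-level dict keyed by user ID. The caching behaviour can be sketched without the API (the `FakeThreads` stub below stands in for `async_client.beta.threads`; it is not the real client):

```python
import asyncio
import itertools

user_threads = {}  # module-level cache: user_id -> thread_id

class FakeThreads:
    """Stand-in for async_client.beta.threads -- not the real client."""
    _ids = itertools.count(1)

    async def create(self):
        thread = type("Thread", (), {})()
        thread.id = f"thread_{next(self._ids)}"
        return thread

async def ensure_thread_exists(threads, user_id):
    if user_id not in user_threads:
        new_thread = await threads.create()
        user_threads[user_id] = new_thread.id
    return user_threads[user_id]

async def demo():
    threads = FakeThreads()
    a = await ensure_thread_exists(threads, "alice")
    b = await ensure_thread_exists(threads, "alice")  # cache hit: same ID
    c = await ensure_thread_exists(threads, "bob")    # new thread created
    return a, b, c

a, b, c = asyncio.run(demo())
print(a, b, c)
```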

```python
async def trigger_assistant(async_client, thread_id, assistant_id):
    try:
        run = await async_client.beta.threads.runs.create(
            thread_id=thread_id,
            assistant_id=assistant_id
        )
        return run
    except Exception as e:
        raise e
```

```python
async def generate_ai_response(async_client, super_prompt):
    runic_logging_debug("generate_ai_response entered")
    # Generate an AI response based on the super prompt, instructing the
    # model to clearly communicate its capabilities and limitations.
    enhanced_prompt = super_prompt + "\n\nWhen responding, clearly respond with the persona and information available and avoid implying that further action will be taken beyond providing insights based on pre-existing data. If additional details are needed, encourage the user to specify their query further."

    try:
        response = await async_client.chat.completions.create(
            model=MODEL_NAME,  # model name not shown in the snippet
            messages=[{"role": "system", "content": enhanced_prompt}],
        )
        if response.choices:
            return response.choices[0].message.content.strip()
        return "I currently don't have further details. Please specify your question if you're looking for specific information."
    except Exception as e:
        return f"Error generating response: {e}"
```

What am I missing? I am sure I will figure it out eventually, but thought I would ask to save some time :slight_smile:

Without the assistant, my direct output works perfectly. My rationale for changing to an assistant is that I could not only keep my memory system but also have a thread endpoint with its own contextual memory for responses beyond what I provide. It would also give me a fast way to swap models when testing different levels of AI intelligence.