Can someone sanity-check how I create a streaming Run? Tool calls seem to be very slow, even when the assistant has only one tool. Have I missed a best practice?
class ChatEventHandler(AsyncAssistantEventHandler):
    """Event handler that answers the tool-call requests of a streaming run.

    Callers assign ``sio`` and ``sid`` on the instance after construction;
    they are copied onto the follow-up handler created in
    ``submit_tool_outputs``.  (Presumably a Socket.IO server and a client
    session id -- confirm against the rest of the file.)
    """

    ...  # other members are elided in this excerpt

    async def _run_tool_call(self, tool):
        """Execute one requested tool call and build its tool-output entry.

        Argument parsing happens inside the ``try`` so that malformed JSON
        arguments are reported back as a tool error instead of raising out
        of the handler and leaving the run without any submitted outputs.

        Args:
            tool: A tool-call object exposing ``id``, ``function.name`` and
                ``function.arguments`` (a JSON-encoded string).

        Returns:
            A dict with ``'tool_call_id'`` and ``'output'`` keys.
        """
        try:
            func_args = json.loads(tool.function.arguments)
            output = await tools.call_tool_function(tool.function.name, func_args)
        except Exception as e:
            # Any failure (bad arguments or a failing tool) is converted into
            # a response by the project's own helper.
            output = tools.response_for_exception(exception=e)
        return {'tool_call_id': tool.id, 'output': output}

    async def handle_requires_action(self, data, run_id):
        """Run every requested tool call and submit the collected outputs.

        Args:
            data: Run payload whose
                ``required_action.submit_tool_outputs.tool_calls`` lists the
                tool calls to execute.
            run_id: Forwarded unchanged to ``submit_tool_outputs``.
        """
        import asyncio  # local import: the file's import block is not visible here

        tool_calls = data.required_action.submit_tool_outputs.tool_calls
        # The calls are awaited concurrently rather than one after another, so
        # the total wait is bounded by the slowest tool.  gather() returns the
        # results in the same order as the tool calls.
        tool_outputs = await asyncio.gather(
            *(self._run_tool_call(tool) for tool in tool_calls)
        )
        await self.submit_tool_outputs(list(tool_outputs), run_id)

    async def submit_tool_outputs(self, tool_outputs, run_id):
        """Submit tool outputs and stream the continuation of the run.

        A fresh ``ChatEventHandler`` sharing this handler's ``sio`` and
        ``sid`` receives the follow-up events.  Token usage is logged at
        debug level when the final run reports it.

        Args:
            tool_outputs: List of ``{'tool_call_id': ..., 'output': ...}``
                dicts.
            run_id: Not used here; the thread and run are taken from
                ``self.current_run`` (assumed to be supplied by the base
                class -- confirm).
        """
        handler = ChatEventHandler()
        handler.sio = self.sio
        handler.sid = self.sid
        async with client.beta.threads.runs.submit_tool_outputs_stream(
            thread_id=self.current_run.thread_id,
            run_id=self.current_run.id,
            tool_outputs=tool_outputs,
            event_handler=handler,
        ) as stream:
            await stream.until_done()
            run = await stream.get_final_run()
        usage = run.usage
        if usage:
            logger.debug(
                "tokens: %s (prompt: %s completion: %s)",
                usage.total_tokens,
                usage.prompt_tokens,
                usage.completion_tokens,
            )
# NOTE(review): this excerpt has lost its indentation, and the `try:` below has
# no visible `except`/`finally` clause, so the code is documented but otherwise
# left untouched. The `await`/`async with` statements mean it presumably lives
# inside an async function that is not shown.
# Create the event handler for this chat turn and attach the caller's `sio` and
# `sid` to it (presumably a Socket.IO server and session id -- confirm).
chat_handler = ChatEventHandler()
chat_handler.sio = sio
chat_handler.sid = sid
try:
# Start a streaming run on the thread; the user's input is supplied as an
# additional message and events are delivered to `chat_handler`.
async with client.beta.threads.runs.stream(
thread_id = thread_id,
# NOTE(review): presumably caps the prompt tokens the run may use; check the
# API documentation for how a low cap affects runs that use tools.
max_prompt_tokens = 1000,
assistant_id = assistant_id,
additional_messages = [
{'role':'user', 'content': user_input}
],
event_handler = chat_handler
) as stream:
# Wait until the stream has been fully consumed.
await stream.until_done()
# A run has usage=None if it is not in a terminal state such as 'completed';
# a run in 'requires_action', for example, will have None.
run = await stream.get_final_run()
usage = run.usage
# Log the query and token counts only when the run reported usage.
if usage:
logger.debug(f"query: [{user_input}]\ntokens: {run.usage.total_tokens} (prompt: {run.usage.prompt_tokens} completion: {run.usage.completion_tokens})")