I’m using the following code to preprocess a user’s query to identify which tool schema I should send to an Assistant:
import json
import os
import time
from dataclasses import dataclass, field
from inspect import cleandoc
from openai import AsyncOpenAI
from pprint import pprint
from rich import inspect
# Configure logging
from app.logging_gold import logging_gold as lg
logger = lg.get_logger(__name__)
client = AsyncOpenAI(organization=os.environ['OPENAI_ORGANIZATION_ID'])
prompt = cleandoc('''
<INSTRUCTIONS>
Select the tools needed to answer the user's query. Return the tools as a JSON array of the tool names or return NULL if there are no relevant tools.
<TOOLS>
tool_save_recipe - Saves a recipe to the database.
tool_add_item_to_inventory - Adds a specified quantity of an item to inventory.
tool_subtract_item_from_inventory - Subtracts a specified quantity of an item from inventory.
tool_set_inventory_amount - Sets the amount of inventory for a given item, discarding existing values.
tool_query_inventory - Requests the amount of inventory for a given item.
tool_modify_inventory_by_recipe - Adjusts the inventory by removing the ingredients needed to produce the specified quantity of a recipe.
tool_email_nutrition_label_for_recipe - Retrieves the nutrition information for a given recipe by its name and emails it to a specified address.
tool_flavor_pairings_for_juice_ingredient - Returns a list of ingredients with flavors that compliment the ingredient(s) that the user specifies.
tool_how_much_nutrient_in_food - Returns the amount of a given nutrient in a food.
tool_which_foods_high_in_nutrient - Returns a list of foods ordered by content of the requested nutrient.
tool_which_foods_for_condition - Returns a list of foods ordered by effectiveness for helping a user manage a health condition or reach a health goal.
''')
schema = {
"type": "json_schema",
"json_schema": {
"name": "tool_selector_schema",
"schema": {
"type": "object",
"properties": {
"query_result": {
"type": ["array", "null"],
"items": {
"type": "string"
}
}
},
"required": ["query_result"],
"additionalProperties": False
},
"strict": True
}
}
@dataclass
class ToolSelectionResult:
tools: list[str]|None
tokens_total: int
tokens_prompt: int
tokens_completion: int
completion_time: float
def __str__(self) -> str:
from pprint import pformat
completion_info = pformat({'tools': self.tools, 'tokens_total': self.tokens_total, 'tokens_prompt': self.tokens_prompt, 'tokens_completion': self.tokens_completion, 'completion_time': self.completion_time})
return ("Completion Response:\n"
f"{completion_info}\n"
f"\nUsage Details:\n"
f" Prompt Tokens: {self.tokens_prompt}\n"
f" Completion Tokens: {self.tokens_completion}\n"
f" Total Tokens: {self.tokens_total}\n")
async def get_tools_for_query(query: str) -> ToolSelectionResult:
'''
Gets a list of tools for a given message.
'''
start_time = time.time()
completion = await client.chat.completions.create(
model='gpt-4o-mini',
# response_format={"type": "json_object"},
# response_format={"type": "text"},
response_format=schema,
messages=[
{'role':'system', 'content':prompt},
{'role':'user', 'content': '<QUERY>\n'+query}
]
)
end_time = time.time()
completion_time = end_time-start_time
if len(completion.choices) > 1: # curious to see if any queries produce multiple choices...probably never
logger.warn(f"Got multiple choices for query: [{query}]. Choices: [{completion.choices}]")
print(completion.choices[0].message.content)
response_message = completion.choices[0].message.content
# tools = json.loads(response_message).get('query_result')
tools = response_message
result = ToolSelectionResult(
tools=tools,
tokens_total=completion.usage.total_tokens,
tokens_prompt=completion.usage.prompt_tokens,
tokens_completion=completion.usage.completion_tokens,
completion_time=completion_time
)
print(result)
return result
Timing this call with something simple like “add 10lb apples to inventory” usually completes in about 500ms, but every few runs it takes as many as 2800ms running the exact same query. When using a response_type of text
or json_object
there’s no random speed drop, so I believe it has to do with the schema. Has anyone else noticed similar performance issues?