I tried an approach using 2 functions.
get_image() => str: A function that returns the URL (path) of an image.
describe_image(url: str) => str: A function that, given the path of an image, returns a description of it.
Inside the describe_image function there is an OpenAI API call that asks the model, via a user-role message, to describe the image.
The flow is: the user asks the AI for an image; the AI calls the get_image function, which returns a URL; the AI receives that tool result and then calls the describe_image function, passing it the URL from get_image. The describe_image function returns the description of the image, and the AI relays that description to the user.
Here is the code
import base64
import json
import mimetypes
import os

from dotenv import load_dotenv
from openai import OpenAI
# Local filesystem path of the image that get_image() hands to the model.
IMAGE_URL = "/tmp/my_image.png" # Add your own image path Here
# Model used both for the tool-calling loop and for the vision request.
# OPENAI_MODEL = "gpt-4o"
OPENAI_MODEL = "gpt-4o-mini"
# Load OPENAI_API_KEY (and any other settings) from a local .env file.
load_dotenv()
def get_image() -> str:
    """Return the URL/path of the image the model should describe.

    BUG FIX: the original wrapped the path in prose ("Here is the url of
    the image ..."), forcing the model to parse free text to recover the
    URL before it could call describe_image. Returning the bare path
    matches the advertised tool contract ("returns it url") and is
    unambiguous for the model to forward.
    """
    return IMAGE_URL
def describe_image(url: str) -> str:
    """Return a natural-language description of the image at *url*.

    *url* is a local file path; the file is read, base64-encoded, and sent
    inline to the vision model as a data URL.
    """
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    base64_image = encode_image_to_base64(url)
    # BUG FIX: the MIME type was hard-coded to image/jpeg even though the
    # default IMAGE_URL points at a .png file. Guess it from the file name
    # and keep image/jpeg only as a last-resort fallback.
    mime_type, _ = mimetypes.guess_type(url)
    mime_type = mime_type or "image/jpeg"
    response = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe the image"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                    },
                ],
            }
        ],
        max_tokens=300,
    )
    return response.choices[0].message.content
def encode_image_to_base64(image_path):
    """Read the file at *image_path* and return its contents as a base64 string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")
# Dispatch table mapping tool names (as advertised to the model in the
# `tools` schema) to the local Python callables that implement them.
FUNCTION_REPOSITORY = {"get_image": get_image, "describe_image": describe_image}
def process_chat_history(chat_history):
    """Drive a tool-calling conversation loop until the model gives a final answer.

    Repeatedly sends *chat_history* to the chat-completions endpoint with the
    two tool schemas. Whenever the model requests tool calls, executes them via
    FUNCTION_REPOSITORY, appends the assistant/tool messages to *chat_history*
    (mutated in place), and loops. Returns the model's final text response, or
    an "An error occurred: ..." string if any exception is raised.
    """
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Tool (function) schemas advertised to the model.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_image",
                "description": "Gets a random image and returns it url.",
                # FIX: an empty parameters spec must still be a valid JSON-Schema
                # object; a bare {} is not ("type"/"properties" are expected).
                "parameters": {"type": "object", "properties": {}},
            },
        },
        {
            "type": "function",
            "function": {
                "name": "describe_image",
                "description": "Describes the content of an image given its URL",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "The URL of the image to describe",
                        }
                    },
                    "required": ["url"],
                },
            },
        },
    ]

    try:
        while True:
            print("OPENAI API Call")
            response = client.chat.completions.create(
                model=OPENAI_MODEL,
                messages=chat_history,
                tools=tools,
                tool_choice="auto",
                max_tokens=300,
            )
            choice = response.choices[0]
            print(f"Finish Reason [{choice.finish_reason}]")

            if choice.finish_reason != "tool_calls":
                # Normal completion: record the final answer and stop looping.
                print(f"Assistants final Response: [{choice.message.content}]")
                chat_history.append(
                    {"role": "assistant", "content": choice.message.content}
                )
                return choice.message.content

            # BUG FIX: append ONE assistant message carrying ALL tool calls.
            # The original appended a separate assistant message per tool call,
            # which breaks the API contract when the model issues several calls
            # in one turn: every "tool" message must answer a tool_call id
            # listed on the immediately preceding assistant message.
            chat_history.append(
                {
                    "role": "assistant",
                    "content": choice.message.content,
                    "tool_calls": [
                        {
                            "id": tc.id,
                            "type": "function",
                            "function": {
                                "name": tc.function.name,
                                "arguments": tc.function.arguments,
                            },
                        }
                        for tc in choice.message.tool_calls
                    ],
                }
            )

            # Execute each requested tool and append its result.
            for tool_call in choice.message.tool_calls:
                function_name = tool_call.function.name
                # Guard against an empty arguments string (no-arg tools).
                function_args = json.loads(tool_call.function.arguments or "{}")
                print(f"Running function {function_name} with arguments {function_args}.")
                if function_name in FUNCTION_REPOSITORY:
                    function = FUNCTION_REPOSITORY[function_name]
                    function_response = (
                        function(**function_args) if function_args else function()
                    )
                else:
                    function_response = f"Unknown function: {function_name}"
                print(f"Function {function_name}. Response [{function_response}]")
                chat_history.append(
                    {
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        "name": function_name,
                        "content": function_response,
                    }
                )
    except Exception as e:
        # Best-effort boundary: surface the failure as a string so the demo
        # script never crashes mid-conversation.
        return f"An error occurred: {str(e)}"
# Example usage
if __name__ == "__main__":
    # Seed the conversation with a single user turn that requires both tools.
    messages = [{"role": "user", "content": "Get me an image and describe it."}]
    try:
        result = process_chat_history(messages)
        # Uncomment to inspect the full conversation transcript:
        # print("Final Chat History Dump")
        # print(json.dumps(messages, indent=2))
    except Exception as e:
        print(f"An error occurred: {str(e)}")