UPDATE2: What helped was resizing the input image. In my case the input image is saved locally, and its file path is then used to base64-encode the image before it is sent to the API. Now the image is resized (according to the vision preferences) and then encoded.
# Function to resize the image while maintaining aspect ratio
def resize_image(generic_image_path, max_short_dimension=768, max_long_dimension=2000):
    """Return a resized copy of the image stored at ``generic_image_path``.

    The aspect ratio is preserved. The longer side is fitted to
    ``max_long_dimension`` and the shorter side to ``max_short_dimension``;
    the tighter of the two constraints determines the scaling factor.
    The factor is not capped at 1, so an image smaller than both limits
    is enlarged.

    Args:
        generic_image_path: Path (or file object) accepted by ``Image.open``.
        max_short_dimension: Maximum size, in pixels, of the shorter side.
        max_long_dimension: Maximum size, in pixels, of the longer side.

    Returns:
        A new PIL image with the scaled dimensions.
    """
    # The context manager closes the underlying file once resize() has read
    # the pixel data; the returned image is an independent in-memory copy.
    with Image.open(generic_image_path) as generic_image:
        generic_width, generic_height = generic_image.size

        # Determine which dimension is the limiting factor
        if generic_width > generic_height:
            scaling_factor = min(
                max_long_dimension / generic_width,
                max_short_dimension / generic_height,
            )
        else:
            scaling_factor = min(
                max_short_dimension / generic_width,
                max_long_dimension / generic_height,
            )

        # int() truncates, so clamp to 1 px to avoid a zero-sized target for
        # images with an extreme aspect ratio.
        new_width = max(1, int(generic_width * scaling_factor))
        new_height = max(1, int(generic_height * scaling_factor))

        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # resampling filter under its current name.
        return generic_image.resize((new_width, new_height), Image.LANCZOS)
# Function to convert the image to base64 (as requested by OpenAI)
def image_to_base64(generic_image_path, max_short_dimension=768, max_long_dimension=2000):
    """Resize the image, encode it as JPEG and return it as a base64 string.

    Args:
        generic_image_path: Path of the image to encode.
        max_short_dimension: Forwarded to ``resize_image``.
        max_long_dimension: Forwarded to ``resize_image``.

    Returns:
        The JPEG bytes of the resized image, base64-encoded and decoded to
        a ``str``.
    """
    resized_generic_image = resize_image(
        generic_image_path, max_short_dimension, max_long_dimension
    )

    # Pillow's JPEG writer rejects modes with alpha or a palette (e.g. RGBA,
    # LA, P — common for PNG input) with OSError, so convert those to RGB
    # first. Modes the writer already accepts are left untouched.
    if resized_generic_image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        resized_generic_image = resized_generic_image.convert("RGB")

    # Encode in memory; no temporary file is needed.
    generic_buffered = io.BytesIO()
    resized_generic_image.save(generic_buffered, format="JPEG")  # You can change format if necessary
    return base64.b64encode(generic_buffered.getvalue()).decode('utf-8')
# Function to extract description/code from the traffic violation notice image
def extract_image_details(openai_api_key, generic_image_path, timeout=60):
    """Send the image to the OpenAI chat completions endpoint.

    The image is resized and base64-encoded via ``image_to_base64`` and
    embedded in the request as a ``data:image/jpeg`` URL.

    Args:
        openai_api_key: API key placed in the ``Authorization`` header.
        generic_image_path: Path of the image to send.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error. Without a timeout the call could block
            indefinitely on a stalled connection.

    Returns:
        The ``requests`` response object, unmodified; the HTTP status is not
        checked here, so callers must inspect it themselves.
    """
    # Encode the image
    base64_encoded_image = image_to_base64(generic_image_path)

    generic_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }

    # NOTE(review): JSON mode is requested below, but the message contains
    # only the image and no text prompt. OpenAI's JSON-mode documentation
    # says the messages must instruct the model to produce JSON — confirm
    # that a text part was not dropped from this payload.
    generic_payload = {
        "model": "gpt-4o",
        "response_format": {"type": "json_object"},
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_encoded_image}"
                        },
                    }
                ],
            },
        ],
        "temperature": 0,
        "max_tokens": 300,
    }

    generic_response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=generic_headers,
        json=generic_payload,
        timeout=timeout,
    )
    return generic_response