API error - image_url is only supported by certain models

My code

import asyncio
import base64
from openai import AsyncOpenAI
from constants import OPEN_AI_API_TOKEN


async def convert_image_to_base64(image_path: str) -> str:
    """
    Convert an image into a base64-encoded string.

    :param image_path: Path to the image file.
    :return: Base64-encoded data URL string.
    """
    with open(image_path, "rb") as image_file:
        image_data = image_file.read()
        encoded_image = base64.b64encode(image_data).decode("utf-8")
        mime_type = "image/jpeg"  # Change to "image/png" if the file is a PNG
        return f"data:{mime_type};base64,{encoded_image}"


async def analyze_face_with_gpt(image_path: str):
    """
    Send an image together with a text prompt to ChatGPT.
    """
    # Convert the image to base64
    base64_image = await convert_image_to_base64(image_path)

    # Prepare the messages for GPT
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Привет!\nЧто ты обо мне думаешь?"
                },
                {
                    "type": "image_url",
                    "image": {
                        "url": base64_image
                    }
                }
            ]
        }
    ]

    # Initialize the OpenAI client
    client = AsyncOpenAI(api_key=OPEN_AI_API_TOKEN)
    # Execute the request
    response = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages
    )
    print(response['choices'][0]['message']['content'])
    return response['choices'][0]['message']['content']


# Run the function
photo = "photo_2024-09-29_03-41-17.jpg"
asyncio.run(analyze_face_with_gpt(photo))

It returns:

openai.BadRequestError: Error code: 400 - {'error': {'message': 'Invalid content type. image_url is only supported by certain models.', 'type': 'invalid_request_error', 'param': 'messages.[0].content.[1].type', 'code': None}}

From the docs:

GPT-4o mini

GPT-4o mini (“o” for “omni”) is our most advanced model in the small models category, and our cheapest model yet. It is multimodal (accepting text or image inputs and outputting text), has higher intelligence than gpt-3.5-turbo but is just as fast. It is meant to be used for smaller tasks, including vision tasks.

We recommend choosing gpt-4o-mini where you would have previously used gpt-3.5-turbo as this model is more capable and cheaper.

It is multimodal

Either there is a bug in the documentation, or in the API, or in my code.

Check the documentation: only a couple of models accept the "vision" multimodal functionality.

And for the models that do support vision, the base64-encoded data has to be passed to the chat completions endpoint in the proper structure, with the data URL nested under an "image_url" key:

response = client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "What is in this image?",
        },
        {
          "type": "image_url",
          "image_url": {
            "url":  f"data:image/jpeg;base64,{base64_image}"
          },
        },
      ],
    }
  ],
)
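
Applied to your async snippet, a minimal corrected sketch (assuming the same convert_image_to_base64 helper and OPEN_AI_API_TOKEN constant) could look like this. Note the "image_url" key instead of "image", and that the 1.x SDK returns a ChatCompletion object, so the reply is read with attribute access rather than dict subscripting:

async def analyze_face_with_gpt(image_path: str):
    """Send an image plus a text prompt to the chat completions endpoint."""
    # The helper already returns a full data URL: "data:image/jpeg;base64,..."
    base64_image = await convert_image_to_base64(image_path)

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Hi!\nWhat do you think of me?"},
                {
                    "type": "image_url",   # content-part type
                    "image_url": {         # the nested key must also be "image_url"
                        "url": base64_image
                    },
                },
            ],
        }
    ]

    client = AsyncOpenAI(api_key=OPEN_AI_API_TOKEN)
    response = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
    )
    # ChatCompletion is an object, not a dict
    answer = response.choices[0].message.content
    print(answer)
    return answer

The same structure also works for remote images: put a regular https URL in the "url" field instead of the base64 data URL.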