GPT-4 Vision API Node.js Error

Hello all, could anyone help me with the Vision API integration in my Telegram bot code? I keep getting:

"Bot is running…

Error getting image description: {
  error: {
    message: "Invalid chat format. Expected 'content' field in all messages to be either str or list.",
    type: 'invalid_request_error',
    param: null,
    code: null
  }
}

"

I added credit to my OpenAI account for API use, and the API key is correct: it works for the text-generation part, just not for the Vision API…

This is the Replit link to my bot code if anyone is able to help edit the code to process the images ): Thank you so much if so!

Relevant code:

const axios = require("axios");

/**
 * Handle an image uploaded to the bot: reject oversized images, obtain a
 * description of the sketch via getImageDescription, record it in the chat
 * state and tell the user what to do next.
 *
 * Every outgoing bot message is awaited through a local non-throwing helper,
 * so a failed send is logged instead of surfacing as an unhandled promise
 * rejection.
 *
 * @param {*} chatId - Chat identifier, passed through to bot.sendMessage and updateState.
 * @param {Buffer|ArrayBuffer|Uint8Array} buffer - Raw image bytes.
 * @param {string} mimeType - MIME type of the image, forwarded to getImageDescription.
 * @param {string} OPENAI_API_KEY - API key, forwarded to getImageDescription.
 * @returns {Promise<void>} Resolves once the user has been notified (or the send failure was logged).
 */
async function processImageInput(chatId, buffer, mimeType, OPENAI_API_KEY) {
    // Send a message without ever rejecting: a delivery failure must not
    // abort the flow or escape as an unhandled rejection.
    const notify = async (text) => {
        try {
            await bot.sendMessage(chatId, text);
        } catch (sendError) {
            console.error('Error sending message to chat:', sendError);
        }
    };

    try {
        // Check image size
        const MAX_IMAGE_SIZE_BYTES = 20 * 1024 * 1024; // 20MB in bytes
        if (buffer.byteLength > MAX_IMAGE_SIZE_BYTES) {
            await notify("The image is too large to process. Please upload a smaller image.");
            return;
        }

        // Convert the image buffer to base64
        const base64Image = Buffer.from(buffer).toString('base64');

        // Call OpenAI Vision API for image processing
        const imageDescription = await getImageDescription(base64Image, mimeType, chatId, OPENAI_API_KEY);

        // Apply both state changes before awaiting the notification so the
        // chat never sits in a half-updated state while the message is in flight.
        updateState(chatId, 'garmentSketchDetails', imageDescription);
        updateState(chatId, "stage", "awaiting_additional_info");
        await notify("Sketch processed. Please provide any additional details or send 'done' if you are finished.");
    } catch (error) {
        console.error('Error processing the image:', error);
        await notify("Error converting the image. Please try again.");
    }
}

/**
 * Ask the OpenAI chat-completions endpoint to describe a garment sketch.
 *
 * The image is sent as an `image_url` content part carrying a base64 data
 * URL. Passing the data URL as a plain string would make it ordinary text,
 * not an image input.
 *
 * @param {string} base64Image - Base64-encoded image bytes, without a data-URL prefix.
 * @param {string} mimeType - MIME type placed in the data URL (e.g. "image/png").
 * @param {*} chatId - Not used by this function; kept so existing callers keep working.
 * @param {string} OPENAI_API_KEY - Sent as the Bearer token.
 * @returns {Promise<string>} The message content of the first returned choice.
 * @throws {Error} With a user-presentable message; the underlying failure is
 *   available on the thrown error's `cause` property.
 */
async function getImageDescription(base64Image, mimeType, chatId, OPENAI_API_KEY) {
    try {
        const payload = {
            model: "gpt-4-vision-preview",
            messages: [
                {
                    role: "system",
                    content: "Analyze the garment sketch and provide a description."
                },
                {
                    role: "user",
                    // Images must be typed content parts, not a bare string.
                    content: [
                        {
                            type: "image_url",
                            image_url: {
                                url: `data:${mimeType};base64,${base64Image}`
                            }
                        }
                    ]
                }
            ],
            max_tokens: 3500
        };

        const headers = {
            Authorization: `Bearer ${OPENAI_API_KEY}`,
            "Content-Type": "application/json"
        };

        const response = await axios.post("https://api.openai.com/v1/chat/completions", payload, { headers });

        if (response.data.choices && response.data.choices.length > 0) {
            const content = response.data.choices[0].message.content;
            console.log("Response received:", content);
            return content;
        } else {
            throw new Error('No response from OpenAI API');
        }
    } catch (error) {
        // Guard against non-Error throwables so the handler itself cannot throw.
        const rawMessage = error && typeof error.message === 'string' ? error.message : String(error);
        console.error('Error getting image description:', error && error.response ? error.response.data : rawMessage);

        let errorMessage = "Sorry, I couldn't analyze the image. Please try again later.";
        if (rawMessage.includes('Unsupported image type')) {
            errorMessage = rawMessage;
        }

        // Keep the original failure reachable for debugging.
        const wrapped = new Error(errorMessage);
        wrapped.cause = error;
        throw wrapped;
    }
}
1 Like

Is anyone able to help me figure out what I’m doing wrong? Who can I speak to? I tried posting here… I am following the API docs…

Hi @Nikoldigital ,
the error message points to a problem in how the payload is composed, in particular the ‘content’ fields.
For a request that carries an image, the ‘content’ of that message must be a list of typed parts, not a plain string. In your ‘payload’ constant, the ‘content’ values are plain strings, such as:

"content": "Analyze the garment sketch and provide a description."

or

"content": "data:${mimeType};base64,${base64Image}"

But they should look something like this:

"content": [
  {              
    "type": "text",                                               
    "text": "Analyze the garment sketch and provide a description."
  }
]

and

"content": [
  {
    "type": "text",
    "text": "{USER_PROMPT_HERE}"
  },
  {
    "image_url": {
      "url": "data:image/jpeg;base64,{BASE64_IMAGE_DATA_HERE}",
      "detail": "high"
    },
    "type": "image_url"
  }
]

My examples are JSON objects, but you can apply the same structure to JavaScript objects as well.

This is a complete structure of a working json payload:

{                     
  "messages": [
    {
      "content": [
        {
          "type": "text",
          "text": "Analyze the garment sketch and provide a description."
        } 
      ], 
      "role": "system"  
    },                                                               
    {                       
      "content": [
        {                    
          "type": "text",
          "text": "{USER_PROMPT_HERE_IF_ANY}"
        },
        {
          "image_url": {
            "url": "data:${mimeType};base64,${base64Image}",
            "detail": "{high/low IS_YOUR_CHOICE_SEE_API_DOCS}"
          },
          "type": "image_url"
        }                                                               
      ],
      "role": "user"
    }
  ],                                                                                                                                                                    
  "model": "gpt-4-vision-preview",
  "max_tokens": 3500
}

Hope it helps.
Ciao

2 Likes

Thank you so much! It worked!! I was stuck on this for 2 weeks.

1 Like