GPT-4-Vision supporting JSON?

I saw on Twitter that GPT-4-Vision now supports JSON mode, but this is not mentioned in the documentation. Has anyone tried it yet? Is it actually supported now?

Yes, the parameter is supported - but you should still give a system description good enough that the parameter would not be required, because the AI still cannot guess what you want.

Here’s a vision example I happened to have open, tweaked to demonstrate JSON prompting together with the JSON-mode parameter.

import base64
import urllib.request
from PIL import Image
from io import BytesIO
from openai import OpenAI

# Module-level API client, built with no explicit arguments; the request at the
# bottom of the script is sent through it.
client = OpenAI()

def retrieve_image(input_string, max_size=1024):
    """Load an image from a URL or local path and return it as base64 PNG text.

    The image is downscaled (aspect ratio preserved) so that its longest side
    is at most ``max_size`` pixels, re-encoded as PNG, and base64-encoded.

    Args:
        input_string: An ``http://`` / ``https://`` URL to download, or the
            path of a local image file.
        max_size: Upper bound, in pixels, for the longest side of the result.
            Images already within the bound are not resized.

    Returns:
        The PNG bytes of the (possibly resized) image, base64-encoded and
        decoded to a ``str``.

    Errors raised by the download, the file read, or Pillow are not caught
    here and propagate to the caller.
    """
    # Match the scheme explicitly so a local file such as "http_cat.png" is
    # not mistaken for a URL.
    if input_string.startswith(("http://", "https://")):
        # Some hosts reject requests without a browser-like User-Agent.
        req = urllib.request.Request(input_string, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(req) as response:
            raw_bytes = response.read()
    else:
        with open(input_string, "rb") as image_file:
            raw_bytes = image_file.read()

    # Image.open() is lazy: it parses the header and reads pixel data only when
    # needed. Decoding from an in-memory buffer (instead of a file handle that
    # the ``with`` block has already closed) keeps the later resize()/save()
    # calls working for local files too.
    image = Image.open(BytesIO(raw_bytes))

    width, height = image.size
    if max(width, height) > max_size:
        aspect_ratio = float(width) / float(height)
        if width > height:
            new_width = max_size
            # Clamp to 1 so extreme aspect ratios never yield a 0-pixel side.
            new_height = max(1, int(new_width / aspect_ratio))
        else:
            new_height = max_size
            new_width = max(1, int(new_height * aspect_ratio))
        image = image.resize((new_width, new_height), Image.LANCZOS)

    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")

# The source may be an http(s) URL (downloaded first) or a path to a local file.
base64_image = retrieve_image("https://i.imgur.com/C2Bvncv.png")

parameters = {
    "model": "gpt-4-turbo",
    # JSON mode. The prompt must still describe the JSON that is wanted; the
    # system message below does so and names the expected key.
    "response_format": {"type": "json_object"},
    "max_tokens": 500,
    "messages": [
        {
            "role": "system",
            # Adjacent literals are concatenated, so the first one ends with a
            # space to keep the two sentences from running together.
            "content": (
                "You are Extracto, a computer vision assistant. "
                'Brief response to user will be only JSON, no markdown, in the value for a JSON key "response"'
            ),
        },
        {
            "role": "user",
            # Documented multi-part content: one text part and one image part.
            # The image is passed inline as a base64 data URL; retrieve_image()
            # always re-encodes to PNG, hence the image/png media type.
            "content": [
                {
                    "type": "text",
                    "text": "An image is attached. Describe the image contents.",
                },
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{base64_image}"},
                },
            ],
        },
    ],
}

# Send the request and print the text of the first returned choice.
cc = client.chat.completions.create(**parameters)
print(cc.choices[0].message.content)
1 Like

Works for me. Ensure you use the latest model version: gpt-4-turbo-2024-04-09

1 Like