Yes. You can include multiple base64 images in a single user message, and you can interleave text parts with the images in sequence.
Below is an example of a complete app request with multiple base64 images; it uses a strict response format so that the output contains only the fields meant to be sent on to a DALL-E API.
from openai import OpenAI
# Example request: two images plus interleaved text instructions are sent in
# one user message, and the reply is constrained by a strict JSON schema
# ("dalle_output") that has a "prompt" string and a "size" enum.


def text_part(text):
    """Return a chat content part of type "text" carrying *text*."""
    return {"type": "text", "text": text}


def image_part(url):
    """Return a chat content part of type "image_url" pointing at *url*."""
    return {"type": "image_url", "image_url": {"url": url}}


# The client is constructed with no explicit arguments.
client = OpenAI()

# System instruction, given as a list holding a single text part.
system_message = {
    "role": "system",
    "content": [
        text_part(
            "You use your computer vision on user images to make new AI images."
        ),
    ],
}

# One user message; each image is preceded by a text part that labels it, and
# a final text part carries the instruction.
# NOTE(review): the "..." in the data URLs looks like a placeholder for the
# real base64 payload -- confirm before running.
user_message = {
    "role": "user",
    "content": [
        text_part("Take the style from this image:"),
        image_part("data:image/png;base64,..."),
        text_part("Take the subject from this image:"),
        image_part("data:image/png;base64,..."),
        text_part(
            "Examine attached images. Synthesize a new dalle image, with combined description."
        ),
    ],
}

# JSON schema for the reply: both properties are required and no additional
# properties are allowed.
dalle_schema = {
    "type": "object",
    "properties": {
        "prompt": {
            "type": "string",
            "description": "A textual description of the image to be generated. Target 200 words.",
        },
        "size": {
            "type": "string",
            "description": "The size of the generated image. 1792 is 'wide'.",
            "enum": ["1024x1024", "1792x1024"],
        },
    },
    "required": ["prompt", "size"],
    "additionalProperties": False,
}

structured_output = {
    "type": "json_schema",
    "json_schema": {
        "name": "dalle_output",
        "strict": True,
        "schema": dalle_schema,
    },
}

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[system_message, user_message],
    response_format=structured_output,
    temperature=0.5,
    max_completion_tokens=1500,
    top_p=0.9,
)