How can you use the API to merge two pictures?

Yes. You can include multiple base64 images in one user message, and you can interleave text with the images in sequence.

This is an example of a complete application request with multiple base64 images. Its strict response format limits the model's output to the parameters (prompt and size) that are meant to be sent on to a DALL-E API.

from openai import OpenAI
client = OpenAI()


def _text_part(text):
    """Wrap a string as a ``text`` content part of a chat message."""
    return {"type": "text", "text": text}


def _image_part(url):
    """Wrap a URL (here a base64 data URL) as an ``image_url`` content part."""
    return {"type": "image_url", "image_url": {"url": url}}


# System instruction, sent as a single text part.
system_message = {
    "role": "system",
    "content": [
        _text_part("You use your computer vision on user images to make new AI images."),
    ],
}

# One user message: text labels interleaved with the two images, in order,
# followed by the final instruction.
user_message = {
    "role": "user",
    "content": [
        _text_part("Take the style from this image:"),
        _image_part("data:image/png;base64,..."),
        _text_part("Take the subject from this image:"),
        _image_part("data:image/png;base64,..."),
        _text_part("Examine attached images. Synthesize a new dalle image, with combined description."),
    ],
}

# Shape of the JSON reply: an image prompt plus one of two allowed sizes.
# Both fields are required and no other properties are permitted.
dalle_output_schema = {
    "type": "object",
    "properties": {
        "prompt": {
            "type": "string",
            "description": "A textual description of the image to be generated. Target 200 words.",
        },
        "size": {
            "type": "string",
            "description": "The size of the generated image. 1792 is 'wide'.",
            "enum": ["1024x1024", "1792x1024"],
        },
    },
    "required": ["prompt", "size"],
    "additionalProperties": False,
}

# Send the request; the reply is requested in the strict "dalle_output" schema.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[system_message, user_message],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "dalle_output",
            "strict": True,
            "schema": dalle_output_schema,
        },
    },
    temperature=0.5,
    max_completion_tokens=1500,
    top_p=0.9,
)
1 Like