I thought I’d show you how underwhelming the edits endpoint is. Previously, I created masks by editing real images in Photoshop, with its higher-quality editing tools, rather than in any dedicated application.
Thus a new Python application.
Create a dall-e-2 image in the application, and use the “pen tool” to draw a mask over the area to be replaced.
Note that the masked area is infilled by the edits endpoint starting from blank: the original content under the mask is not available to the model, so you can’t say or assume things like “the same person, but looking to the left” after deleting their head.
So we type an amended prompt into the editing prompt field, wanting a hat, and send it off.
The documentation completely omits how to get a base64 image out of the Python response object, which means more money spent by every developer to figure it out:
# Request the image; image_params must include "response_format": "b64_json"
# for the b64_json field below to be populated.
response = client.images.generate(**image_params)
# The base64-encoded image is carried on the first entry of response.data.
b64_data = response.data[0].b64_json
# Decode the base64 text into the raw image file bytes.
image_data = base64.b64decode(b64_data)
# Optionally wrap the bytes in a file-like object (e.g. for PIL's Image.open):
#image_file = BytesIO(image_data)
Here’s the code for the unpretty application shown. It is wired for DALL-E 2 image creation, so you can experiment with and understand the endpoint at a lower price.
#!/usr/bin/env python3
"""
dalle_editor.py
A tkinter GUI application that uses the new OpenAI DALL-E 2 API endpoints to:
- Create a 1024×1024 image from a prompt (using images.generate)
- Allow the user to “draw” a mask using a 20×20 eraser tool (this mask is stored as metadata only)
- Display the image unaltered with a semi-transparent red overlay showing the mask areas for feedback
- Call the edit endpoint (images.edit) using the original image and the mask (converted to a 32-bit PNG with an alpha channel)
- Save the unaltered original image to disk
Requirements:
- Python 3.11+
- Pillow: pip install pillow
- OpenAI Python SDK (with new images.generate / images.edit methods): pip install openai
- Set the environment variable OPENAI_API_KEY with your API key.
"""
import base64
from io import BytesIO
import tkinter as tk
from tkinter import filedialog, messagebox
from PIL import Image, ImageTk, ImageDraw
# Import the new OpenAI API client (which supports images.generate and images.edit)
from openai import OpenAI
# =============================================================================
# Main Application Class
# =============================================================================
class DalleEditorApp(tk.Tk):
    """Tkinter window for generating a DALL-E 2 image and editing it via a mask.

    The left panel holds the controls (pen toggle, mask reset, prompts,
    generate/edit/save buttons); the right panel is a 1024x1024 canvas that
    shows the current image with a semi-transparent red overlay on every
    area the user has marked for replacement.

    State:
        base_image: the current unaltered picture (PIL image, mode "RGB"),
            or None before the first generation.
        mask_image: a mode "L" image of the same size; 0 marks a pixel to be
            replaced, 255 marks an untouched pixel. None until an image exists.
        mask_modified: True once at least one brush stroke has been drawn
            since the mask was last reset.
    """

    # Model pinned for both generation and editing.
    MODEL = "dall-e-2"
    # Side length, in pixels, of the square mask brush.
    BRUSH_SIZE = 20

    def __init__(self):
        """Build the window, the OpenAI client, all widgets and mouse bindings."""
        super().__init__()
        self.title("DALL-E Editor")
        # Reserve 500px for controls on the left and 1024x1024 for the image.
        self.geometry("1600x1100")
        self.resizable(False, False)

        # Create the OpenAI API client (reads the OPENAI_API_KEY env variable).
        self.client = OpenAI()

        # -----------------------------------------------------------
        # UI panels
        # -----------------------------------------------------------
        # Left panel: control widgets (500 pixels wide).
        self.control_frame = tk.Frame(self, width=500)
        self.control_frame.pack(side=tk.LEFT, fill=tk.Y, padx=10, pady=10)

        # Right panel: image display (1024x1024).
        self.image_frame = tk.Frame(
            self, width=1024, height=1024, bd=2, relief=tk.SUNKEN
        )
        self.image_frame.pack(side=tk.RIGHT, padx=10, pady=10)
        self.canvas = tk.Canvas(
            self.image_frame, width=1024, height=1024, bg="gray"
        )
        self.canvas.pack()

        # -----------------------------------------------------------
        # Control widgets
        # -----------------------------------------------------------
        # Pen tool toggle button: toggles mask-drawing mode.
        self.drawing_enabled = False
        self.pen_button = tk.Button(
            self.control_frame,
            text="Pen Tool (Off)",
            width=20,
            command=self.toggle_pen_tool,
        )
        self.pen_button.pack(pady=5)

        # Reset mask button: clears any mask drawn.
        self.reset_button = tk.Button(
            self.control_frame,
            text="Reset Mask",
            width=20,
            command=self.reset_mask,
        )
        self.reset_button.pack(pady=5)

        # DALL-E creation prompt area.
        tk.Label(self.control_frame, text="DALL-E Prompt:").pack(pady=(15, 0))
        self.dalle_prompt_text = tk.Text(self.control_frame, height=5, width=50)
        self.dalle_prompt_text.pack(pady=5)
        self.create_new_button = tk.Button(
            self.control_frame,
            text="Create New",
            width=20,
            command=self.create_new_image,
        )
        self.create_new_button.pack(pady=5)

        # Editing prompt area.
        tk.Label(self.control_frame, text="Editing Prompt:").pack(pady=(15, 0))
        self.editing_prompt_text = tk.Text(
            self.control_frame, height=5, width=50
        )
        self.editing_prompt_text.pack(pady=5)
        self.create_edit_button = tk.Button(
            self.control_frame,
            text="Create Edit",
            width=20,
            command=self.create_edit,
        )
        self.create_edit_button.pack(pady=5)

        # Save to disk button.
        self.save_button = tk.Button(
            self.control_frame,
            text="Save to Disk",
            width=20,
            command=self.save_to_disk,
        )
        self.save_button.pack(pady=15)

        # -----------------------------------------------------------
        # Image and mask attributes
        # -----------------------------------------------------------
        # base_image: the unaltered image (PIL image, "RGB") from the API.
        # mask_image: grayscale ("L") mask; 0 = drawn (to be replaced),
        #             255 = untouched.
        self.base_image = None
        self.mask_image = None
        # Display-only objects (the overlay composite is never saved or sent).
        self.tk_image = None  # ImageTk.PhotoImage currently on the canvas
        self.canvas_image_id = None  # canvas item ID of the displayed image
        # True once at least one brush stroke has been drawn on the mask.
        self.mask_modified = False

        # -----------------------------------------------------------
        # Mouse bindings for mask drawing
        # -----------------------------------------------------------
        self.canvas.bind("<ButtonPress-1>", self.on_mouse_down)
        self.canvas.bind("<B1-Motion>", self.on_mouse_drag)

    # -----------------------------------------------------------
    # UI control methods
    # -----------------------------------------------------------
    def toggle_pen_tool(self) -> None:
        """Toggle the pen (mask drawing) tool and update the button label."""
        self.drawing_enabled = not self.drawing_enabled
        state = "On" if self.drawing_enabled else "Off"
        self.pen_button.config(text=f"Pen Tool ({state})")

    def reset_mask(self) -> None:
        """Discard every drawn mask area; show an error if no image is loaded."""
        if self.base_image is None:
            messagebox.showerror("Error", "No image loaded.")
            return
        self.mask_image = Image.new("L", self.base_image.size, 255)
        self.mask_modified = False
        self.update_display()  # refresh the overlay

    def create_new_image(self) -> None:
        """Generate a new image from the creation prompt and display it.

        Any failure (network, API, decoding) is reported in an error dialog
        rather than propagated, because this runs as a button callback.
        """
        prompt = self.dalle_prompt_text.get("1.0", tk.END).strip()
        if not prompt:
            messagebox.showerror(
                "Error", "Please enter a prompt for image creation."
            )
            return
        try:
            image_params = {
                "model": self.MODEL,
                "n": 1,  # only one image
                "size": "1024x1024",  # supported: 256x256, 512x512, 1024x1024
                "prompt": prompt,
                "response_format": "b64_json",
            }
            response = self.client.images.generate(**image_params)
            self._set_base_image(self._decode_image(response))
        except Exception as e:
            messagebox.showerror("Error", f"Error creating new image:\n{e}")

    def create_edit(self) -> None:
        """Send the current image and drawn mask to the edit endpoint.

        Requires a loaded image, at least one mask stroke, and a non-empty
        editing prompt; each missing precondition produces an error dialog.
        On success the edited result replaces the base image and the mask is
        cleared. Failures are reported in an error dialog.
        """
        if self.base_image is None:
            messagebox.showerror("Error", "No image loaded.")
            return
        if not self.mask_modified:
            messagebox.showerror(
                "Error",
                "No mask drawn. Use the pen tool to mark areas for editing.",
            )
            return
        prompt = self.editing_prompt_text.get("1.0", tk.END).strip()
        if not prompt:
            messagebox.showerror("Error", "Please enter an editing prompt.")
            return
        try:
            # Serialize the current base image to an in-memory PNG file.
            base_bytes = BytesIO()
            self.base_image.save(base_bytes, format="PNG")
            base_bytes.seek(0)

            # The API expects an RGBA PNG mask. Using mask_image as the alpha
            # channel makes drawn pixels (value 0) fully transparent, which is
            # how the area to be replaced is communicated.
            mask_rgba = Image.new("RGBA", self.mask_image.size, (0, 0, 0, 255))
            mask_rgba.putalpha(self.mask_image)
            mask_bytes = BytesIO()
            mask_rgba.save(mask_bytes, format="PNG")
            mask_bytes.seek(0)

            response = self.client.images.edit(
                model=self.MODEL,  # pinned explicitly, matching generation
                image=base_bytes,
                mask=mask_bytes,
                prompt=prompt,
                n=1,
                size="1024x1024",
                response_format="b64_json",
            )
            # The edit replaces the base image and resets the mask.
            self._set_base_image(self._decode_image(response))
        except Exception as e:
            messagebox.showerror("Error", f"Error creating edit:\n{e}")

    def save_to_disk(self) -> None:
        """Ask for a file name and save the unaltered base image as PNG."""
        if self.base_image is None:
            messagebox.showerror("Error", "No image to save.")
            return
        file_path = filedialog.asksaveasfilename(
            defaultextension=".png", filetypes=[("PNG Files", "*.png")]
        )
        if file_path:
            try:
                # Save the base image itself, not the overlay composite.
                self.base_image.save(file_path, "PNG")
            except Exception as e:
                messagebox.showerror("Error", f"Error saving image:\n{e}")

    # -----------------------------------------------------------
    # Private helpers
    # -----------------------------------------------------------
    @staticmethod
    def _decode_image(response) -> "Image.Image":
        """Return the first image of an API response as an RGB PIL image.

        Expects the response to carry base64 data in ``data[0].b64_json``
        (i.e. the request used ``response_format="b64_json"``).
        """
        image_data = base64.b64decode(response.data[0].b64_json)
        # Pillow detects the actual file type (PNG, WEBP, ...) on its own.
        return Image.open(BytesIO(image_data)).convert("RGB")

    def _set_base_image(self, image) -> None:
        """Install *image* as the base image with a fresh, empty mask."""
        if image.size != (1024, 1024):
            messagebox.showwarning(
                "Warning",
                f"Image resolution {image.size} does not match 1024x1024.",
            )
        self.base_image = image
        # 255 everywhere means "nothing masked".
        self.mask_image = Image.new("L", self.base_image.size, 255)
        self.mask_modified = False
        self.update_display()

    @staticmethod
    def _brush_box(
        x: int, y: int, width: int, height: int, size: int = 20
    ) -> "tuple[int, int, int, int] | None":
        """Return the brush square around (x, y), clipped to the image.

        The result is ``(left, top, right, bottom)`` with inclusive corners,
        covering at most ``size`` x ``size`` pixels, or None when the square
        lies entirely outside a ``width`` x ``height`` image (which happens
        when a drag leaves the canvas).
        """
        half = size // 2
        left = max(x - half, 0)
        top = max(y - half, 0)
        # Corners are inclusive, hence the "- 1" on the far edges.
        right = min(x - half + size - 1, width - 1)
        bottom = min(y - half + size - 1, height - 1)
        if right < left or bottom < top:
            return None
        return (left, top, right, bottom)

    # -----------------------------------------------------------
    # Mouse / drawing handlers for mask editing
    # -----------------------------------------------------------
    def on_mouse_down(self, event: tk.Event) -> None:
        """Stamp the brush at the click position when the pen tool is on."""
        if self.drawing_enabled and self.base_image is not None:
            self.draw_mask_at(event.x, event.y)

    def on_mouse_drag(self, event: tk.Event) -> None:
        """Stamp the brush at each drag position when the pen tool is on."""
        if self.drawing_enabled and self.base_image is not None:
            self.draw_mask_at(event.x, event.y)

    def draw_mask_at(self, x, y):
        """Mark a 20x20 square centered on (x, y) as "to be replaced".

        Pixels inside the square are set to 0 in ``self.mask_image``. The
        square is clipped to the image; positions whose square falls entirely
        outside the image are ignored, so dragging past the canvas edge does
        not hand an inverted rectangle to Pillow.
        """
        if self.mask_image is None:
            return
        box = self._brush_box(
            x, y, self.mask_image.width, self.mask_image.height, self.BRUSH_SIZE
        )
        if box is None:
            return
        ImageDraw.Draw(self.mask_image).rectangle(box, fill=0)
        self.mask_modified = True
        self.update_display()

    # -----------------------------------------------------------
    # Image display update (with mask overlay for user feedback)
    # -----------------------------------------------------------
    def update_display(self) -> None:
        """Redraw the canvas: base image plus a red overlay on masked areas.

        ``self.base_image`` itself is never modified; the overlay exists only
        in the displayed composite. The whole composite is rebuilt on each
        call.
        """
        if self.base_image is None:
            return
        base_disp = self.base_image.convert("RGBA")
        # Fully transparent layer that receives the red highlight.
        overlay = Image.new("RGBA", self.base_image.size, (0, 0, 0, 0))
        # Paste mask: 255 where the mask is drawn (value 0), else 0.
        binary_mask = self.mask_image.point(lambda p: 255 if p == 0 else 0)
        # Semi-transparent red (alpha 128) wherever the paste mask is set.
        overlay.paste((255, 0, 0, 128), (0, 0), binary_mask)
        display_img = Image.alpha_composite(base_disp, overlay)

        # Keep a reference on self so the PhotoImage is not garbage-collected.
        self.tk_image = ImageTk.PhotoImage(display_img)
        if self.canvas_image_id is None:
            self.canvas_image_id = self.canvas.create_image(
                0, 0, anchor="nw", image=self.tk_image
            )
        else:
            self.canvas.itemconfig(self.canvas_image_id, image=self.tk_image)
# =============================================================================
# Main
# =============================================================================
def main():
    """Create the editor window and run the tkinter event loop until it closes."""
    DalleEditorApp().mainloop()


if __name__ == "__main__":
    main()