A lot of the time, when you request an image from DALL-E, it crops out parts of the subject. There are multiple threads about this online:
Redacted as I am not allowed to show links
There may already be a way to do this using Python, but so far I wasn’t able to find one, so I created one myself (with ChatGPT, of course, lol).
The following example will:
Prompt Dall-E for a realistic drawing of a dragon
Shrink the picture to 75% of its original size
Add a mask that fits around the area that was shrunken (the new transparent space)
Send the picture and mask back to DALL-E for the new result.
The results have been amazing!
import openai
import requests
import dotenv
import os
from PIL import Image
import io
import numpy as np
# Load settings from a .env file (python-dotenv) before any function runs;
# generatedalleimage() later reads OPENAI_TOKEN from os.environ.
dotenv.load_dotenv()
def generatedalleimage(prompt):
    """Generate an image with DALL-E and return the downloaded image bytes.

    Args:
        prompt: Text description passed to ``openai.Image.create``.

    Returns:
        bytes: The body of the HTTP response for the first image URL in the
        API response.

    Raises:
        requests.HTTPError: If the image download answers with an HTTP error
            status.
        requests.Timeout: If the image download does not respond in time.
    """
    # Set up the OpenAI API client
    openai.api_key = os.environ.get('OPENAI_TOKEN')
    # Call the DALL-E API to generate the image
    response = openai.Image.create(prompt=prompt)
    # Get the image URL from the response
    image_url = response["data"][0]["url"]
    print(image_url)
    # Download the image. Without a timeout requests can block forever, and
    # without the status check an error page would be returned as "image data"
    # and only fail later, confusingly, when it is opened as an image.
    download = requests.get(image_url, timeout=60)
    download.raise_for_status()
    return download.content
def shrink_image(image_data, shrink_ratio=0.75):
    """Shrink an image and centre it on a transparent canvas of the original size.

    The result is also written to ``shrunken.png`` in the current working
    directory.

    Args:
        image_data: Encoded image bytes readable by ``PIL.Image.open``.
        shrink_ratio: Scale factor applied to both width and height. Must be
            greater than 0 and at most 1.

    Returns:
        bytes: PNG-encoded RGBA image with the original dimensions, holding
        the shrunken picture in the middle and a transparent border around it.

    Raises:
        ValueError: If ``shrink_ratio`` is not in the range (0, 1].
    """
    # Validate first: 0 or a negative ratio produces an invalid target size,
    # and a ratio above 1 would enlarge the picture so that pasting it crops
    # it instead of leaving a border to fill in.
    if not 0 < shrink_ratio <= 1:
        raise ValueError(
            f"shrink_ratio must be greater than 0 and at most 1, got {shrink_ratio!r}"
        )

    with Image.open(io.BytesIO(image_data)) as image:
        canvas_size = image.size
        # int() truncates, so clamp to 1 pixel to keep tiny images resizable.
        new_size = (
            max(1, int(image.width * shrink_ratio)),
            max(1, int(image.height * shrink_ratio)),
        )
        shrunken_image = image.resize(new_size, Image.Resampling.LANCZOS)

    # A new RGBA image starts out fully transparent.
    new_image = Image.new("RGBA", canvas_size)
    # Centre the shrunken picture on the canvas.
    paste_position = (
        (canvas_size[0] - new_size[0]) // 2,
        (canvas_size[1] - new_size[1]) // 2,
    )
    new_image.paste(shrunken_image, paste_position)

    # Encode once and reuse the bytes for both the file and the return value.
    image_bytes = io.BytesIO()
    new_image.save(image_bytes, format='PNG')
    png_data = image_bytes.getvalue()
    with open("shrunken.png", "wb") as png_file:
        png_file.write(png_data)
    return png_data
def create_mask(original_image_size, shrunken_image_size):
    """Build a greyscale PNG mask with a white centre on a black background.

    The mask has the size of the original image. A rectangle the size of the
    shrunken image, centred on the canvas, is filled with 255; everything
    around it is 0. The result is also written to ``mask.png`` in the current
    working directory.

    NOTE(review): the mask is mode "L", which has no alpha channel. The OpenAI
    edit documentation describes masks in terms of transparent areas, so
    confirm how the API interprets a greyscale mask.

    Args:
        original_image_size: ``(width, height)`` of the full canvas.
        shrunken_image_size: ``(width, height)`` of the centred rectangle.

    Returns:
        bytes: The PNG-encoded mask.

    Raises:
        ValueError: If the rectangle is larger than the canvas in either
            dimension, or has a negative dimension.
    """
    canvas_width, canvas_height = original_image_size
    center_width, center_height = shrunken_image_size
    # A rectangle larger than the canvas would be cropped by paste() and
    # silently produce an all-white mask with no border left to fill in.
    if not (0 <= center_width <= canvas_width
            and 0 <= center_height <= canvas_height):
        raise ValueError(
            f"shrunken_image_size {shrunken_image_size!r} does not fit inside "
            f"original_image_size {original_image_size!r}"
        )

    # Black (0) canvas of the same size as the original image
    mask = Image.new("L", (canvas_width, canvas_height), 0)
    # White (255) rectangle of the same size as the shrunken image
    center = Image.new("L", (center_width, center_height), 255)
    # Centre the rectangle so it lines up with the pasted, shrunken picture
    center_position = (
        (canvas_width - center_width) // 2,
        (canvas_height - center_height) // 2,
    )
    mask.paste(center, center_position)

    # Encode once and reuse the bytes for both the file and the return value.
    mask_bytes = io.BytesIO()
    mask.save(mask_bytes, format='PNG')
    png_data = mask_bytes.getvalue()
    with open("mask.png", "wb") as png_file:
        png_file.write(png_data)
    return png_data
def extend_image(image_data, mask_data, prompt):
    """Send an image and mask to the Image Edit API and return the result bytes.

    Args:
        image_data: Image passed as ``image`` to ``openai.Image.create_edit``.
        mask_data: Mask passed as ``mask`` to ``openai.Image.create_edit``.
        prompt: Text description passed as ``prompt``.

    Returns:
        bytes: The body of the HTTP response for the first image URL in the
        API response.

    Raises:
        requests.HTTPError: If the image download answers with an HTTP error
            status.
        requests.Timeout: If the image download does not respond in time.
    """
    # Call the Image Edit API to extend the image
    response = openai.Image.create_edit(
        image=image_data,
        mask=mask_data,
        prompt=prompt,
        # Add any additional parameters you need for the edit here
    )
    # Get the edited image URL from the response
    edited_image_url = response["data"][0]["url"]
    print(edited_image_url)
    # Download the edited image. The timeout keeps a stalled connection from
    # hanging the script, and the status check stops an error page from being
    # returned as if it were image data.
    download = requests.get(edited_image_url, timeout=60)
    download.raise_for_status()
    return download.content
# Shared settings, defined once so the image, the mask and both API calls
# cannot drift out of sync.
PROMPT = "Realistic Drawing of a Dragon"
SHRINK_RATIO = 0.75

# Generate an image with DALL-E
image_data = generatedalleimage(PROMPT)
# Read the real dimensions from the generated image instead of assuming
# 1024x1024, so the mask always matches the picture that is sent back.
with Image.open(io.BytesIO(image_data)) as generated_image:
    original_image_size = generated_image.size
# Shrink the image
shrunken_image_data = shrink_image(image_data, SHRINK_RATIO)
# Same truncating arithmetic as shrink_image, so the mask centre lines up
# with the pasted picture.
shrunken_image_size = (
    int(original_image_size[0] * SHRINK_RATIO),
    int(original_image_size[1] * SHRINK_RATIO),
)
# Create a mask of the same size as the original image with a center matching the shrunken image
mask_data = create_mask(original_image_size, shrunken_image_size)
# Extend the image with the Image Edit API
extended_image_data = extend_image(shrunken_image_data, mask_data, PROMPT)
I am not allowed to show multiple images at once, so I will show the resulting image in a separate post.