I couldn't figure out which model I'm supposed to use, could someone help?

import os
from openai import OpenAI
from typing import Optional, Dict, Any
from PIL import Image
import requests
from io import BytesIO
from dotenv import load_dotenv
import base64

load_dotenv()

# Interior design styles and their prompts

INTERIOR_STYLES = {
    "modern": {
        "prompt": "modern interior design, clean lines, minimalist, neutral colors, sleek furniture",
        "negative_prompt": "cluttered, vintage, rustic, ornate decorations"
    },
    "scandinavian": {
        "prompt": "scandinavian design, light woods, white walls, natural light, minimal decor",
        "negative_prompt": "dark colors, heavy furniture, cluttered spaces"
    },
    "industrial": {
        "prompt": "industrial style, exposed brick, metal fixtures, raw materials, open space",
        "negative_prompt": "traditional furniture, floral patterns, pastel colors"
    },
    "traditional": {
        "prompt": "traditional interior, classic furniture, warm colors, elegant details",
        "negative_prompt": "ultra-modern, minimalist, industrial elements"
    },
    "bohemian": {
        "prompt": "bohemian style, eclectic decor, rich patterns, plants, natural materials",
        "negative_prompt": "minimal decoration, monochromatic, formal arrangement"
    }
}

class StagingService:
    def __init__(self):
        self.client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

async def detect_room_type(self, image_url: str) -> str:
    """
    Detect the room type from the image using OpenAI's Vision API.
    """
    try:
        # Handle local file paths
        if image_url.startswith('file://'):
            image_path = image_url.replace('file://', '')
            with open(image_path, 'rb') as image_file:
                image_data = base64.b64encode(image_file.read()).decode('utf-8')
                image_url = f"data:image/jpeg;base64,{image_data}"

        response = self.client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "What type of room is this? Just respond with one word: living-room, bedroom, kitchen, bathroom, dining-room, or office."
                        },
                        {
                            "type": "image_url",
                            "url": image_url
                        }
                    ]
                }
            ],
            max_tokens=10
        )
        
        room_type = response.choices[0].message.content.strip().lower()
        return room_type if room_type in ["living-room", "bedroom", "kitchen", "bathroom", "dining-room", "office"] else "living-room"
        
    except Exception as e:
        print(f"Room detection error: {str(e)}")
        return "living-room"

    async def generate_staged_image(
        self,
        image_url: str,
        style: str,
        room_type: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Generate a staged image using OpenAI's DALL-E API.
        """
        try:
            if not room_type:
                room_type = await self.detect_room_type(image_url)

            style_config = INTERIOR_STYLES.get(style, INTERIOR_STYLES["modern"])

            # Construct the prompt
            prompt = f"Transform this {room_type} into a {style_config['prompt']}, maintain the room's layout and architectural features, photorealistic, professional interior design photography style"

            try:
                # Generate the staged image using DALL-E
                response = self.client.images.generate(
                    model="dall-e-3",
                    prompt=prompt,
                    n=1,
                    size="1024x1024",
                    quality="standard",
                    response_format="url"  # Explicitly request URL format
                )

                # Get the URL of the generated image
                staged_image_url = response.data[0].url

                return {
                    "success": True,
                    "staged_image_url": staged_image_url,
                    "room_type": room_type,
                    "style": style,
                    "prompt": prompt
                }

            except Exception as e:
                print(f"DALL-E generation error: {str(e)}")
                return {
                    "success": False,
                    "error": str(e),
                    "room_type": room_type,
                    "style": style
                }

        except Exception as e:
            return {
                "success": False,
                "error": str(e),
                "room_type": room_type,
                "style": style
            }

    async def process_image(
        self,
        image_url: str,
        style: str,
        room_type: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Process an image through the staging pipeline.
        """
        try:
            # Generate the staged image
            result = await self.generate_staged_image(image_url, style, room_type)

            if not result["success"]:
                return {
                    "success": False,
                    "error": result["error"]
                }

            return {
                "success": True,
                "original_image": image_url,
                "staged_image": result["staged_image_url"],
                "room_type": result["room_type"],
                "style": style
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }

Sure. There are many AI models with vision, and it mostly comes down to the quality of output you need on a difficult task, combined with the ability to see an image.

I would start with "gpt-4.1".
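
Side note: "gpt-4-vision-preview" in your detect_room_type has been retired, so that call will fail no matter how the message is formatted. Here's a minimal sketch of the corrected call inside detect_room_type, assuming the current openai SDK, keeping your prompt, and just swapping in "gpt-4.1" plus the properly nested image_url object:

response = self.client.chat.completions.create(
    model="gpt-4.1",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What type of room is this? Just respond with one word: living-room, bedroom, kitchen, bathroom, dining-room, or office."
                },
                {
                    "type": "image_url",  # the image goes in a nested object
                    "image_url": {"url": image_url}
                }
            ]
        }
    ],
    max_tokens=10
)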

However, why choose? Here's code that will randomly choose a model for you, from the OpenAI models that support vision!

(It also shows how to properly make a request; your code is not using the right format to send images in a user message.)

import base64, time, openai
import random

vision_models = [
    "o3-2025-04-16",
    "o4-mini-2025-04-16",
    "gpt-4.1-2025-04-14",
    "gpt-4.1-mini-2025-04-14",
    "gpt-4.1-nano-2025-04-14",
    "gpt-4.5-preview-2025-02-27",
    "o1-2024-12-17",
    "gpt-4o-2024-11-20",
    "gpt-4o-2024-08-06",
    "gpt-4o-mini-2024-07-18",
    "gpt-4o-2024-05-13",
    "gpt-4-turbo-2024-04-09",
]

# Select a random vision model!!! Chat Roulette version 2!
model = random.choice(vision_models)

# this section makes a built-in list of base64 images to try
pngpre = 'iVBORw0KGgoAAAANSUhEUgAAAIAAAABACAMAAADlCI9NAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAAZQTFRF////'
example_images = [
'MzMzOFSMkQAAAPJJREFUeNrslm0PwjAIhHv//09rYqZADzOBqMnu+WLTruOGvK0lhBBCCPHH4E7x3pwAfFE4tX9lAUBVwZyAYjwFAeikgH3XYxn88nzKbIZly4/BluUlIG66RVXBcYd9TTQWN+1vWUEqIJQI5nqYP6scl84UqUtEoLNMjoqBzFYrt+IF1FOTfGsqIIlcgAbNZ0Uoxtu6igB+tyBgZhCgAZ8KyI46zYQF/LksQC0L3gigdQBhgGkXou1hF1XebKzKXBxaDsjCOu1Q/LA1U+Joelt/9d2QVm9MjmibO2mGTEy2ZyetsbdLgAQIIYQQQoifcRNgAIfGAzQQHmwIAAAAAElFTkSuQmCC',
'AAAAVcLTfgAAAPRJREFUeNrsllEKwzAMQ+37X3owBm0c2VZCIYXpfXVBTd9qx5uZEEIIIcQr8IHjAgcc/LTBGwSiz5sEoIwTKwuxVCAW5XsxFco3Y63A3BawVWDMiFgiMD5tvELNuh/r5sA9Nu1yiYaXvBBLBawUAGubsZU5UOy8HkNvINoAv27nMVZ1WC1wfwrspPk2FDMiVpYknNu6uIxAVWQsgBoSCCQxI2KEANFdXccXseZzuKMQQDFmt6pPwU9CL+CcADEJr6qFA1aWYIgZEesGEVgmTsGvfYyIdaPYwp6JwBRL5kD4Hs7+VWGSz8aEEEIIIYQQ/8VHgAEAxPsD+SYeZ2QAAAAASUVORK5CYII=',
]
example_images = [pngpre + s for s in example_images]

# but you can use your own local file here as a test
you_have_a_file = False
if you_have_a_file:
    # Load image and convert to base64
    with open("my_image.png", "rb") as image_file:
        example_image = base64.b64encode(image_file.read()).decode("utf-8")
else:
    example_image = example_images[0]

# This is the CORRECT way to send an image, not bot-fabricated nonsense
user_message = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Describe the attached image"
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{example_image}",
                    "detail": "auto"
                }
            },
        ],
    }
]

# Then teach your bot the proper way to form an API request
# with_raw_response also gives headers, which I extract to variables
params = {
  "model": model,
  "max_completion_tokens": 1500,
  "top_p": 0.01,
  "messages": user_message,
  "timeout": 60,
}
if model.startswith("o"):
    params.pop("top_p")

client = openai.Client(timeout=111)
start = time.perf_counter()
try:
    response = client.chat.completions.with_raw_response.create(**params)

    headers_dict = dict(response.headers)
    for key, value in headers_dict.items():  # set a var for each header
        globals()[f'headers_{key.replace("-", "_")}'] = value

    print(f"With {model}, input tokens were: {response.parse().usage.prompt_tokens}")
    print(f"{response.parse().choices[0].message.content[:1000]}")
    print(f"[elapsed: {time.perf_counter()-start:.1f} seconds]")
except Exception as e:
    print(f"Error: {e}")