I couldn't figure out which model I'm supposed to use, could someone help?

import os
from openai import OpenAI
from typing import Optional, Dict, Any
from PIL import Image
import requests
from io import BytesIO
from dotenv import load_dotenv
import base64

load_dotenv()

# Interior design styles and their prompts

INTERIOR_STYLES = {
    "modern": {
        "prompt": "modern interior design, clean lines, minimalist, neutral colors, sleek furniture",
        "negative_prompt": "cluttered, vintage, rustic, ornate decorations"
    },
    "scandinavian": {
        "prompt": "scandinavian design, light woods, white walls, natural light, minimal decor",
        "negative_prompt": "dark colors, heavy furniture, cluttered spaces"
    },
    "industrial": {
        "prompt": "industrial style, exposed brick, metal fixtures, raw materials, open space",
        "negative_prompt": "traditional furniture, floral patterns, pastel colors"
    },
    "traditional": {
        "prompt": "traditional interior, classic furniture, warm colors, elegant details",
        "negative_prompt": "ultra-modern, minimalist, industrial elements"
    },
    "bohemian": {
        "prompt": "bohemian style, eclectic decor, rich patterns, plants, natural materials",
        "negative_prompt": "minimal decoration, monochromatic, formal arrangement"
    }
}

class StagingService:
    def __init__(self):
        self.client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

async def detect_room_type(self, image_url: str) -> str:
    """
    Detect the room type from the image using OpenAI's Vision API.
    """
    try:
        # Handle local file paths
        if image_url.startswith('file://'):
            image_path = image_url.replace('file://', '')
            with open(image_path, 'rb') as image_file:
                image_data = base64.b64encode(image_file.read()).decode('utf-8')
                image_url = f"data:image/jpeg;base64,{image_data}"

        response = self.client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "What type of room is this? Just respond with one word: living-room, bedroom, kitchen, bathroom, dining-room, or office."
                        },
                        {
                            "type": "image_url",
                            "url": image_url
                        }
                    ]
                }
            ],
            max_tokens=10
        )
        
        room_type = response.choices[0].message.content.strip().lower()
        return room_type if room_type in ["living-room", "bedroom", "kitchen", "bathroom", "dining-room", "office"] else "living-room"
        
    except Exception as e:
        print(f"Room detection error: {str(e)}")
        return "living-room"

    async def generate_staged_image(
        self,
        image_url: str,
        style: str,
        room_type: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Generate a staged image using OpenAI's DALL-E API.
        """
        try:
            if not room_type:
                room_type = await self.detect_room_type(image_url)

            style_config = INTERIOR_STYLES.get(style, INTERIOR_STYLES["modern"])

            # Construct the prompt
            prompt = f"Transform this {room_type} into a {style_config['prompt']}, maintain the room's layout and architectural features, photorealistic, professional interior design photography style"

            try:
                # Generate the staged image using DALL-E
                response = self.client.images.generate(
                    model="dall-e-3",
                    prompt=prompt,
                    n=1,
                    size="1024x1024",
                    quality="standard",
                    response_format="url"  # Explicitly request URL format
                )

                # Get the URL of the generated image
                staged_image_url = response.data[0].url

                return {
                    "success": True,
                    "staged_image_url": staged_image_url,
                    "room_type": room_type,
                    "style": style,
                    "prompt": prompt
                }

            except Exception as e:
                print(f"DALL-E generation error: {str(e)}")
                return {
                    "success": False,
                    "error": str(e),
                    "room_type": room_type,
                    "style": style
                }

        except Exception as e:
            return {
                "success": False,
                "error": str(e),
                "room_type": room_type,
                "style": style
            }

    async def process_image(
        self,
        image_url: str,
        style: str,
        room_type: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Process an image through the staging pipeline.
        """
        try:
            # Generate the staged image
            result = await self.generate_staged_image(image_url, style, room_type)

            if not result["success"]:
                return {
                    "success": False,
                    "error": result["error"]
                }

            return {
                "success": True,
                "original_image": image_url,
                "staged_image": result["staged_image_url"],
                "room_type": result["room_type"],
                "style": style
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }

Sure. There are many AI models with vision, and it mostly comes down to the quality of output you need on a difficult task, combined with the ability to see an image.

I would start with "gpt-4.1".
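
Side note: "gpt-4-vision-preview" in your detect_room_type has been retired, so that call will fail no matter how the message is formatted. Here's a minimal sketch of the corrected call inside detect_room_type, assuming the current openai SDK, keeping your prompt, and just swapping in "gpt-4.1" plus the properly nested image_url object:

response = self.client.chat.completions.create(
    model="gpt-4.1",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What type of room is this? Just respond with one word: living-room, bedroom, kitchen, bathroom, dining-room, or office."
                },
                {
                    "type": "image_url",  # the image goes in a nested object
                    "image_url": {"url": image_url}
                }
            ]
        }
    ],
    max_tokens=10
)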

However, why choose? Here's code that will randomly choose a model for you, from the OpenAI models that support vision!

(It also shows how to properly make a request; your code is not using the right format to send images in a user message.)

import base64, time, openai
import random

vision_models = [
    "o3-2025-04-16",
    "o4-mini-2025-04-16",
    "gpt-4.1-2025-04-14",
    "gpt-4.1-mini-2025-04-14",
    "gpt-4.1-nano-2025-04-14",
    "gpt-4.5-preview-2025-02-27",
    "o1-2024-12-17",
    "gpt-4o-2024-11-20",
    "gpt-4o-2024-08-06",
    "gpt-4o-mini-2024-07-18",
    "gpt-4o-2024-05-13",
    "gpt-4-turbo-2024-04-09",
]

# Select a random vision model!!! Chat Roulette version 2!
model = random.choice(vision_models)

# this section makes a built-in list of base64 images to try
pngpre = 'iVBORw0KGgoAAAANSUhEUgAAAIAAAABACAMAAADlCI9NAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAAZQTFRF////'
example_images = [
'MzMzOFSMkQAAAPJJREFUeNrslm0PwjAIhHv//09rYqZADzOBqMnu+WLTruOGvK0lhBBCCPHH4E7x3pwAfFE4tX9lAUBVwZyAYjwFAeikgH3XYxn88nzKbIZly4/BluUlIG66RVXBcYd9TTQWN+1vWUEqIJQI5nqYP6scl84UqUtEoLNMjoqBzFYrt+IF1FOTfGsqIIlcgAbNZ0Uoxtu6igB+tyBgZhCgAZ8KyI46zYQF/LksQC0L3gigdQBhgGkXou1hF1XebKzKXBxaDsjCOu1Q/LA1U+Joelt/9d2QVm9MjmibO2mGTEy2ZyetsbdLgAQIIYQQQoifcRNgAIfGAzQQHmwIAAAAAElFTkSuQmCC',
'AAAAVcLTfgAAAPRJREFUeNrsllEKwzAMQ+37X3owBm0c2VZCIYXpfXVBTd9qx5uZEEIIIcQr8IHjAgcc/LTBGwSiz5sEoIwTKwuxVCAW5XsxFco3Y63A3BawVWDMiFgiMD5tvELNuh/r5sA9Nu1yiYaXvBBLBawUAGubsZU5UOy8HkNvINoAv27nMVZ1WC1wfwrspPk2FDMiVpYknNu6uIxAVWQsgBoSCCQxI2KEANFdXccXseZzuKMQQDFmt6pPwU9CL+CcADEJr6qFA1aWYIgZEesGEVgmTsGvfYyIdaPYwp6JwBRL5kD4Hs7+VWGSz8aEEEIIIYQQ/8VHgAEAxPsD+SYeZ2QAAAAASUVORK5CYII=',
]
example_images = [pngpre + s for s in example_images]

# but you can use your own local file here as a test
you_have_a_file = False
if you_have_a_file:
    # Load image and convert to base64
    with open("my_image.png", "rb") as image_file:
        example_image = base64.b64encode(image_file.read()).decode("utf-8")
else:
    example_image = example_images[0]

# This is the CORRECT way to send an image, not bot-fabricated nonsense
user_message = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Describe the attached image"
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{example_image}",
                    "detail": "auto"
                }
            },
        ],
    }
]

# Then teach your bot the proper way to form an API request
# with_raw_response also gives headers, which I extract to variables
params = {
  "model": model,
  "max_completion_tokens": 1500,
  "top_p": 0.01,
  "messages": user_message,
  "timeout": 60,
}
if model.startswith("o"):
    params.pop("top_p")

client = openai.Client(timeout=111)
start = time.perf_counter()
try:
    response = client.chat.completions.with_raw_response.create(**params)

    headers_dict = dict(response.headers)
    for key, value in headers_dict.items():  # set a var for each header
        globals()[f'headers_{key.replace("-", "_")}'] = value

    print(f"With {model}, input tokens were: {response.parse().usage.prompt_tokens}")
    print(f"{response.parse().choices[0].message.content[:1000]}")
    print(f"[elapsed: {time.perf_counter()-start:.1f} seconds]")
except Exception as e:
    print(f"Error: {e}")