Ok, so this code was able to get the app to describe the image once and the next few responses were errors or it saying that it couldn’t see images and the last response was
Blockquote
This image contains a base64 encoded string that represents a JPEG image file. The string is very long and contains a series of characters and symbols that cannot be interpreted without decoding it. The decoded image would likely display a picture, but without decoding the base64 string, it is not possible to describe the content of the image.
Chat suggested using CLIP for image analysis. Does this make sense?
import gradio as gr
import json
import openai
import base64
import numpy as np
import cv2
# Load API keys from config file
with open("config.json", "r") as config_file:
config = json.load(config_file)
OPENAI_API_KEY = config["openai_api_key"]
openai.api_key = OPENAI_API_KEY
def image_to_base64(image):
# Resize the image to reduce size
resized_image = cv2.resize(image, (128, 128)) # Resize to 256x256 or other suitable size
_, encoded_image = cv2.imencode('.png', resized_image)
base64_image = base64.b64encode(encoded_image).decode('utf-8')
return base64_image
def chat_with_bot(combined_input, base64_image=None):
try:
messages = [{"role": "user", "content": combined_input}]
if base64_image:
# Format the image as a data URL string
image_data_url = f"data:image/jpeg;base64,{base64_image}"
image_message = {"role": "system", "content": image_data_url}
messages.append(image_message)
response = openai.ChatCompletion.create(
model="gpt-4-vision-preview",
messages=messages,
max_tokens=300 # Adjust as needed
)
return response['choices'][0]['message']['content']
except Exception as e:
return f"An error occurred: {str(e)}"
def handle_input(question, image):
base64_image = ""
if image is not None:
base64_image = image_to_base64(image)
return chat_with_bot(question, base64_image)
# Function to generate images using Dall-e
def generate_image(prompt):
openai.api_key = OPENAI_API_KEY
try:
response = openai.Image.create(
prompt=prompt,
n=1, # Number of images to generate
size="1024x1024" # Image size
)
# Extracting the image URL from the response
image_url = response['data'][0]['url']
# Return HTML with flexbox for centering
return f"<div style='display: flex; justify-content: center;'><img src='{image_url}' width='512' /></div>"
except Exception as e:
return f"An error occurred: {e}"
def handle_submission(question, api_key, action):
if action == "submit":
return chat_with_bot(question)
elif action == "generate":
return generate_image(question)
else:
return "Invalid action"
# Gradio interface setup
with gr.Blocks() as demo:
gr.Markdown("### Milo")
with gr.Row():
question = gr.Textbox(label="Ask a question or describe the image:")
image_input = gr.Image(label="Or upload an image for analysis")
submit_btn = gr.Button("Submit")
response_text = gr.Textbox(label="Response")
submit_btn.click(fn=handle_input, inputs=[question, image_input], outputs=response_text)
gr.Markdown("### Image Generation")
image_prompt = gr.Textbox(label="Enter prompt for image generation")
generate_btn = gr.Button("Generate Image")
image_output = gr.HTML() # Use HTML component for displaying image from URL
generate_btn.click(fn=generate_image, inputs=[image_prompt], outputs=image_output)
demo.launch()