Image Upload / Recognition via Bing Chat

N2U · August 27, 2023, 8:19pm

So, in the absence of a CLIP API, I decided to generate captions for images using Salesforce’s BLIP (Bootstrapping Language-Image Pre-training) model. It’s not perfect, but it works, and it only uses 32 MB of RAM.

Usage

The script is intended to be run from the command line. The images should be placed in an ‘images’ directory located in the same directory as the script. The script will process all images in the ‘images’ directory and save the results to a file in the same directory as the script.

Output

The script generates a CSV file named ‘output.csv’ with the following columns:

Filename
Type (PNG, JPEG, JPG, GIF)
Number of Frames (for GIFs)
Height in Pixels
Width in Pixels
File Size in MB
Description (Caption generated by the BLIP model)


import os
import csv
from PIL import Image
from fractions import Fraction
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the BLIP processor and captioning model once, at module level;
# get_image_caption() reuses these two globals for every image it captions.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Function to read the pixel dimensions of an image file
def get_image_dimensions(image_path):
    """Return the ``(width, height)`` of the image at *image_path*, in pixels.

    The file is opened only long enough to read its size and is closed
    again before returning.
    """
    with Image.open(image_path) as picture:
        pixel_width, pixel_height = picture.size
    return pixel_width, pixel_height

# Function to get frame count of a GIF
def get_gif_frame_count(gif_path):
    """Return the number of frames in the image at *gif_path*.

    Args:
        gif_path: Path to the image file (normally an animated GIF).

    Returns:
        The frame count as an int; 1 for single-frame images.
    """
    with Image.open(gif_path) as img:
        # Pillow exposes the frame count of multi-frame formats as
        # ``n_frames``; image classes without that attribute hold a single
        # frame, which is what the manual seek-until-EOFError loop reported.
        return getattr(img, "n_frames", 1)

# Function to express an image's aspect ratio as a reduced fraction
def compute_aspect_ratio(width, height):
    """Return ``width / height`` as a ``Fraction`` in lowest terms.

    For example, 1920 x 1080 yields ``Fraction(16, 9)``. A *height* of 0
    raises ``ZeroDivisionError`` from the ``Fraction`` constructor.
    """
    ratio = Fraction(width, height)
    return ratio.limit_denominator()

# Function to report a file's size in megabytes
def get_file_size_mb(file_path):
    """Return the size of *file_path* in MB, where 1 MB = 1024 * 1024 bytes."""
    bytes_per_mb = 1024 * 1024
    size_in_bytes = os.path.getsize(file_path)
    return size_in_bytes / bytes_per_mb

# Function to write a CSV file
def write_csv(csv_file, csv_data):
    """Write the header row followed by *csv_data* to *csv_file*.

    Args:
        csv_file: Path of the CSV file to create (overwritten if it exists).
        csv_data: Iterable of rows; each row lists its values in the same
            order as the header columns below.
    """
    # Explicit UTF-8 so non-ASCII filenames or captions are written the same
    # way on every platform instead of depending on the locale's default
    # encoding; newline='' lets the csv module control line endings.
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Filename", "Type", "Number of Frames", "Height in Pixels", "Width in Pixels", "File Size in MB", "Description"])
        writer.writerows(csv_data)

# Function to get image caption using BLIP
def get_image_caption(image_path):
    """Return a caption for the image at *image_path*, generated by BLIP.

    Uses the module-level ``processor`` and ``model``.

    Args:
        image_path: Path to the image file to caption.

    Returns:
        The decoded caption string, with special tokens stripped.
    """
    # Open inside a context manager so the underlying file handle is closed
    # as soon as the RGB copy exists; convert() returns a new image, so the
    # copy remains usable after the source file is closed.
    with Image.open(image_path) as img:
        raw_image = img.convert('RGB')
    inputs = processor(raw_image, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption

# Main function
def main():
    """Caption every image in the 'images' directory and write 'output.csv'.

    Both the 'images' directory and the generated 'output.csv' live next to
    this script. Each PNG/JPG/JPEG/GIF file produces one row: filename, type
    (the upper-cased extension), frame count, height, width, size in MB and
    the BLIP caption. Files with any other extension are skipped.
    """
    # Resolve paths relative to the script, not the current working directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    extract_dir = os.path.join(current_dir, 'images')

    image_extensions = ('.png', '.jpg', '.jpeg', '.gif')

    # Prepare data for the CSV
    csv_data = []

    # os.listdir() returns entries in arbitrary order; sort so that the rows
    # come out in the same order on every run.
    for file in sorted(os.listdir(extract_dir)):
        if not file.lower().endswith(image_extensions):
            continue

        file_path = os.path.join(extract_dir, file)
        # The guard above guarantees a '.' in the name, so the last piece is
        # the extension; report it as the type (PNG, JPG, JPEG or GIF).
        image_type = file.rsplit('.', 1)[-1].upper()

        file_size_mb = get_file_size_mb(file_path)
        width, height = get_image_dimensions(file_path)
        description = get_image_caption(file_path)
        # Only GIFs are checked for multiple frames; everything else counts as 1
        frames = get_gif_frame_count(file_path) if image_type == 'GIF' else 1

        csv_data.append([file, image_type, frames, height, width, file_size_mb, description])

    # Write the CSV file
    csv_file = os.path.join(current_dir, 'output.csv')
    write_csv(csv_file, csv_data)

# Run main() only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()

Topic		Replies	Views
DALL-E API to generate json data from image API api	12	4733	December 19, 2023
GPT-4 API and image input API	49	72109	December 12, 2023
ChatGPT goes Multimodal! Sound and vision is rolling out on ChatGPT Community chatgpt , multimodal	34	13700	December 10, 2023
4o image generation: WOW! Feedback	12	5736	March 27, 2025
GPT-4-Vision Interesting Uses and Examples Thread (2023) Community gpt-4-vision	24	12227	April 22, 2024

Image Upload / Recognition via Bing Chat

Usage

Output

Related topics