Trouble with OCR Using Multiple Photos

I have a lot of letters that I am tasked with transcribing. I have taken pictures of them and converted them to JPEG. GPT-4o does a great job transcribing them if I upload them one at a time, but if I upload several at once or send a batch as a zip file, it fails to read any of them. Any advice would be appreciated.

Send them one at a time.

If you are using ChatGPT, you can easily use the API instead to automate your process.

That sounds good, thank you!

import base64
import mimetypes
import os
import time

import requests

# OpenAI API key. Taken from the OPENAI_API_KEY environment variable when it
# is set, so the secret does not have to be stored in this file; otherwise the
# placeholder below must be replaced by hand.
api_key = os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY")

# Read an image file and return its contents as base64 text
def encode_image(image_path):
    """Return the bytes of the file at *image_path*, base64-encoded as a str."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

# Folder that is scanned for image files
image_directory = "YOUR_IMAGE_DIRECTORY"
# Text file that receives the collected transcriptions
output_file_path = "Text_File_To_Write_TO.txt"

# OpenAI chat completions endpoint that the requests are posted to
api_url = "https://api.openai.com/v1/chat/completions"

# HTTP headers sent with every request: JSON body plus bearer-token auth
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + api_key,
}

# Function to transcribe a single image through the chat completions endpoint
def process_image_with_gpt(image_path, *, model="gpt-4o-mini", max_tokens=1000, timeout=120):
    """Send one image to the API and return the transcribed text.

    Args:
        image_path: Path of the image file to transcribe.
        model: Name of the model placed in the request payload.
        max_tokens: Upper bound on the number of tokens in the response;
            increase it if long pages come back incomplete.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the whole
            batch indefinitely.

    Returns:
        The message content of the first choice when the server answers with
        HTTP 200; otherwise a string of the form
        ``"Error: <status code>, <response body>"``.

    Raises:
        OSError: If the image file cannot be read.
        requests.RequestException: On connection failures or timeouts.
    """
    base64_image = encode_image(image_path)

    # Label the data URL with the file's real media type instead of always
    # claiming JPEG; the caller also passes PNG, GIF, BMP and TIFF files.
    # Fall back to JPEG (the previous behaviour) when the type is unknown.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # Payload for the API request: one user message holding the instruction
    # text followed by the image as a base64 data URL
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Transcribe the text from the image and include no other response."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": max_tokens
    }

    # Send the POST request to the API
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)

    # Non-200 answers are reported as text rather than raised, so the caller
    # can record the failure for this image and carry on with the next one
    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    return response.json()['choices'][0]['message']['content']

# Results gathered so far. Created before the try block so that everything
# collected up to a failure can still be written out in the finally clause.
results = []

try:
    # os.listdir returns names in arbitrary order; sort them so the output
    # file lists the images in a predictable order. The sort is plain
    # string order, so "page10.jpg" comes before "page2.jpg".
    for filename in sorted(os.listdir(image_directory)):
        # Skip anything that does not look like an image (case-insensitive)
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')):
            continue

        image_path = os.path.join(image_directory, filename)
        print(f"Processing {filename}...")

        # Process the image and get the result (transcription or error text)
        result = process_image_with_gpt(image_path)

        # Store the result in the results list
        results.append(f"Result for {filename}:\n{result}\n\n")

        # Print the result for logging purposes
        print(f"Finished processing {filename}. Result:\n{result}\n")

        # Pause between requests (adjust the time as necessary)
        time.sleep(5)  # Wait for 5 seconds

except Exception as e:
    # Top-level boundary of the script: report the problem and fall through
    # to the finally clause so the partial results are not lost
    print(f"An error occurred: {e}")

finally:
    # Write all results to the output text file, even if an error occurs.
    # Mode 'w' replaces any previous content of the file.
    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        output_file.writelines(results)

    print(f"All processed results (so far) saved in {output_file_path}.")

Here is the script I used for future reference.