PROMPT:
I have several JPG files in the folder e:\De pus pe FTP 2\Test\. I want Python code that scans them with OCR and creates DOCX and PDF files modeled on the originals, keeping the text formatting identical and keeping the images identical: their size, their position on the page, etc. I want you to use a ChatGPT API key.
See the Python code below. The code does not read the text with OCR and the API key; it only prints the screen. OCR means I can copy each word separately, not just select the image. I believe the ChatGPT API key is better than Tesseract OCR at reading images, isn't it?
import os
from PIL import Image
import base64
from docx import Document
from docx.oxml.ns import qn
from docx.shared import Inches
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
import openai
# Directory that is scanned for JPG files; the generated .docx and .pdf
# files are written into this same directory.
input_dir = r"e:\De pus pe FTP 2\Test"

# OpenAI client used for the OCR requests. The key is read from the
# OPENAI_API_KEY environment variable when it is set and non-empty, so the
# real secret does not have to be stored in this file. Otherwise the
# placeholder below is used and must be replaced with an actual key.
client = openai.OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY") or "YOUR-API-KEY"
)
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode and the base64 output is decoded to a
    ``str`` so it can be embedded in a ``data:`` URL.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
def perform_ocr_with_chatgpt(image_path):
    """Send the image to the ChatGPT API and return the text it extracts.

    The image is base64-encoded and sent as a JPEG ``data:`` URL together
    with an instruction to transcribe the text while keeping line breaks.

    Returns:
        The extracted text with surrounding whitespace stripped, or ``None``
        when the API call fails or the response carries no text content.
        Failures are reported with ``print`` rather than raised.

    Raises:
        OSError: if the image file cannot be read (encoding happens before
            the guarded API call).
    """
    # Encode the image to base64
    base64_image = encode_image(image_path)
    try:
        # Send the image to ChatGPT API for OCR
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Extract all text from this image accurately, preserving the layout as much as possible. Return the text with line breaks where appropriate."
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            max_tokens=2000
        )
        choice = response.choices[0]
        content = choice.message.content
        # The message content can be None (e.g. a refusal); calling .strip()
        # on it would raise AttributeError, so report it explicitly instead.
        if content is None:
            print("OpenAI API returned no text content for this image")
            return None
        # "length" means the reply stopped at max_tokens, so text is missing.
        if choice.finish_reason == "length":
            print("Warning: extracted text was truncated at the max_tokens limit")
        return content.strip()
    except openai.APIError as e:
        print(f"OpenAI API error: {e}")
        return None
    except Exception as e:
        # Best-effort boundary: any other failure is reported as "no text".
        print(f"Unexpected error: {e}")
        return None
def create_docx(output_path, text, image_path):
    """Write a .docx file containing the image followed by the text.

    The picture is inserted at a fixed width of 6 inches (height scales
    proportionally), followed by an empty paragraph and then one paragraph
    per non-blank line of *text*. The default font is set to Times New Roman.

    Args:
        output_path: destination path of the .docx file.
        text: extracted text; each non-blank line becomes a paragraph.
        image_path: path of the source image to embed.
    """
    doc = Document()
    normal_style = doc.styles['Normal']
    normal_style.font.name = 'Times New Roman'
    # The East Asian font slot has to be set separately on the XML element.
    normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), 'Times New Roman')

    # Only the pixel width is needed for the guard below; the context manager
    # closes the file handle that Image.open would otherwise keep open.
    with Image.open(image_path) as img:
        img_width = img.size[0]

    # Adjust image width to fit document margins (6 inches is reasonable)
    max_doc_width = 6.0
    if img_width > 0:
        doc.add_picture(image_path, width=Inches(max_doc_width))
        doc.add_paragraph()  # Spacing after image

    # splitlines() also handles "\r\n", so no stray "\r" ends up in a run.
    for para in text.splitlines():
        if para.strip():
            doc.add_paragraph().add_run(para)

    doc.save(output_path)
    print(f"Created DOCX: {output_path}")
def _wrap_to_width(c, line, font_name, font_size, max_width):
    """Split *line* on whitespace into pieces no wider than *max_width* points."""
    if c.stringWidth(line, font_name, font_size) <= max_width:
        # Line already fits: keep it untouched, including its spacing.
        return [line]
    pieces = []
    current = ""
    for word in line.split():
        candidate = f"{current} {word}" if current else word
        if current and c.stringWidth(candidate, font_name, font_size) > max_width:
            pieces.append(current)
            current = word
        else:
            current = candidate
    if current:
        pieces.append(current)
    return pieces


def create_pdf(output_path, text, image_path):
    """Write a letter-size .pdf containing the image followed by the text.

    The image is scaled to at most 500 x 600 points (aspect ratio kept) and
    drawn with its top edge at y=650. The non-blank lines of *text* follow
    in 10 pt Helvetica, wrapped to the printable width and continued on new
    pages when the cursor drops below y=50.

    Args:
        output_path: destination path of the .pdf file.
        text: extracted text; blank lines are skipped.
        image_path: path of the source image to embed.
    """
    c = canvas.Canvas(output_path, pagesize=letter)
    font_name, font_size = "Helvetica", 10
    left_margin = 30
    # Same margin on both sides; lines wider than this would leave the page.
    max_text_width = letter[0] - 2 * left_margin

    # Only the pixel size is needed; close the file handle right away.
    with Image.open(image_path) as img:
        img_width, img_height = img.size

    if img_width > 0:
        aspect = img_height / float(img_width)
        max_width = 500  # Max width in points
        new_height = max_width * aspect
        # Tall image: cap the height and shrink the width to keep the ratio.
        if new_height > 600:
            new_height = 600
            max_width = new_height / aspect
        c.drawImage(image_path, left_margin, 650 - new_height,
                    width=max_width, height=new_height)
        # Text starts 20 points below the image.
        text_y = 650 - new_height - 20
    else:
        text_y = 650

    text_obj = c.beginText(left_margin, text_y)
    text_obj.setFont(font_name, font_size)
    # splitlines() also handles "\r\n", so no stray "\r" is drawn.
    for para in text.splitlines():
        if not para.strip():
            continue
        for piece in _wrap_to_width(c, para, font_name, font_size, max_text_width):
            # Flush the current text object and start a new page when full.
            if text_obj.getY() < 50:
                c.drawText(text_obj)
                c.showPage()
                text_obj = c.beginText(left_margin, 750)
                text_obj.setFont(font_name, font_size)
            text_obj.textLine(piece)
    c.drawText(text_obj)
    c.save()
    print(f"Created PDF: {output_path}")
def process_images():
    """Run OCR on every .jpg file in ``input_dir`` and write .docx/.pdf output.

    For each image the extracted text is obtained from
    ``perform_ocr_with_chatgpt``; on success a .docx and a .pdf with the same
    base name are created next to the image. Problems with one file are
    printed and do not stop the remaining files from being processed.
    """
    # isdir (not exists): a regular file at this path would make listdir fail.
    if not os.path.isdir(input_dir):
        print(f"Error: Directory not found: {input_dir}")
        return

    # Sorted so files are processed in a stable, predictable order.
    jpg_files = sorted(
        f for f in os.listdir(input_dir) if f.lower().endswith('.jpg')
    )
    if not jpg_files:
        print(f"No JPG files found in: {input_dir}")
        return

    print(f"Found {len(jpg_files)} JPG files to process")
    for filename in jpg_files:
        image_path = os.path.join(input_dir, filename)
        print(f"\nProcessing: {filename}")
        try:
            # Perform OCR
            text = perform_ocr_with_chatgpt(image_path)
            if text is None:
                print(f"Failed to extract text from {filename}")
                continue
            print(f"Successfully extracted text ({len(text)} characters)")

            # Output files share the image's base name and directory.
            base_name = os.path.splitext(filename)[0]
            docx_output = os.path.join(input_dir, base_name + '.docx')
            pdf_output = os.path.join(input_dir, base_name + '.pdf')

            create_docx(docx_output, text, image_path)
            create_pdf(pdf_output, text, image_path)
            print(f"Successfully processed {filename}")
        except Exception as e:
            # Per-file boundary: report and move on to the next image.
            print(f"Error processing {filename}: {e}")
# Script entry point: announce the run, process the folder, report completion.
if __name__ == "__main__":
    print("Starting OCR processing with ChatGPT...")
    print(f"Scanning folder: {input_dir}")
    process_images()
    print("\nProcessing complete!")