Uploading a file to OpenAI storage

Hi, I want to build a custom AI. I use Python for the backend, with Flask + Flask-RESTful as the framework. When I upload a file to OpenAI storage, the file is uploaded successfully, but the response I get back is an Internal Server Error (500). Here is the error:

successfully with ID: file-prCm7Qh9c0ku9Zarv3ersPKu
Response to be sent: {‘message’: ‘File successfully uploaded and analyzed!’, ‘id’: ‘file-prCm7Qh9c0ku9Zarv3ersPKu’, ‘filename’: ‘uploads\SWOT_Analysis.pdf’, ‘object’: ‘file’, ‘analysis’: {‘error’: “Package not found at ‘uploads\SWOT_Analysis.pdf’”, ‘message’: ‘Error while analyzing file content’}}
[2024-08-18 17:42:05,927] ERROR in app: Exception on /uploads [POST]
Traceback (most recent call last):
File “C:\Users\raiha\PycharmProjects\s2iassistants.venv\Lib\site-packages\flask\app.py”, line 880, in full_dispatch_request
rv = self.dispatch_request()
^^^^^^^^^^^^^^^^^^^^^^^
File “C:\Users\raiha\PycharmProjects\s2iassistants.venv\Lib\site-packages\flask\app.py”, line 865, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “C:\Users\raiha\PycharmProjects\s2iassistants.venv\Lib\site-packages\flask_restful_init_.py”, line 493, in wrapper
return self.make_response(data, code, headers=headers)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “C:\Users\raiha\PycharmProjects\s2iassistants.venv\Lib\site-packages\flask_restful_init_.py”, line 522, in make_response
resp = self.representations[mediatype](data, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “C:\Users\raiha\PycharmProjects\s2iassistants.venv\Lib\site-packages\flask_restful\representations\json.py”, line 21, in output_json
dumped = dumps(data, **settings) + “\n”
^^^^^^^^^^^^^^^^^^^^^^^
File “C:\Python312\Lib\json_init_.py”, line 231, in dumps
return _default_encoder.encode(obj)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “C:\Python312\Lib\json\encoder.py”, line 200, in encode
chunks = self.iterencode(o, _one_shot=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “C:\Python312\Lib\json\encoder.py”, line 258, in iterencode
return _iterencode(o, 0)
^^^^^^^^^^^^^^^^^
File “C:\Python312\Lib\json\encoder.py”, line 180, in default
raise TypeError(f’Object of type {o.class.name} ’
TypeError: Object of type Response is not JSON serializable
127.0.0.1 - - [18/Aug/2024 17:42:05] “POST /uploads HTTP/1.1” 500 -

Hi @raihanardila22 and welcome to the forums :tada:!

It almost feels like a wrong purpose was used (e.g. Finetuning and Batch support only .jsonl files), but I am just guessing here.

To isolate the issue from all the other flask code, are you able to run the file upload from the command-line, e.g. using curl?

So:

curl https://api.openai.com/v1/files \
  -H "Authorization: Bearer $OPENAI_API_KEY" \
  -F purpose="assistants" \
  -F file="@SWOT_Analysis.pdf"

And then see if you can retrieve it:

curl https://api.openai.com/v1/files/<INSERT_FILE_ID_HERE> \
  -H "Authorization: Bearer $OPENAI_API_KEY"
1 Like

I have this code. When I try it in Google Colab it works, but when I implement it in my backend I get an internal server error. This is my code.
This is upload.py:
from flask_restful import Resource
from flask import request, jsonify
from langchain.utils import openai
from utils import save_file_locally, analyze_file_content, upload_file

class UploadFile(Resource):
    """REST resource that stores an uploaded file locally and sends it to OpenAI."""

    def post(self):
        """Handle a multipart upload carrying a ``file`` form field.

        Returns:
            A ``(body, status)`` tuple where ``body`` is a plain ``dict``:
            400 if the ``file`` part is missing or has no filename, 201 with
            the file ID and name on success, 500 if the upload failed.
        """
        # Flask-RESTful serializes the returned dict itself. Returning
        # ``jsonify(...), status`` hands it a Response object as the payload,
        # which it then tries to JSON-encode and fails with
        # "Object of type Response is not JSON serializable".
        if 'file' not in request.files:
            return {'error': 'No file part'}, 400
        file = request.files['file']
        if not file.filename:
            return {'error': 'No selected file'}, 400

        # Save the file locally
        file_path = save_file_locally(file)

        # Upload the file to the OpenAI API
        file_id, file_name = upload_file(file_path)

        # upload_file() returns (None, None) on failure.
        if file_id and file_name:
            return {
                'message': "File successfully uploaded!",
                'file_id': file_id,
                'file_name': file_name,
            }, 201
        return {'error': 'File upload failed'}, 500

And this is utils.py, which supports the file uploads:
import os
from docx import Document
from fpdf import FPDF
import csv
import pandas as pd
from openai import OpenAI
from werkzeug.utils import secure_filename

from config import config

# Module-level OpenAI client; the API key and the organization/project
# identifiers are read from the project's config object.
client = OpenAI(
    api_key=config.OPENAI_API_KEY,
    organization=config.OPENAI_ORGANIZATION,
    project=config.OPENAI_PROJECT_ID,
)

# Dictionary for storing file IDs and file names.
file_storage = {}

# Function to upload file to OpenAI API
def upload_file(file_path):
    """Upload a local file to OpenAI with purpose ``assistants``.

    Args:
        file_path: Path of the local file to read and upload.

    Returns:
        ``(file_id, file_name)`` on success, where ``file_name`` is the base
        name of ``file_path``. ``(None, None)`` if anything fails; the error
        is printed rather than raised.
    """
    try:
        with open(file_path, 'rb') as f:
            response = client.files.create(
                file=f,
                purpose='assistants',
            )

        # The v1 SDK returns a typed object rather than a dict, so the ID is
        # read as an attribute; getattr keeps the missing-ID check meaningful.
        file_id = getattr(response, 'id', None)
        file_name = os.path.basename(file_path)

        if not file_id:
            raise Exception("Failed to get 'id' from response")

        print(f"File '{file_name}' uploaded successfully with ID: {file_id}")
        return file_id, file_name
    except Exception as e:
        # Best-effort contract: callers check for (None, None).
        print(f"Error during file upload: {e}")
        return None, None

# Function to save file locally with a secure filename
def save_file_locally(file, upload_dir="uploads"):
    """Save an uploaded file under ``upload_dir`` using a sanitized filename.

    Args:
        file: Upload object exposing ``filename`` and ``save(path)``.
        upload_dir: Directory to save into; created if it does not exist.

    Returns:
        The path the file was saved to.

    Raises:
        ValueError: If the filename is empty after sanitizing.
    """
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(upload_dir, exist_ok=True)

    filename = secure_filename(file.filename)
    if not filename:
        # secure_filename() may strip a name down to nothing; joining that
        # with upload_dir would point at the directory itself.
        raise ValueError("Uploaded file has no usable filename")

    file_path = os.path.join(upload_dir, filename)
    file.save(file_path)
    return file_path

# Function to find and download a file based on its file name
def find_and_download_file(file_name_or_id):
    """Resolve a file name or file ID and download the file.

    Args:
        file_name_or_id: Either a file ID (starts with ``file-``) or a file
            name, which is matched case-insensitively against the file list.

    Returns:
        ``(file_path, None)`` on success, or ``(None, error_message)`` if the
        file cannot be found or downloaded.
    """
    try:
        # Only the comparison is lowercased. File IDs are mixed-case, so the
        # ID itself must be passed on exactly as supplied.
        lowered = file_name_or_id.lower()

        if lowered.startswith("file-"):
            file_id = file_name_or_id
        else:
            # The SDK returns typed objects, so fields are read as attributes.
            # Iterating the list result walks every file it contains.
            match = next(
                (f for f in client.files.list() if f.filename.lower() == lowered),
                None,
            )
            if match is None:
                return None, f"File '{lowered}' not found."
            file_id = match.id

        # Retrieve and download the file by its ID
        file_path, error = find_and_download_file_by_id(file_id)
        if error:
            return None, error
        return file_path, None
    except Exception as e:
        return None, f"Error while finding file: {e}"

def find_and_download_file_by_id(file_id):
    """Download a file's content from OpenAI and write it to a local file.

    Args:
        file_id: ID of the file to download.

    Returns:
        ``(download_path, None)`` on success, where ``download_path`` is
        ``downloaded_<original file name>`` in the current directory, or
        ``(None, error_message)`` on failure.
    """
    try:
        # The metadata supplies the original file name for the local copy.
        file_info = client.files.retrieve(file_id)

        # The v1 SDK exposes file bytes through files.content(); there is no
        # files.download().
        # NOTE(review): the API may refuse to return content for some file
        # purposes (e.g. 'assistants' uploads) - confirm against the API docs.
        content = client.files.content(file_id)

        # The remote name is reduced to its base name so it cannot steer the
        # write outside the current directory.
        safe_name = os.path.basename(file_info.filename)
        download_path = f"downloaded_{safe_name}"
        with open(download_path, 'wb') as f:
            f.write(content.read())

        return download_path, None
    except Exception as e:
        return None, f"Error during file download: {e}"

# Function to analyze file content before generating a document
def analyze_file_content(file_path):
    """Return a word count and a short text summary for a DOCX file.

    Args:
        file_path: Path of the file to analyze. Only ``.docx`` is supported.

    Returns:
        ``{"word_count": int, "content_summary": str}`` on success, where the
        summary joins the first 50 paragraphs. On any failure, including an
        unsupported extension, returns ``{"error": str, "message": str}``.
    """
    try:
        # str.endswith() takes a tuple for multiple suffixes; a second
        # positional argument is the start index. PDFs are rejected because
        # the file is opened with the DOCX reader below.
        if not file_path.lower().endswith('.docx'):
            raise ValueError("Unsupported file format. Only DOCX files are supported.")

        doc = Document(file_path)
        full_text = [paragraph.text for paragraph in doc.paragraphs]
        word_count = len(" ".join(full_text).split())
        return {
            "word_count": word_count,
            "content_summary": " ".join(full_text[:50]) + "...",
        }
    except Exception as e:
        # Ensure that only strings are returned in the error response
        return {
            "error": str(e),
            "message": "Error while analyzing file content",
        }

def save_to_docx(text, file_name='output.docx'):
    """Write ``text`` as a single paragraph to a new DOCX file.

    Returns:
        ``file_name``, the path the document was saved to.
    """
    document = Document()
    document.add_paragraph(text)
    document.save(file_name)
    return file_name

def save_to_pdf(text, file_name='output.pdf'):
    """Render ``text`` onto one A4 portrait page in 12 pt Arial and save it.

    Returns:
        ``file_name`` on success, or ``None`` if PDF generation raised (the
        error is printed).
    """
    try:
        document = FPDF(orientation="P", unit="mm", format="A4")
        document.add_page()
        document.set_font("Arial", size=12)
        # Width 0 lets the cell extend to the right margin; 10 is the line height.
        document.multi_cell(0, 10, text)
        document.output(file_name)
    except Exception as exc:
        print(f"Error while generating PDF: {exc}")
        return None
    return file_name

def save_to_csv(text, file_name='output.csv'):
    """Write ``text`` to a CSV file with a ``Text`` header and one data row.

    Args:
        text: Value written as the only cell of the data row.
        file_name: Destination path; an existing file is overwritten.

    Returns:
        ``file_name``, the path the CSV was written to.
    """
    # An explicit UTF-8 encoding keeps non-ASCII text from raising
    # UnicodeEncodeError where the platform default is a legacy code page
    # (e.g. on Windows). newline='' lets the csv module control line endings.
    with open(file_name, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerows([['Text'], [text]])
    return file_name

def save_to_excel(text, file_name='output.xlsx'):
    """Write ``text`` to an Excel file as one row under a ``Text`` column.

    Returns:
        ``file_name``, the path the workbook was written to.
    """
    frame = pd.DataFrame({'Text': [text]})
    # index=False leaves the DataFrame's row index out of the sheet.
    frame.to_excel(file_name, index=False)
    return file_name