I am uploading all the files located in a directory to OpenAI in bulk:
from openai import OpenAI
from dotenv import load_dotenv
import hashlib
import os
from pathlib import Path

load_dotenv()

client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

def getFilelistFileName(directory):
    # The record file is named after the SHA-256 hash of the directory path, under appdata/
    h = hashlib.new('sha256')
    h.update(directory.encode())
    return "appdata/" + h.hexdigest()

def listUploadedFiles(directory):
    # Returns the paths already uploaded for this directory (the record itself is maintained separately)
    fileListFile = getFilelistFileName(directory)
    file_list = []
    if os.path.isfile(fileListFile):
        with open(fileListFile, 'r') as fp:
            file_list = [line.strip() for line in fp]
    return file_list

def uploadFiles(directory):
    global client
    file_list = listUploadedFiles(directory)
    dirPath = Path(directory)
    uploaded_files = []
    for file in dirPath.iterdir():
        # Skip sub-directories and files that are already recorded as uploaded
        if not file.is_file() or str(file) in file_list:
            continue
        with open(file, "rb") as fh:
            response = client.files.create(
                file=fh,
                purpose="assistants"
            )
        uploaded_files.append(response.id)
    return uploaded_files

if __name__ == "__main__":
    uploadFiles('files/social')
But in the worst case this performs as many API calls as there are files, which implies two things:
- I have to delay the calls, which makes my application slower (see the throttling sketch after this list).
- I make too many requests: with 1000 files I have to perform the API call 1000 times, which may blow through the rate limit.
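
For illustration, the throttled variant I am trying to avoid would look roughly like this (just a sketch reusing listUploadedFiles and client from the snippet above; the one-second pause is an arbitrary value I picked, not a documented limit):

import time

def uploadFilesThrottled(directory, pause_seconds=1.0):
    # Same loop as uploadFiles above, but sleeps between requests to stay under the rate limit
    file_list = listUploadedFiles(directory)
    uploaded_files = []
    for file in Path(directory).iterdir():
        if not file.is_file() or str(file) in file_list:
            continue
        with open(file, "rb") as fh:
            response = client.files.create(file=fh, purpose="assistants")
        uploaded_files.append(response.id)
        time.sleep(pause_seconds)  # 1000 files means roughly 1000 extra seconds of waiting
    return uploaded_files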
How can I upload files in bulk with fewer API calls? I already keep a record of what I have uploaded, and I am looking for a way to upload the files en masse.