I’ve been getting bad gateway errors from GPT-4 all day long, yet I’m still being charged for the calls. My usage shows completions, but the responses never come back through my API. I have paying customers who can’t access the service I’m trying to provide because GPT-4 just isn’t available.
Here’s my code. It works fine at lower token counts but reliably bombs out when I’m expecting 3–4k-token returns. I have a function that chops up the user’s input document and passes each piece to this route at under 3k tokens, expecting an under-3k return, and nothing comes back. You can basically ignore the retry stuff; I’ve more or less tried every retry and backoff method under the sun (a minimal sketch of the per-call version I’ve been trying is at the bottom of this post). How are you going to charge us for GPT-4 calls that don’t work?
# @app.route has to be the outermost decorator; with @retry on top, Flask registers
# the un-retried function and the retry never runs for incoming requests.
@app.route("/chunks", methods=["POST"])
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def chunks():
    try:
        data = request.get_json()
        file_url = data["fileURL"]
        instruction_set = data["instructionSet"]

        # Extract the input file's name from the fileURL and create a new name for the output file
        input_file_name = file_url.split("/")[-1]
        unique_id = secrets.token_hex(3)  # 3 bytes -> 6-character hex identifier
        output_file_name = f"{input_file_name.split('.')[0]}_output_{unique_id}.docx"

        # Download and read the file
        docx_file = requests.get(file_url).content
        docx_file_stream = BytesIO(docx_file)
        doc = Document(docx_file_stream)

        # Save the Document object to a temporary file (delete=False leaves the temp file on disk)
        with NamedTemporaryFile(delete=False, suffix=".docx") as temp_docx:
            doc.save(temp_docx.name)

        # Read the temporary file using docx2json.convert()
        json_file = json.loads(convert(temp_docx.name))

        # enc, count_tokens, iteratively_chunk_json, deduct_credits_chunk, and MAX_TOKENS
        # are defined elsewhere in my app
        instruction_set_token_count = len(enc.encode(instruction_set))
        MAX_CHUNK_TOKENS = 2000 - instruction_set_token_count
        OVERHANG = 500
        json_file_dump = json.dumps(json_file)
        app.logger.debug(f"Type of json.dumps(json_file): {type(json_file_dump)}")
        token_count = count_tokens(json_file_dump)
        user_id = data["userId"]

        # Save the input file to the history folder in Firebase Storage
        bucket = storage.bucket()
        input_blob = bucket.blob(f"users/{user_id}/history/{input_file_name}")
        input_blob.upload_from_string(docx_file, content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document")

        # Create a Firestore DB entry for the history folder
        history_collection = db.collection("users").document(user_id).collection("history")
        history_entry = {
            "fileName": input_file_name,
            "timestamp": datetime.datetime.utcnow(),
            "owner": user_id,
            "storagePath": f"users/{user_id}/history/{input_file_name}",
            "fileType": "docx",
        }
        history_collection.add(history_entry)

        # Save the raw JSON to the archive folder
        archive_blob = bucket.blob(f"users/{user_id}/archive/{input_file_name.split('.')[0]}_archive.json")
        archive_blob.upload_from_string(json.dumps(json_file), content_type="application/json")
        if token_count + instruction_set_token_count > MAX_TOKENS:
            # Split the input JSON into chunks
            # (instruction_set_token_count is subtracted again here even though
            # MAX_CHUNK_TOKENS already accounts for it)
            json_chunks = iteratively_chunk_json(json_file, instruction_set, MAX_CHUNK_TOKENS - instruction_set_token_count, OVERHANG)
            app.logger.debug(f"Input JSON is split into {len(json_chunks)} chunks")
            app.logger.debug(json_chunks)

            # Process each chunk with GPT-4
            completions = []
            responses = []
            for idx, chunk in enumerate(json_chunks):
                app.logger.debug(f"Processing chunk {idx+1}/{len(json_chunks)}")
                user_input_str = instruction_set + json.dumps(chunk)
                completion = openai.ChatCompletion.create(
                    model="gpt-4",
                    messages=[
                        {
                            "role": "system",
                            "content": "You are a market researcher who is an expert survey and discussion guide writer. You always include answer choices when it is appropriate. You write complete market research surveys and discussion guides by first reading through an example and then outputting a complete survey including both questions and answers.",
                        },
                        {"role": "user", "content": user_input_str},
                    ],
                    max_tokens=500,  # caps each chunk's completion at 500 tokens
                    temperature=0.6,
                    top_p=1,
                    frequency_penalty=0,
                    presence_penalty=0,
                )
                completions.append(completion.choices[0].message.content)
                responses.append(completion.choices[0].message.content)
        else:
            # If the token count does not exceed the limit, process the entire content
            app.logger.debug("Token count does not exceed the limit, processing the entire content")
            user_input_str = instruction_set + json.dumps(json_file)
            completion = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant.",
                    },
                    {"role": "user", "content": user_input_str},
                ],
                max_tokens=2000,
                temperature=0.6,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
            )
            completions = [completion.choices[0].message.content]
            responses = [completion.choices[0].message.content]
        # Stitch the completions together
        stitched_content = ""
        stitch_message = "A STITCH OCCURRED HERE PLEASE CHECK CONTENT FOR CONSISTENCY ESPECIALLY WITH SURVEYS AND SURVEY LOGIC. YOU MAY NEED TO DELETE REPEATED CONTENT CAUSED BY CHUNKING THE FILE."
        for idx, response in enumerate(responses):
            if idx > 0:
                stitched_content += stitch_message
            stitched_content += response

        # Save the stitched content to a JSON string
        output_data = {"content": stitched_content}
        output_json_str = json.dumps(output_data)

        # Convert the JSON back to docx
        # (output_data is always a dict here, so the isinstance(list) branch below never runs)
        output_docx = Document()
        paragraphs = json.loads(output_json_str)
        if isinstance(paragraphs, list):
            for paragraph in paragraphs:
                p = output_docx.add_paragraph()
                p.add_run(paragraph["text"])
        else:
            p = output_docx.add_paragraph()
            p.add_run(stitched_content)
        # Save the output docx
        output_docx_stream = BytesIO()
        output_docx.save(output_docx_stream)

        # Upload the docx file back to the user's Firebase Storage bucket
        bucket = storage.bucket()
        output_docx_stream.seek(0)
        blob = bucket.blob(f"users/{user_id}/output/{output_file_name}")
        blob.upload_from_file(output_docx_stream, content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document")

        # Create a Firestore DB entry for the output folder
        output_collection = db.collection("users").document(user_id).collection("output")
        output_entry = {
            "fileName": output_file_name,
            "timestamp": datetime.datetime.utcnow(),
            "owner": user_id,
            "storagePath": f"users/{user_id}/output/{output_file_name}",
            "fileType": "docx",
        }
        output_collection.add(output_entry)
        # Update the 'processed' field in the 'uploadedFiles' entry for the input file
        input_document_id = data["inputDocumentId"]
        uploaded_files_collection = db.collection("uploadedFiles")
        uploaded_files_document = uploaded_files_collection.document(input_document_id)
        doc_snapshot = uploaded_files_document.get()
        if doc_snapshot.exists:
            uploaded_files_document.update({"processed": True})
            if not deduct_credits_chunk(user_id, token_count, count_tokens(output_json_str)):
                return jsonify({"error": "Insufficient credits"}), 402
        else:
            app.logger.warning(f"Document with ID {input_document_id} not found in Firestore uploadedFiles collection")
    except Exception as e:
        app.logger.error(f"Unhandled error: {str(e)}")
        app.logger.error(traceback.format_exc())
        return jsonify({"error": "An unexpected error occurred"}), 500

    return jsonify({"firebase_storage_route": input_blob.path, "output_file_name": output_file_name})