A bit peeved about bad gateway and still getting charged

I’ve been hit with Bad Gateway errors from GPT-4 all day long, but I’m still being charged. My usage shows completions, yet nothing comes back through the API. I have paying customers who can’t access the service I’m trying to provide them because GPT-4 just isn’t available.

The route works fine at lower token sizes but consistently bombs out on 3k and 4k token returns. I have a function that chops up the user’s input document and passes each piece to the route at under 3k tokens, expecting an under-3k return, and I get nothing back. You can basically ignore the retry stuff in the code; I’ve more or less tried every retry and backoff method under the sun. How are you going to charge us for GPT-4 calls that don’t work?
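This is roughly the kind of retry I’ve been trying, shown as a minimal sketch that wraps the ChatCompletion call itself rather than the Flask route. The exception classes are the ones from openai-python 0.27.x and chat_with_retry is just an illustrative name, so adjust for whatever version you’re on:

import openai
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

# Retry only the API call, not the whole route, so uploads and Firestore writes don't repeat.
@retry(
    retry=retry_if_exception_type((openai.error.APIError,
                                   openai.error.Timeout,
                                   openai.error.ServiceUnavailableError)),
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=4, max=60),
)
def chat_with_retry(messages, max_tokens):
    return openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=max_tokens,
        temperature=0.6,
    )

None of it makes a difference once the gateway starts returning 502s on the longer responses. Anyway, here’s the full route: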

@app.route("/chunks", methods=["POST"])
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))  # retry must sit below app.route, otherwise Flask registers the unwrapped function and never retries
def chunks():
    try:
        data = request.get_json()
        file_url = data["fileURL"]
        instruction_set = data["instructionSet"]

        # Extract the input file's name from the fileURL and create a new name for the output file
        input_file_name = file_url.split("/")[-1]
        unique_id = secrets.token_hex(3)  # Generate a 6-character unique identifier
        output_file_name = f"{input_file_name.split('.')[0]}_output_{unique_id}.docx"

        # Download and read the file
        docx_file = requests.get(file_url).content
        docx_file_stream = BytesIO(docx_file)
        doc = Document(docx_file_stream)

        # Save the Document object to a temporary file
        with NamedTemporaryFile(delete=False, suffix=".docx") as temp_docx:
            doc.save(temp_docx.name)
            # Read the temporary file using docx2json.convert()
            json_file = json.loads(convert(temp_docx.name))

        instruction_set_token_count = len(enc.encode(instruction_set))
        MAX_CHUNK_TOKENS = 2000 - instruction_set_token_count
        OVERHANG = 500

        json_file_dump = json.dumps(json_file)
        app.logger.debug(f"Type of json.dumps(json_file): {type(json_file_dump)}")
        token_count = count_tokens(json_file_dump)

        user_id = data["userId"]

        # Save the input file to the history folder in Firebase Storage
        bucket = storage.bucket()
        input_blob = bucket.blob(f"users/{user_id}/history/{input_file_name}")
        input_blob.upload_from_string(docx_file, content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document")

        # Create a Firestore DB entry for the history folder
        history_collection = db.collection("users").document(user_id).collection("history")
        history_entry = {
            "fileName": input_file_name,
            "timestamp": datetime.datetime.utcnow(),
            "owner": user_id,
            "storagePath": f"users/{user_id}/history/{input_file_name}",
            "fileType": "docx"
        }
        history_collection.add(history_entry)

        # Save the raw JSON to the archive folder
        archive_blob = bucket.blob(f"users/{user_id}/archive/{input_file_name.split('.')[0]}_archive.json")
        archive_blob.upload_from_string(json.dumps(json_file), content_type="application/json")
        
        if token_count + instruction_set_token_count > MAX_TOKENS:
            # Split the input JSON into chunks
            json_chunks = iteratively_chunk_json(json_file, instruction_set, MAX_CHUNK_TOKENS, OVERHANG)  # instruction tokens are already subtracted out of MAX_CHUNK_TOKENS above
            app.logger.debug(f"Input JSON is split into {len(json_chunks)} chunks")
            app.logger.debug(json_chunks)

            # Process each chunk with GPT-4
            completions = []
            responses = []
            for idx, chunk in enumerate(json_chunks):
                app.logger.debug(f"Processing chunk {idx+1}/{len(json_chunks)}")
                user_input_str = instruction_set + json.dumps(chunk)
                completion = openai.ChatCompletion.create(
                    model="gpt-4",
                    messages=[
                        {
                            "role": "system",
                            "content": "You are a market researcher who is an expert survey and discussion guide writer. You always include answer choices when it is appropriate. You write complete market research surveys and discussion guides by first reading through an example and then outputting a complete survey including both questions and answers.",
                        },
                            {"role": "user", "content": user_input_str},
                    ],
                    max_tokens=500,
                    temperature=0.6,
                    top_p=1,
                    frequency_penalty=0,
                    presence_penalty=0,
                )

                completions.append(completion.choices[0].message.content)
                responses.append(completion.choices[0].message.content)

        else:  # If the token count does not exceed the limit, process the entire content
            app.logger.debug(f"Token count does not exceed the limit, processing the entire content")
            user_input_str = instruction_set + json.dumps(json_file)
            completion = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant.",
                    },
                    {"role": "user", "content": user_input_str},
                ],
                max_tokens=2000,
                temperature=0.6,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
            )

            completions = [completion.choices[0].message.content]
            responses = [completion.choices[0].message.content]
 
        # Stitch the completions together
        stitched_content = ""
        stitch_message = "A STITCH OCCURRED HERE PLEASE CHECK CONTENT FOR CONSISTENCY ESPECIALLY WITH SURVEYS AND SURVEY LOGIC. YOU MAY NEED TO DELETE REPEATED CONTENT CAUSED BY CHUNKING THE FILE."
        for idx, response in enumerate(responses):
            if idx > 0:
                stitched_content += stitch_message
            stitched_content += response

        # Save the stitched content to a JSON file
        output_data = {"content": stitched_content}
        output_json_str = json.dumps(output_data)

        # Convert the JSON file back to docx
        output_docx = Document()

        # If the stitched content happens to be a JSON list of {"text": ...} paragraphs, keep that structure;
        # otherwise drop the raw text into a single paragraph.
        try:
            paragraphs = json.loads(stitched_content)
        except json.JSONDecodeError:
            paragraphs = None
        if isinstance(paragraphs, list):
            for paragraph in paragraphs:
                p = output_docx.add_paragraph()
                p.add_run(paragraph["text"])
        else:
            p = output_docx.add_paragraph()
            p.add_run(stitched_content)

        # Save the output docx
        output_docx_stream = BytesIO()
        output_docx.save(output_docx_stream)

        # Upload the docx file back to the user's Firestore bucket
        bucket = storage.bucket()
        output_docx_stream.seek(0)
        blob = bucket.blob(f"users/{user_id}/output/{output_file_name}")  # Change this line
        blob.upload_from_file(output_docx_stream, content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document")

        # Create a Firestore DB entry for the output folder
        output_collection = db.collection("users").document(user_id).collection("output")
        output_entry = {
            "fileName": output_file_name,
            "timestamp": datetime.datetime.utcnow(),
            "owner": user_id,
            "storagePath": f"users/{user_id}/output/{output_file_name}",
            "fileType": "docx"
        }
        output_collection.add(output_entry)

        input_document_id = data["inputDocumentId"]

        # Update the 'processed' field in the 'uploadedFiles' entry for the input file
        uploaded_files_collection = db.collection("uploadedFiles")
        uploaded_files_document = uploaded_files_collection.document(input_document_id)
        doc_snapshot = uploaded_files_document.get()

        if doc_snapshot.exists:
            uploaded_files_document.update({"processed": True})
            if not deduct_credits_chunk(user_id, token_count, count_tokens(output_json_str)):
                return jsonify({"error": "Insufficient credits"}), 402

        else:
            app.logger.warning(f"Document with ID {input_document_id} not found in Firestore uploadedFiles collection")
            
    except Exception as e:
        app.logger.error(f"Unhandled error: {str(e)}")
        app.logger.error(traceback.format_exc())
        return jsonify({"error": "An unexpected error occurred"}), 500

    return jsonify({"firebase_storage_route": input_blob.path, "output_file_name": output_file_name})

Same thing is happening to me. It’s not until the final request that it bombs out. It’s happening like 50% of the time now.