Timeout for OpenAI chat completion in Python

Here is the commit that made the change (a request timeout parameter is no longer supported):

Setting the module-level request timeout still works and will raise an error:
openai.api_requestor.TIMEOUT_SECS = 2

openai.error.Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=2)
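For context, here is a minimal sketch of using that module-level setting with the pre-1.0 openai Python library (assuming `key` holds your API key); catching openai.error.Timeout lets you handle the timeout instead of crashing:

import openai

openai.api_key = key  # assumption: your API key string
openai.api_requestor.TIMEOUT_SECS = 2  # applies to every request the library makes

try:
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        max_tokens=500,
        messages=[{"role": "user", "content": "Write a leprechaun story"}],
    )
    print(response["choices"][0]["message"]["content"])
except openai.error.Timeout as e:
    print(f"Timed out after {openai.api_requestor.TIMEOUT_SECS}s: {e}")

The drawback is that this setting is global to the library, while the threaded version below gives you a per-call timeout.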


Here’s a threaded, non-streaming chat completion (the response is a data structure, not an unhandled generator) that I simplified from my other hacks, with a working timeout parameter.

(You can make the function raise an exception instead of just printing a message; a sketch of that variant is at the end of this post.)

import openai
import threading
openai.api_key = key  # your API key string

def api_call(result, api_parameters):
    # Runs the blocking API call in a worker thread and stores the response
    api_response = openai.ChatCompletion.create(**api_parameters)
    result[0] = api_response

def chat_c_threaded(api_parameters):
    timeout = api_parameters.pop("timeout", None)  # our own timeout, not an API parameter
    result = [None]  # one-element list the worker thread writes the response into

    # daemon thread so a permanently hung call won't keep the program from exiting
    api_thread = threading.Thread(target=api_call, args=(result, api_parameters), daemon=True)
    api_thread.start()
    api_thread.join(timeout=timeout)  # wait up to `timeout` seconds for the call to finish

    if api_thread.is_alive():
        # The call hasn't returned yet; Python can't kill the thread, so wait a little longer
        print("API call timeout, waiting a bit longer...")

        api_thread.join(timeout=timeout + 1)  # give the same call some extra time
        if api_thread.is_alive():
            print("API call still hanging, giving up.")
            return {
                "choices": [
                    {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": "API Timeout"
                    },
                    "finish_reason": "timeout"
                    }
                ],
            }

    # The API call finished within the timeout (or during the extra wait)
    return result[0]


# Usage ------------
if __name__ == "__main__":
    print("Threaded timeout example")
    for maxtoken in [10, 100, 500]:
        # parameters as a dict (not keyword arguments), now with a working 'timeout'
        chat_properties = {
            "model": "gpt-3.5-turbo", "max_tokens": maxtoken, "top_p": 0.1, "timeout": 2.5,
            "messages": [
                {"role": "system", "content": "You are an AI assistant"},
                {"role": "user", "content": "Write a leprechaun story"},
            ]
        }
        print(chat_c_threaded(chat_properties)['choices'][0]['message'])

The usage example runs at three max_tokens values so you can see both normal completions and timeouts.
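Per the parenthetical above, here is a minimal sketch of a variant that raises instead of returning a placeholder response. It reuses api_call and the imports from the example; the ChatTimeoutError name is my own, not part of the openai library:

class ChatTimeoutError(Exception):
    """Raised when the threaded API call does not finish in time."""

def chat_c_threaded_raising(api_parameters):
    timeout = api_parameters.pop("timeout", None)
    result = [None]

    api_thread = threading.Thread(target=api_call, args=(result, api_parameters), daemon=True)
    api_thread.start()
    api_thread.join(timeout=timeout)  # wait up to `timeout` seconds

    if api_thread.is_alive():
        # The worker thread keeps running in the background; we just stop waiting for it
        raise ChatTimeoutError(f"No chat completion within {timeout} seconds")
    return result[0]

The caller can then wrap the call in try/except and decide whether to retry with a fresh request, back off, or surface the error.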
