Hello,

My application using async function which can call Assistants API. When there are two requests sending to the same thread, it throws this error.

{'error': {'message': "Can't add messages to thread_Fmzxw5GjqKcFBaVeMeM2ozTF while a run run_6b3A7jCcFwHikdR9si409rCj is active.", 'type': 'invalid_request_error', 'param': None, 'code': None}}

At this points it seems that i can’t find a way to retrieve in_process runs so that i can wait until there is no in_process runs.

My code is simple, just like this.

async def send_openai(assistant_id, message, thread=None):
     if not thread: 
         thread = _openai.start_thread()
     _openai.beta.threads.messages.create(
                 thread_id=thread_id, role="user", content=message
     )
    run = _openai.beta.threads.runs.create(
            thread_id=thread,
            assistant_id=assistant_id,
            instructions="Some instruction",
        )
    while True:
            run = _openai.beta.threads.runs.retrieve(
                 thread_id=thread_id, run_id=run.id
            )
            run_status = _get_response_status(
               run
            )
            if run_status == "ok":
                return run
            if run_status == "failed":
                return None
            if run_status == "retry":
                await asyncio.sleep(10)

def get_response_status(data: Run) -> Literal["retry", "failed", "ok"]:
    if data.status in ("completed",):
        return "ok"
    if data.status in (
        "queue",
        "in_progress",
    ):
        return "retry"
    if data.status in (
        "requires_action",
        "cancelling",
        "cancelled",
        "failed",
        "expired",
    ):
        return "failed"

A thread is a “chat history” of a session.

It would make sense to lock out multiple runs using the same thread object, as you would get two different AI replies added, breaking conversational context and chat continuation based on previous inputs and responses.