Multiple function calls with streaming

Hey!

I’m trying to re-implement the function calling example from the documentation ([invoking multiple function calls in one response](https://platform.openai.com/docs/guides/function-calling)):

from openai import OpenAI
import json

client = OpenAI()

# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
    """Get the current weather in a given location"""
    if "tokyo" in location.lower():
        return json.dumps({"location": "Tokyo", "temperature": "10", "unit": unit})
    elif "san francisco" in location.lower():
        return json.dumps({"location": "San Francisco", "temperature": "72", "unit": unit})
    elif "paris" in location.lower():
        return json.dumps({"location": "Paris", "temperature": "22", "unit": unit})
    else:
        return json.dumps({"location": location, "temperature": "unknown"})

def run_conversation():
    # Step 1: send the conversation and available functions to the model
    messages = [{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=messages,
        tools=tools,
        tool_choice="auto",  # auto is default, but we'll be explicit
    )
    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls
    # Step 2: check if the model wanted to call a function
    if tool_calls:
        # Step 3: call the function
        # Note: the JSON response may not always be valid; be sure to handle errors
        available_functions = {
            "get_current_weather": get_current_weather,
        }  # only one function in this example, but you can have multiple
        messages.append(response_message)  # extend conversation with assistant's reply
        # Step 4: send the info for each function call and function response to the model
        for tool_call in tool_calls:
            function_name = tool_call.function.name
            function_to_call = available_functions[function_name]
            function_args = json.loads(tool_call.function.arguments)
            function_response = function_to_call(
                location=function_args.get("location"),
                unit=function_args.get("unit"),
            )
            messages.append(
                {
                    "tool_call_id": tool_call.id,
                    "role": "tool",
                    "name": function_name,
                    "content": function_response,
                }
            )  # extend conversation with function response
        second_response = client.chat.completions.create(
            model="gpt-3.5-turbo-1106",
            messages=messages,
        )  # get a new response from the model where it can see the function response
        return second_response
print(run_conversation())

but using streaming (stream=True), like this:

from openai import OpenAI
import json

client = OpenAI()

# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
    """Get the current weather in a given location"""
    if "tokyo" in location.lower():
        return json.dumps({"location": "Tokyo", "temperature": "10", "unit": unit})
    elif "san francisco" in location.lower():
        return json.dumps({"location": "San Francisco", "temperature": "72", "unit": unit})
    elif "paris" in location.lower():
        return json.dumps({"location": "Paris", "temperature": "22", "unit": unit})
    else:
        return json.dumps({"location": location, "temperature": "unknown"})

def run_conversation():
    # Step 1: send the conversation and available functions to the model
    messages = [{"role": "user", "content": "Tell me a joke and then tell me what's the weather like in San Francisco, Tokyo, and Paris?"}]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    stream = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=messages,
        tools=tools,
        tool_choice="auto",  # auto is default, but we'll be explicit
        stream=True,
    )

    for chunk in stream:
        print(chunk)
        if chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="")

print(run_conversation())

There was another, somewhat related thread, but the problem is that in the example from the documentation there are 3 function calls performed in “one go”:

[{'role': 'user', 'content': "Tell me a joke and then tell me what's the weather like in San Francisco, Tokyo, and Paris?"}, ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_1krPI8C4RYn99DeBxaf3ljQA', function=Function(arguments='{"location": "San Francisco, CA", "unit": "celsius"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_0OE3Y4Jj2E7mni40mdkjVHFj', function=Function(arguments='{"location": "Tokyo, Japan", "unit": "celsius"}', name='get_current_weather'), type='function'), ChatCompletionMessageToolCall(id='call_OC9JPoBNnDyrO9qPI2BybwZp', function=Function(arguments='{"location": "Paris, France", "unit": "celsius"}', name='get_current_weather'), type='function')]), {'tool_call_id': 'call_1krPI8C4RYn99DeBxaf3ljQA', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "San Francisco", "temperature": "72", "unit": "celsius"}'}, {'tool_call_id': 'call_0OE3Y4Jj2E7mni40mdkjVHFj', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "Tokyo", "temperature": "10", "unit": "celsius"}'}, {'tool_call_id': 'call_OC9JPoBNnDyrO9qPI2BybwZp', 'role': 'tool', 'name': 'get_current_weather', 'content': '{"location": "Paris", "temperature": "22", "unit": "celsius"}'}]
ChatCompletion(id='chatcmpl-8io4oq5lm5kz65wUeV0FBIYt2j3CW', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Why don't skeletons fight each other?\n\nThey don't have the guts.\n\nThe current weather in San Francisco is 72°C and partly cloudy. In Tokyo, it's 10°C and mostly sunny. And in Paris, it's 22°C and partly sunny.", role='assistant', function_call=None, tool_calls=None))], created=1705689618, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_c596c86df9', usage=CompletionUsage(completion_tokens=54, prompt_tokens=183, total_tokens=237))

The tool_calls array contains 3 objects.

When I try to do the same with streaming I get this:

ChatCompletionChunk(id='chatcmpl-8ioFUdOje5j7VfS4FsMFAE0L02NoA', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role='assistant', tool_calls=[ChoiceDeltaToolCall(index=0, id='call_3udRjEUIcAkzW1gxdVFW3Fc7', function=ChoiceDeltaToolCallFunction(arguments='', name='get_current_weather'), type='function')]), finish_reason=None, index=0, logprobs=None)], created=1705690280, model='gpt-4-1106-preview', object='chat.completion.chunk', system_fingerprint='fp_04de91a479')
ChatCompletionChunk(id='chatcmpl-8ioFUdOje5j7VfS4FsMFAE0L02NoA', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1705690280, model='gpt-4-1106-preview', object='chat.completion.chunk', system_fingerprint='fp_04de91a479')
ChatCompletionChunk(id='chatcmpl-8ioFUdOje5j7VfS4FsMFAE0L02NoA', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='location', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1705690280, model='gpt-4-1106-preview', object='chat.completion.chunk', system_fingerprint='fp_04de91a479')
ChatCompletionChunk(id='chatcmpl-8ioFUdOje5j7VfS4FsMFAE0L02NoA', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='":"', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1705690280, model='gpt-4-1106-preview', object='chat.completion.chunk', system_fingerprint='fp_04de91a479')
ChatCompletionChunk(id='chatcmpl-8ioFUdOje5j7VfS4FsMFAE0L02NoA', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='San', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1705690280, model='gpt-4-1106-preview', object='chat.completion.chunk', system_fingerprint='fp_04de91a479')
ChatCompletionChunk(id='chatcmpl-8ioFUdOje5j7VfS4FsMFAE0L02NoA', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments=' Francisco', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1705690280, model='gpt-4-1106-preview', object='chat.completion.chunk', system_fingerprint='fp_04de91a479')
ChatCompletionChunk(id='chatcmpl-8ioFUdOje5j7VfS4FsMFAE0L02NoA', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments=',', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1705690280, model='gpt-4-1106-preview', object='chat.completion.chunk', system_fingerprint='fp_04de91a479')
ChatCompletionChunk(id='chatcmpl-8ioFUdOje5j7VfS4FsMFAE0L02NoA', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments=' CA', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1705690280, model='gpt-4-1106-preview', object='chat.completion.chunk', system_fingerprint='fp_04de91a479')
ChatCompletionChunk(id='chatcmpl-8ioFUdOje5j7VfS4FsMFAE0L02NoA', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='"}', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1705690280, model='gpt-4-1106-preview', object='chat.completion.chunk', system_fingerprint='fp_04de91a479')
ChatCompletionChunk(id='chatcmpl-8ioFUdOje5j7VfS4FsMFAE0L02NoA', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=None), finish_reason='tool_calls', index=0, logprobs=None)], created=1705690280, model='gpt-4-1106-preview', object='chat.completion.chunk', system_fingerprint='fp_04de91a479')

This time, the first chunk contains only one tool_call (instead of 3). The rest of the chunks contain the “chunked” arguments data (just for this first call).

My question is: is tool calling limited to a single function call when streaming, or do I need to append the message and send it back, calling the same method recursively, until I have received all the “content” chunks?

Is this an API limitation? It looks like instead of 2 calls I would need to make #_of_func + 1 (if it even works that way). Or maybe I’m doing something wrong here.

1 Like

OK, I’ve made it work (based on examples from this forum).
Hope this helps someone with a similar problem.

from openai import OpenAI
import json

client = OpenAI()

# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
    """Get the current weather in a given location"""
    if "tokyo" in location.lower():
        return json.dumps({"location": "Tokyo", "temperature": "10", "unit": unit})
    elif "san francisco" in location.lower():
        return json.dumps({"location": "San Francisco", "temperature": "32", "unit": unit})
    elif "paris" in location.lower():
        return json.dumps({"location": "Paris", "temperature": "22", "unit": unit})
    else:
        return json.dumps({"location": location, "temperature": "unknown"})

def run_conversation():
    # Step 1: send the conversation and available functions to the model
    messages = [{"role": "user", "content": "Tell me a joke and then tell me what's the weather like in San Francisco, Tokyo, and Paris?"}]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    stream = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=messages,
        tools=tools,
        tool_choice="auto",  # auto is default, but we'll be explicit
        stream=True,
    ) 

    available_functions = {
        "get_current_weather": get_current_weather,
    }  # only one function in this example, but you can have multiple

    response_text = ""
    tool_calls = []

    for chunk in stream:
        delta = chunk.choices[0].delta
        # print(delta)

        if delta and delta.content:
            # content chunk -- send to browser and record for later saving
            print(delta.content)
            response_text += delta.content

        elif delta and delta.tool_calls:
            tcchunklist = delta.tool_calls
            for tcchunk in tcchunklist:
                if len(tool_calls) <= tcchunk.index:
                    tool_calls.append({"id": "", "type": "function", "function": { "name": "", "arguments": "" } })
                tc = tool_calls[tcchunk.index]

                if tcchunk.id:
                    tc["id"] += tcchunk.id
                if tcchunk.function.name:
                    tc["function"]["name"] += tcchunk.function.name
                if tcchunk.function.arguments:
                    tc["function"]["arguments"] += tcchunk.function.arguments    

    # print(tool_calls)

    messages.append(
        {
            "tool_calls": tool_calls,
            "role": 'assistant',
        }                    
    )    

    for tool_call in tool_calls:
        function_name = tool_call['function']['name']
        function_to_call = available_functions[function_name]
        function_args = json.loads(tool_call['function']['arguments'])
        function_response = function_to_call(
            location=function_args.get("location"),
            unit=function_args.get("unit"),
        )
        messages.append(
            {
                "tool_call_id": tool_call['id'],
                "role": "tool",
                "name": function_name,
                "content": function_response,
            }
        )  # extend conversation with function response
    
    # print(messages)
    
    stream = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=messages,
        stream=True,
    ) 

    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="")        

run_conversation()

-Szymon

7 Likes

Hi Szymon,

Do you think this code can be adapted for a version with several tools that have dependencies between them?

For example, function B uses the results of function A as arguments.

From what I’ve researched, a while loop must be used to keep calling the model until it no longer requests tool calls, but I don’t know if this can also be applied to the streaming version.

I also specified in the system message and in the function description that function B depends on function A, but the gpt-3.5-turbo-1106 version with chat completions does not take this into account.

Interestingly, when I used an Assistant it took the dependency into account, but streaming is not available for Assistants.

Hey Roco,

I haven’t tried this yet, but it seems doable (with or without streaming).
How GPT works:

  1. get the user prompt
  2. check if it can use functions
  3. provide the function call’s params
  4. stop and wait for the functions to be called
  5. we handle the function calls and craft a response back to GPT
  6. GPT responds with the context of the function calls

So, in theory, we could hook into step 5 and implement a function call sequence, so that we call the functions in the right order and can pass the params to the ones called next.
There is one caveat though: I’m not sure if GPT will “mark as callable” a function if it cannot figure out its params beforehand.
Maybe this is something that can be handled via proper prompt-crafting.

Hope that helps. I may try this approach out to see if it actually works, or if you do, please let me know.
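
To make this more concrete, here is a rough sketch of such a loop (untested; it reuses the client, tools, available_functions and json import from my example above). The idea is to keep streaming until the model stops asking for tools, so chained calls like “B needs A’s result” simply become extra iterations:

def run_tool_loop(messages, max_rounds=5):
    # Rough sketch (untested): keep calling the model until it stops requesting tools
    for _ in range(max_rounds):
        stream = client.chat.completions.create(
            model="gpt-4-1106-preview",
            messages=messages,
            tools=tools,
            tool_choice="auto",
            stream=True,
        )

        response_text = ""
        tool_calls = []

        # Accumulate content and tool calls exactly as in the example above
        for chunk in stream:
            delta = chunk.choices[0].delta
            if delta and delta.content:
                print(delta.content, end="")
                response_text += delta.content
            elif delta and delta.tool_calls:
                for tcchunk in delta.tool_calls:
                    if len(tool_calls) <= tcchunk.index:
                        tool_calls.append({"id": "", "type": "function", "function": {"name": "", "arguments": ""}})
                    tc = tool_calls[tcchunk.index]
                    if tcchunk.id:
                        tc["id"] += tcchunk.id
                    if tcchunk.function.name:
                        tc["function"]["name"] += tcchunk.function.name
                    if tcchunk.function.arguments:
                        tc["function"]["arguments"] += tcchunk.function.arguments

        if not tool_calls:
            return response_text  # no more tool requests -- we're done

        # Execute this round's tool calls; their results become context for the
        # next round, so function B can use whatever function A returned
        messages.append({"role": "assistant", "tool_calls": tool_calls})
        for tool_call in tool_calls:
            function_to_call = available_functions[tool_call["function"]["name"]]
            function_args = json.loads(tool_call["function"]["arguments"])
            messages.append(
                {
                    "tool_call_id": tool_call["id"],
                    "role": "tool",
                    "name": tool_call["function"]["name"],
                    "content": function_to_call(**function_args),
                }
            )
    return response_text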

1 Like

Thanks, please take a look at the thread below.
Maybe the code from it could also be adapted for the streaming version.

Thanks for sharing your code. Is there a way to detect when you have received all data for each tool call, so you can start processing each one asynchronously before all of them have been generated and sent to the client?

Yes! I was trying to figure this out myself and could not find any resources, so I implemented it.

Function calling snippet

Step 1: send the conversation and available functions to the model

stream_response1 = await client.chat.completions.create(
    model=DEPLOYMENT_NAME,
    messages=messages,
    tools=get_tools(),
    tool_choice="auto",
    temperature=0.1,
    top_p=0.95,
    max_tokens=4096,
    stream=True
)

# Convert the stream response to a list
stream_response1_list = [item async for item in stream_response1]

tool_calls = [] # Accumulator for tool calls to process later; 
full_delta_content = "" # Accumulator for delta content to process later

Process the stream response for tool calls and delta content
This iterates through the stream building the delta content and the tool calls to be made.

for chunk in stream_response1_list:
    delta = chunk.choices[0].delta if chunk.choices and chunk.choices[0].delta is not None else None

    if delta and delta.content:
        full_delta_content += delta.content
        
    elif delta and delta.tool_calls:
        tc_chunk_list = delta.tool_calls
        for tc_chunk in tc_chunk_list:
            if len(tool_calls) <= tc_chunk.index:
                tool_calls.append({"id": "", "type": "function", "function": {"name": "", "arguments": ""}})
            tc = tool_calls[tc_chunk.index]

            if tc_chunk.id:
                tc["id"] += tc_chunk.id
            if tc_chunk.function.name:
                tc["function"]["name"] += tc_chunk.function.name
            if tc_chunk.function.arguments:
                tc["function"]["arguments"] += tc_chunk.function.arguments

Step 2: check if the model wanted to call a function
If no tool calls, it returns another stream iterable to be used by the caller.

If there are tool calls, it makes the calls, adds the tool context to the messages, and makes the model output a response based on all of the tool calls’ returns/contexts.

if not tool_calls and full_delta_content:
    messages.append({ "role": "assistant", "content": full_delta_content })

    # Convert the list to a stream to return as a response
    async def list_to_stream():
        for item in stream_response1_list:
            yield item

    return list_to_stream()
elif tool_calls:
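    # (Sketch, not from the original post: one way this branch could continue,
    # mirroring Szymon's earlier example. Assumes "import json" and an
    # available_functions dict mapping tool names to callables, like the one
    # defined earlier in the thread.)
    messages.append({"role": "assistant", "tool_calls": tool_calls})

    for tool_call in tool_calls:
        function_to_call = available_functions[tool_call["function"]["name"]]
        function_args = json.loads(tool_call["function"]["arguments"])
        messages.append(
            {
                "tool_call_id": tool_call["id"],
                "role": "tool",
                "name": tool_call["function"]["name"],
                "content": function_to_call(**function_args),
            }
        )

    # Second request: the model now sees the tool results and streams the final answer
    return await client.chat.completions.create(
        model=DEPLOYMENT_NAME,
        messages=messages,
        stream=True,
    )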

For more, check out my GitHub repo with function calling examples (not allowed to post links here):

github: john-carroll-sw/chat-completions-function-calling-examples
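
Regarding the earlier question about starting work on each tool call before the whole stream has finished: one possible approach (a sketch, assuming the API streams tool calls one at a time in increasing index order, which is what the chunks above show) is to treat a jump to a new index, or the final finish_reason == "tool_calls" chunk, as the signal that the previous call's arguments are complete, and dispatch that call right away. The dispatch function below is a hypothetical placeholder:

import asyncio
import json

async def dispatch(tool_call):
    # Hypothetical handler -- route the completed call to your own function here
    args = json.loads(tool_call["function"]["arguments"])
    print("dispatching", tool_call["function"]["name"], args)

async def process_stream(stream):
    tool_calls, tasks = [], []
    async for chunk in stream:
        choice = chunk.choices[0] if chunk.choices else None
        delta = choice.delta if choice else None

        if delta and delta.tool_calls:
            for tc_chunk in delta.tool_calls:
                if len(tool_calls) <= tc_chunk.index:
                    if tool_calls:
                        # A new index means the previous call's arguments are complete
                        tasks.append(asyncio.create_task(dispatch(tool_calls[-1])))
                    tool_calls.append({"id": "", "type": "function", "function": {"name": "", "arguments": ""}})
                tc = tool_calls[tc_chunk.index]
                if tc_chunk.id:
                    tc["id"] += tc_chunk.id
                if tc_chunk.function.name:
                    tc["function"]["name"] += tc_chunk.function.name
                if tc_chunk.function.arguments:
                    tc["function"]["arguments"] += tc_chunk.function.arguments

        if choice and choice.finish_reason == "tool_calls" and tool_calls:
            # The stream is ending, so the last accumulated call is complete too
            tasks.append(asyncio.create_task(dispatch(tool_calls[-1])))

    await asyncio.gather(*tasks)
    return tool_calls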