The following script produces invalid JSON in the function arguments in around 30% of test runs.
from openai import OpenAI
# from openai.types.chat.completion_create_params import ResponseFormat
from dotenv import load_dotenv
import os
import json
# Load environment variables via python-dotenv (typically from a local .env
# file) before the API key is looked up.
load_dotenv()

# The key is read from the environment; a missing variable yields None here.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)
# Conversation sent with every request: a system prompt describing the
# DataFrame `df` and the rules to follow, plus a single user question.
#
# The system prompt is written as adjacent string literals (one per prompt
# line) so that no literal spans a physical line break, which Python rejects
# for single-quoted strings. The text itself is unchanged.
messages = [
    {
        "role": "system",
        "content": (
            "You are working with a python shell that has a pandas DataFrame.\n"
            "The name of the dataframe is `df`.\n"
            "You have pandas and numpy available as pd and np.This is the description of df:\n"
            "\n"
            "This DataFrame contains balance sheet data of the company in question for several years.\n"
            "Each row contains the balance sheet data of a year.\n"
            "The column 'Year' is very important. It is sorted DESC and contains the year for which the report is valid.\n"
            "The column 'Currency' is also important, it specifies the currency in which the numbers are reported in.\n"
            "\n"
            "Some rules to follow:\n"
            "\n"
            "1. Group calculations per Year and include the Year column in your answer.\n"
            "2. If you are asked how a metric developed, respond with the absolute values of the last couple of years, not the percentage changes.\n"
            "3. Return information from the context in the form of a markdown table.\n"
            "4. Include the Currency column if possible.\n"
            "5. Don't use dropna on df, you could lose important information.\n"
            # NOTE(review): the original literal was split by a raw line break
            # right after "assign part. "; it is kept here as an explicit "\n".
            # Confirm against the prompt that was actually sent.
            "6. If calculating diffs: use .diff(-1) to calculate differences to previous year. And don't include the Currency column for the diff, only in the assign part. \n"
            "Good example:\n"
            "df[['Year', 'Total_Assets', 'Cash_Bank_Deposits', 'AS30', 'Goodwill', 'AS32', 'AS40']].diff(-1).assign(Year=df['Year'], Currency=df['Currency'])\n"
            "\n"
            "This is the description of the relevant columns:\n"
            "Column 'Trade_Receivables': Trade Receivables \n"
            "Column 'Intragroup_Receivables': Intragroup Receivables\n"
            "Column 'Other_Receivables': Other Receivables\n"
            "Column 'Subtotal_Inventory': Subtotal Inventory\n"
            "Column 'Total_Assets': Total Assets \n"
            "Column 'Currency': Currency in which all figures for this balance sheet are reported\n"
            "\n"
            "This is the result of `print(df.head())`:\n"
            " Other_Receivables Currency Year Total_Assets Intragroup_Receivables Subtotal_Inventory Trade_Receivables\n"
            "0 3.025e+09 EUR 2022 2.3510e+10 0 8.789101e+08 7.813689e+08\n"
            "1 1.801e+09 EUR 2019 2.2478e+10 0 7.458830e+08 8.145821e+08\n"
            "\n"
            "Use the tool to answer the questions posed to you."
        ),
    },
    {
        "role": "user",
        "content": "For all of the following lines, calculate its share of total assets: Subtotal Liquid Assets, Trade Receivables, Intragroup Receivables, Other Receivables, Subtotal Inventory, Subtotal Tangible Fixed Assets, Subtotal Intangible Fixed Assets, Subtotal Financial Fixed Assets",
    },
]
# JSON-schema description of the single string argument the tool accepts.
query_property = {
    "title": "Query",
    "description": "python script or command WIHTOUT COMMENTS which will be evaluated by the eval command.\nNever start variable names with numbers!'",
    "type": "string",
}

# Function definition for the one tool offered to the model.
python_tool_function = {
    "name": "python-tool",
    "description": "A Python shell. Use this to execute python commands.\nNever start variable names with numbers!\n",
    "parameters": {
        "type": "object",
        "properties": {"query": query_property},
        "required": ["query"],
    },
}

# Tool list in the shape used for the `tools` request parameter.
tools = [{"type": "function", "function": python_tool_function}]
def _is_malformed_json(text):
    """Return True when *text* cannot be parsed as JSON."""
    try:
        json.loads(text)
    except json.JSONDecodeError:
        return True
    return False


# Raw `arguments` string of the first tool call from each completion.
function_arguments = []
# Completions in which the model replied without calling the tool; with
# tool_choice="auto" the message's tool_calls can be None.
responses_without_tool_call = 0

for _ in range(10):
    chat_completion = client.chat.completions.create(
        messages=messages,
        model="gpt-4-1106-preview",
        tools=tools,
        tool_choice="auto",
        seed=42,
        # Fixed seed plus near-zero sampling parameters, presumably to make
        # the runs as repeatable as possible (exact zeros kept for reference).
        # top_p=0,
        # temperature=0,
        top_p=0.000000000000001,
        temperature=0.000000000000001,
        n=1,
    )
    tool_calls = chat_completion.choices[0].message.tool_calls
    if not tool_calls:
        # Nothing to validate for this run; indexing None would raise.
        responses_without_tool_call += 1
        continue
    function_arguments.append(tool_calls[0].function.arguments)

print("The number of unique different function_arguments is: ", len(set(function_arguments)))
if responses_without_tool_call:
    print("The number of responses without a tool call is: ", responses_without_tool_call)

malformed_argument_json = sum(
    1 for argument in function_arguments if _is_malformed_json(argument)
)
print("The number of malformed function_arguments is: ", malformed_argument_json)
This results in
The number of malformed function_arguments is: 3
I also tried the gpt-4-0125 version, and that worked as expected (tested with 70 calls). Unfortunately, our organization doesn't have access to the newest version.