Having an API echo thousands of words you just sent straight back at you would seem silly, don't you think?
Here's how to produce a JSONL file using Chat Completions. I was going to show how simple it was, but then I told an AI to make it robust and harder to break, and fixed what it broke.
A function for making a training example line:
import json

from openai import OpenAI

client = OpenAI(timeout=90)

def make_jsonl_entry(input_messages, api_response, tools):
    # Get the assistant message object
    msg = api_response.choices[0].message
    # Use model_dump() if available, else fall back to __dict__
    msg_dict = msg.model_dump() if hasattr(msg, "model_dump") else msg.__dict__
    # Only keep the keys you want; content and tool_calls are both optional
    entry = {"role": msg_dict["role"]}
    if msg_dict.get("content") is not None:
        entry["content"] = msg_dict["content"]
    if msg_dict.get("tool_calls"):
        entry["tool_calls"] = msg_dict["tool_calls"]
    # If you always want tool_calls present (even if empty), uncomment:
    # entry["tool_calls"] = msg_dict.get("tool_calls", [])
    return json.dumps({
        "messages": input_messages + [entry],
        "parallel_tool_calls": False,
        "tools": tools,
    })
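Since the whole point is that each returned string must be one valid JSON line, a cheap sanity check before writing is to parse it back. A minimal hypothetical helper, not part of the code above:

def check_line(line: str) -> bool:
    # Reject anything that isn't a single parseable JSON object
    try:
        obj = json.loads(line)
    except json.JSONDecodeError:
        return False
    # A training line needs a "messages" list ending with an assistant turn
    msgs = obj.get("messages", [])
    return bool(msgs) and msgs[-1].get("role") == "assistant"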
Carry on with your normal usage:
input = [
    {"role": "system", "content": "You are a helpful language assistant."},
    {"role": "user", "content": "Say 'banana' 10 times!"},
]
tools = []
# The API rejects an empty tools array, so only send the parameter when you have tools
tool_kwargs = {"tools": tools} if tools else {}
api_response = client.chat.completions.create(
    model="gpt-4-turbo-2024-04-09", max_tokens=5, messages=input, **tool_kwargs,
)
print(f"AI said:\n{api_response.choices[0].message.content}")
jsonl_chat_to_save = make_jsonl_entry(input, api_response, tools)
print(f"\n\nYour training file line:\n{jsonl_chat_to_save}")
If you simply must get what you just sent back out of an API, there's the Responses endpoint. The data it returns is not your friend, though (nor is an API that makes you pay for more than the desired output length).
r_input = [
{
"role": "system",
"content": [
{"type": "input_text", "text": "You are a helpful language assistant."}
]
},
{
"role": "user",
"content": [
{"type": "input_text", "text": "Say 'banana' 10 times!"}
]
}
]
response = client.responses.create(
model="gpt-4-turbo-2024-04-09",
input=r_input,
max_output_tokens=16,
)
print(json.dumps(response.model_dump(), indent=2))  # can't trust index 0 of the output list
input_get = client.responses.input_items.list(response.id, order="asc")
print(f"retrieved inputs: {input_get.model_dump()}") # not chatC items