Below is an example of a session.update message. With this setup:
- response.output_audio.delta and response.output_audio.done give you the audio stream from the AI
- response.output_audio_transcript.done gives you the text transcript of what the AI said
- conversation.item.input_audio_transcription.completed gives you the text transcript of what the user said
session.update message (as a Python dict):
# Example payload for a "session.update" event, written as a Python dict
# (hence the Python literals True / False / None rather than JSON's
# true / false / null).
# NOTE(review): presumably this is JSON-serialized (e.g. json.dumps) before
# being sent over the realtime connection -- confirm against your client code.
session_update_message = {
    "type": "session.update",
    "session": {
        "type": "realtime",
        "model": "gpt-realtime",
        "audio": {
            "input": {
                # Format of the audio sent to the model.
                # NOTE(review): "rate" is presumably the sample rate in Hz -- confirm.
                "format": {
                    "type": "audio/pcm",
                    "rate": 24000,
                },
                "noise_reduction": {"type": "far_field"},
                # Transcription of the user's input audio. Per the notes that
                # accompany this snippet, the user's transcript arrives in
                # conversation.item.input_audio_transcription.completed.
                "transcription": {
                    "model": "gpt-4o-mini-transcribe",
                },
                # Turn-detection settings.
                # NOTE(review): "server_vad" presumably means server-side voice
                # activity detection; verify each field's exact meaning in the
                # API reference before tuning.
                "turn_detection": {
                    "create_response": True,
                    "interrupt_response": False,
                    "prefix_padding_ms": 300,
                    "silence_duration_ms": 750,
                    "threshold": 0.5,
                    "type": "server_vad",
                },
            },
            "output": {
                # Format of the audio produced by the model (same as the input here).
                "format": {
                    "type": "audio/pcm",
                    "rate": 24000,
                },
                "speed": 1,
                "voice": "coral",
            },
        },
        # Placeholder: replace with your own system prompt.
        "instructions": "YOUR PROMPT HERE",
        "max_output_tokens": 1024,
        "output_modalities": ["audio"],
        "tool_choice": "auto",
        # Placeholder: add your tool definitions to this list, if any.
        "tools": [],
        "tracing": None,
        "truncation": "auto",
    },
}