Hi guys,
Need some help. I am trying to connect Asterisk to OpenAI’s API.
Running a script via EAGI.
Somehow no audio is buffered. I believe my server / client both are properly configured as I can make internal calls, so I’d say we could oversee that as a possible issue but not completely discard it.
But I believe the issue here is in the script? What am I doing wrong?
Any feedback is welcome.
Thank you!
#!/usr/bin/env python3
import sys
import os
import json
import websockets
import asyncio
import audioop
import base64
# OpenAI Real-time API credentials and endpoint
OPENAI_API_KEY = "YOUR_API_KEY"
OPENAI_REALTIME_API_URL = "wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01"
# AGI response helper
def agi_response(text):
sys.stdout.write(f'{text}\n')
sys.stdout.flush()
async def stream_audio_to_openai(audio_stream):
agi_response("VERBOSE Connecting to OpenAI WebSocket API")
try:
async with websockets.connect(
OPENAI_REALTIME_API_URL,
extra_headers={
"Authorization": f"Bearer {OPENAI_API_KEY}",
"OpenAI-Beta": "realtime=v1"
}
) as websocket:
agi_response("VERBOSE Connected to OpenAI API successfully")
# Initial session update message
await websocket.send(json.dumps({
"type": "session.update",
"session": {
"instructions": "Your new instructions here"
}
}))
agi_response("VERBOSE Initial request sent.")
initial_response = await websocket.recv()
agi_response(f"VERBOSE Initial response received: {initial_response}")
session_id = json.loads(initial_response)["session"]["id"]
agi_response(f"VERBOSE Session ID: {session_id}")
agi_response("VERBOSE Streaming audio to OpenAI")
while True:
chunk = audio_stream.read(8192) # Increased to 8192 bytes
agi_response(f"VERBOSE Audio chunk size received: {len(chunk)} bytes") # Check chunk size
if len(chunk) == 0:
agi_response("VERBOSE No audio data received.")
break
# Convert and process the audio
try:
pcm_chunk = audioop.ulaw2lin(chunk, 2)
Base64EncodedAudioData = base64.b64encode(pcm_chunk).decode('utf-8')
await websocket.send(json.dumps({
"type": "input_audio_buffer.append",
"audio": Base64EncodedAudioData,
}))
agi_response("VERBOSE Audio chunk sent to OpenAI.")
await websocket.send(json.dumps({
"type": "input_audio_buffer.commit",
}))
agi_response("VERBOSE Audio buffer committed.")
except Exception as e:
agi_response(f"VERBOSE Error processing audio: {e}")
break
# Handle response from OpenAI
response = await websocket.recv()
response_json = json.loads(response)
if 'error' in response_json:
agi_response(f"VERBOSE Error from OpenAI: {response_json['error']['message']}")
break
else:
# Process the audio output if any
audio_output = response_json.get("delta")
if audio_output:
agi_response(f"VERBOSE Received audio output from OpenAI.")
# Handle audio output here
agi_response("VERBOSE Audio streaming to OpenAI completed.")
except Exception as e:
agi_response(f"VERBOSE Error streaming audio to OpenAI: {e}")
def main():
agi_response("VERBOSE Starting AGI script")
agi_response("ANSWER")
agi_response("VERBOSE Call answered successfully.")
try:
# Use file descriptor 3 for EAGI
audio_stream = os.fdopen(3, 'rb')
agi_response("VERBOSE Audio stream initialized.")
asyncio.run(stream_audio_to_openai(audio_stream))
except Exception as e:
agi_response(f"VERBOSE Error during audio streaming: {e}")
agi_response("HANGUP")
return
agi_response("HANGUP")
agi_response("VERBOSE Call hung up successfully.")
if __name__ == "__main__":
main()
Let me also paste the console log:
<PJSIP/2000-00000001>AGI Rx << VERBOSE Session ID: sess_AMIx2SAbm4RkmIl5DzNKJ
gpt_spanish_response.agi: Session
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Streaming audio to OpenAI
gpt_spanish_response.agi: Streaming
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio chunk size received: 8192 bytes
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio chunk sent to OpenAI.
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio buffer committed.
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024006, ts 533951431, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024007, ts 533951591, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024008, ts 533951751, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024009, ts 533951911, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024010, ts 533952071, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024011, ts 533952231, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024012, ts 533952391, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024013, ts 533952551, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024014, ts 533952711, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024015, ts 533952871, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024016, ts 533953031, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024017, ts 533953191, len 000160)
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio chunk size received: 8192 bytes
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio chunk sent to OpenAI.
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio buffer committed.
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Error from OpenAI: Error committing input audio buffer: buffer too small. Expected at least 100ms of audio, but buffer only has 0.00ms of audio.
gpt_spanish_response.agi: Error
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024018, ts 533953351, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024019, ts 533953511, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024020, ts 533953671, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024021, ts 533953831, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024022, ts 533953991, len 000160)
Got RTP packet from 217.61.226.139:19917 (type 00, seq 024023, ts 533954151, len 000160)
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio streaming to OpenAI completed.
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << HANGUP
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Call hung up successfully.