Asterisk OpenAI API Python

Hi guys,

Need some help. I am trying to connect Asterisk to OpenAI’s API.
Running a script via EAGI.

Somehow no audio is buffered. I believe my server / client both are properly configured as I can make internal calls, so I’d say we could oversee that as a possible issue but not completely discard it.

But I believe the issue here is in the script? What am I doing wrong?
Any feedback is welcome.

Thank you!

#!/usr/bin/env python3

import sys
import os
import json
import websockets
import asyncio
import audioop
import base64

# OpenAI Real-time API credentials and endpoint
OPENAI_API_KEY = "YOUR_API_KEY"
OPENAI_REALTIME_API_URL = "wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01"

# AGI response helper
def agi_response(text):
    sys.stdout.write(f'{text}\n')
    sys.stdout.flush()

async def stream_audio_to_openai(audio_stream):
    agi_response("VERBOSE Connecting to OpenAI WebSocket API")

    try:
        async with websockets.connect(
            OPENAI_REALTIME_API_URL,
            extra_headers={
                "Authorization": f"Bearer {OPENAI_API_KEY}",
                "OpenAI-Beta": "realtime=v1"
            }
        ) as websocket:
            agi_response("VERBOSE Connected to OpenAI API successfully")

            # Initial session update message
            await websocket.send(json.dumps({
                "type": "session.update",
                "session": {
                    "instructions": "Your new instructions here"
                }
            }))
            agi_response("VERBOSE Initial request sent.")

            initial_response = await websocket.recv()
            agi_response(f"VERBOSE Initial response received: {initial_response}")

            session_id = json.loads(initial_response)["session"]["id"]
            agi_response(f"VERBOSE Session ID: {session_id}")

            agi_response("VERBOSE Streaming audio to OpenAI")
            while True:
                chunk = audio_stream.read(8192)  # Increased to 8192 bytes
                agi_response(f"VERBOSE Audio chunk size received: {len(chunk)} bytes")  # Check chunk size

                if len(chunk) == 0:
                    agi_response("VERBOSE No audio data received.")
                    break

                # Convert and process the audio
                try:
                    pcm_chunk = audioop.ulaw2lin(chunk, 2)
                    Base64EncodedAudioData = base64.b64encode(pcm_chunk).decode('utf-8')

                    await websocket.send(json.dumps({
                        "type": "input_audio_buffer.append",
                        "audio": Base64EncodedAudioData,
                    }))
                    agi_response("VERBOSE Audio chunk sent to OpenAI.")
                    
                    await websocket.send(json.dumps({
                        "type": "input_audio_buffer.commit",
                    }))
                    agi_response("VERBOSE Audio buffer committed.")

                except Exception as e:
                    agi_response(f"VERBOSE Error processing audio: {e}")
                    break

                # Handle response from OpenAI
                response = await websocket.recv()
                response_json = json.loads(response)
                if 'error' in response_json:
                    agi_response(f"VERBOSE Error from OpenAI: {response_json['error']['message']}")
                    break
                else:
                    # Process the audio output if any
                    audio_output = response_json.get("delta")
                    if audio_output:
                        agi_response(f"VERBOSE Received audio output from OpenAI.")
                        # Handle audio output here

            agi_response("VERBOSE Audio streaming to OpenAI completed.")
    except Exception as e:
        agi_response(f"VERBOSE Error streaming audio to OpenAI: {e}")

def main():
    agi_response("VERBOSE Starting AGI script")
    
    agi_response("ANSWER")
    agi_response("VERBOSE Call answered successfully.")

    try:
        # Use file descriptor 3 for EAGI
        audio_stream = os.fdopen(3, 'rb')
        agi_response("VERBOSE Audio stream initialized.")

        asyncio.run(stream_audio_to_openai(audio_stream))
    except Exception as e:
        agi_response(f"VERBOSE Error during audio streaming: {e}")
        agi_response("HANGUP")
        return

    agi_response("HANGUP")
    agi_response("VERBOSE Call hung up successfully.")

if __name__ == "__main__":
    main()

Let me also paste the console log:

<PJSIP/2000-00000001>AGI Rx << VERBOSE Session ID: sess_AMIx2SAbm4RkmIl5DzNKJ
gpt_spanish_response.agi: Session
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Streaming audio to OpenAI
gpt_spanish_response.agi: Streaming
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio chunk size received: 8192 bytes
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio chunk sent to OpenAI.
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio buffer committed.
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024006, ts 533951431, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024007, ts 533951591, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024008, ts 533951751, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024009, ts 533951911, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024010, ts 533952071, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024011, ts 533952231, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024012, ts 533952391, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024013, ts 533952551, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024014, ts 533952711, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024015, ts 533952871, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024016, ts 533953031, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024017, ts 533953191, len 000160)
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio chunk size received: 8192 bytes
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio chunk sent to OpenAI.
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio buffer committed.
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Error from OpenAI: Error committing input audio buffer: buffer too small. Expected at least 100ms of audio, but buffer only has 0.00ms of audio.
gpt_spanish_response.agi: Error
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024018, ts 533953351, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024019, ts 533953511, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024020, ts 533953671, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024021, ts 533953831, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024022, ts 533953991, len 000160)
Got  RTP packet from    217.61.226.139:19917 (type 00, seq 024023, ts 533954151, len 000160)
<PJSIP/2000-00000001>AGI Rx << VERBOSE Audio streaming to OpenAI completed.
gpt_spanish_response.agi: Audio
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << HANGUP
<PJSIP/2000-00000001>AGI Tx >> 200 result=1
<PJSIP/2000-00000001>AGI Rx << VERBOSE Call hung up successfully.
1 Like

did you have any luck in making this work?

I did actually, but this was like a couple weeks a go. Only got it working today… it was a though one.

Can you tell us what you did to get this working? I have a similar problem.

Sure. What problem do you have?