Real Time Speech To Text API Disconnects Immediately

Hey everyone,

I am trying to implement the Realtime API over WebSockets, following this guide: https://platform.openai.com/docs/guides/realtime?use-case=transcription&connection-example=ws#connect-with-websockets

However, when I connect to the socket and start streaming audio from my microphone, the connection closes almost immediately.

WebSocket closed. Code: 1000

Does anyone know why this occurs? Here is my ReactJS implementation below.

import React, { useState, useEffect, useRef } from "react";

const TranscriptionApp = () => {
  const [isRecording, setIsRecording] = useState(false);
  const [transcription, setTranscription] = useState("");
  const wsRef = useRef(null);
  const mediaRecorderRef = useRef(null);
  const audioContextRef = useRef(null);

  const API_KEY = "REDACTED";
  const WS_URL = "wss://api.openai.com/v1/realtime?intent=transcription";

  const startRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      audioContextRef.current = new AudioContext();
      const source = audioContextRef.current.createMediaStreamSource(stream);
      const processor = audioContextRef.current.createScriptProcessor(4096, 1, 1);

      wsRef.current = new WebSocket(WS_URL, [
        "realtime",
        `openai-insecure-api-key.${API_KEY}`,
        "openai-beta.realtime-v1",
      ]);

      wsRef.current.onopen = () => {
        console.log("Connected to OpenAI Realtime API");
        
      };
      

      wsRef.current.onmessage = (event) => {
        const data = JSON.parse(event.data);
        if (data.text) {
          setTranscription((prev) => prev + " " + data.text);
        }
      };

      wsRef.current.onerror = (error) => {
        console.error("WebSocket Error:", error);
      };
      
      wsRef.current.onclose = (event) => {
        console.warn(
          `WebSocket closed. Code: ${event.code}, Reason: ${event.reason}, Clean: ${event.wasClean}`
        );
      };

      processor.onaudioprocess = (event) => {
        if (wsRef.current && wsRef.current.readyState === WebSocket.OPEN) {
          const inputData = event.inputBuffer.getChannelData(0);
          const audioData = new Float32Array(inputData);
          wsRef.current.send(audioData);
        }
      };

      source.connect(processor);
      processor.connect(audioContextRef.current.destination);
      mediaRecorderRef.current = processor;

      setIsRecording(true);
    } catch (error) {
      console.error("Error accessing microphone:", error);
    }
  };

  const stopRecording = () => {
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.disconnect();
      audioContextRef.current.close();
    }

    if (wsRef.current) {
      wsRef.current.close();
    }

    setIsRecording(false);
  };

  return (
    <div style={{ padding: "20px", fontFamily: "Arial" }}>
      <h1>Live Transcription</h1>
      <button onClick={isRecording ? stopRecording : startRecording}>
        {isRecording ? "Stop Recording" : "Start Recording"}
      </button>
      <div
        style={{
          marginTop: "20px",
          padding: "10px",
          border: "1px solid #ccc",
          minHeight: "100px",
          whiteSpace: "pre-wrap",
        }}
      >
        {transcription || "Transcribed text will appear here..."}
      </div>
    </div>
  );
};

export default TranscriptionApp;

Any help on this would be really appreciated!

Thanks

This might help: the "webrtc-transcription-guide" thread has a possible solution for the WebSocket approach in its replies.