I am using the OpenAI Realtime API for live transcription, but it's not working. This is my code; please help me find the issue.
public async start(): Promise<void> {
  try {
    // Browser WebSocket auth goes through subprotocols; the "insecure" API-key
    // protocol exposes the key client-side and is meant for development only.
    this.ws = new WebSocket(`wss://api.openai.com/v1/realtime?intent=transcription`, [
      'realtime',
      `openai-insecure-api-key.${configEnv.OPENAI_API_KEY}`,
      'openai-beta.realtime-v1',
    ]);

    this.ws.onopen = () => {
      console.log('Connected to OpenAI realtime API');
      this.reconnectAttempts = 0;
      if (this.ws?.readyState === WebSocket.OPEN) {
        this.ws.send(
          JSON.stringify({
            type: 'transcription_session.update',
            session: {
              input_audio_format: 'pcm16', // the default, but stated explicitly
              input_audio_transcription: {
                model: 'whisper-1',
                language: 'en',
                // prompt:
                //   "Transcribe the user's speech and translate it into English accurately while maintaining the original meaning and correct grammar.",
              },
              turn_detection: {
                prefix_padding_ms: 600,
                silence_duration_ms: 800,
                type: 'server_vad',
                threshold: 0.5,
              },
              input_audio_noise_reduction: { type: 'near_field' },
              include: ['item.input_audio_transcription.logprobs'],
            },
          }),
        );
      }
    };

    this.ws.onmessage = (event: MessageEvent) => {
      const data = JSON.parse(event.data);
      console.log(data, 'handleMessage');
      // this.handleMessage(data);
    };

    this.ws.onerror = (error: Event) => {
      console.error('WebSocket error:', error);
      // this.onError(`Connection error: ${error}`);
      // this.stop();
      // this.reconnect();
    };

    this.ws.onclose = () => {
      console.log('WebSocket connection closed');
      // if (!this.isManualStop) {
      //   this.stop();
      //   this.reconnect();
      // }
    };

    await this.initAudioProcessing();
  } catch (error) {
    console.error('Error starting transcription:', error);
    // this.onError(error instanceof Error ? error.message : 'Unknown error');
    // this.stop();
  }
}
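
// For reference, this is roughly the handleMessage I intend to re-enable in
// onmessage above. The event names are my reading of the Realtime
// transcription docs, so treat this as a sketch rather than verified code.
private handleMessage(data: any): void {
  switch (data.type) {
    case 'conversation.item.input_audio_transcription.delta':
      // Incremental transcript text for the current speech turn
      console.log('delta:', data.delta);
      break;
    case 'conversation.item.input_audio_transcription.completed':
      // Final transcript for the committed audio segment
      console.log('transcript:', data.transcript);
      break;
    case 'error':
      console.error('Realtime API error:', data.error?.message);
      break;
  }
}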
private async initAudioProcessing(): Promise<void> {
  try {
    const config: MediaStreamConstraints = {
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
      },
    };
    this.stream = await navigator.mediaDevices.getUserMedia(config);

    // 24 kHz mono 16-bit PCM matches the endpoint's pcm16 input format
    const context = new AudioContext({ sampleRate: 24000, latencyHint: 'balanced' });
    const source = context.createMediaStreamSource(this.stream);
    await context.audioWorklet.addModule(this.scriptSrc);
    const worklet = new AudioWorkletNode(context, 'audio-recorder-worklet');
    source.connect(worklet);
    // Keeps the graph pulling data; the worklet should output silence,
    // otherwise this plays the mic back through the speakers.
    worklet.connect(context.destination);

    let audioQueue = new Int16Array(0);
    worklet.port.onmessage = (event) => {
      if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
      const buffer = new Int16Array(event.data.audio_data);
      audioQueue = this.mergeBuffers(audioQueue, buffer);
      const neededSamples = Math.floor(context.sampleRate * 0.1); // 100 ms
      if (audioQueue.length >= neededSamples) {
        // subarray() returns a view into the shared buffer, so the encoder
        // must respect byteOffset/byteLength (see encodeInt16ArrayToBase64)
        const sendBuffer = audioQueue.subarray(0, neededSamples);
        audioQueue = audioQueue.subarray(neededSamples);
        const base64Audio = this.encodeInt16ArrayToBase64(sendBuffer);
        console.log('base64Audio', base64Audio);
        this.ws.send(JSON.stringify({ type: 'input_audio_buffer.append', audio: base64Audio }));
      }
    };

    this.transcriptionContext = context;
    this.workletNode = worklet;
  } catch (err) {
    console.error('Audio processing init failed:', err);
    throw err;
  }
}
mergeBuffers(lhs: Int16Array, rhs: Int16Array): Int16Array {
  const mergedBuffer = new Int16Array(lhs.length + rhs.length);
  mergedBuffer.set(lhs, 0);
  mergedBuffer.set(rhs, lhs.length);
  return mergedBuffer;
}
encodeInt16ArrayToBase64(int16Array: Int16Array): string {
  // Respect the view's offset and length: sendBuffer above is a subarray(),
  // so using int16Array.buffer alone would encode the whole underlying queue
  // (including samples past the slice) instead of just the 100 ms chunk.
  const bytes = new Uint8Array(int16Array.buffer, int16Array.byteOffset, int16Array.byteLength);
  // Chunked conversion for large arrays (avoids call-stack limits in String.fromCharCode)
  const chunkSize = 0x8000; // 32 KB chunks
  let binary = '';
  for (let i = 0; i < bytes.length; i += chunkSize) {
    const chunk = bytes.subarray(i, Math.min(i + chunkSize, bytes.length));
    binary += String.fromCharCode(...chunk);
  }
  return btoa(binary);
}
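
For completeness, the module loaded from this.scriptSrc registers the 'audio-recorder-worklet' processor. Based on how event.data.audio_data is consumed above, a minimal version of that worklet would look like the sketch below (it converts the Float32 input to 16-bit PCM and transfers the buffer to the main thread):

// audio-recorder-worklet.js — sketch of the processor assumed above
class AudioRecorderProcessor extends AudioWorkletProcessor {
  process(inputs) {
    const channel = inputs[0]?.[0];
    if (channel && channel.length > 0) {
      // Convert Float32 samples in [-1, 1] to 16-bit PCM
      const pcm16 = new Int16Array(channel.length);
      for (let i = 0; i < channel.length; i++) {
        const s = Math.max(-1, Math.min(1, channel[i]));
        pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
      }
      // Transfer the buffer; the main thread reads event.data.audio_data
      this.port.postMessage({ audio_data: pcm16.buffer }, [pcm16.buffer]);
    }
    return true; // keep the processor alive
  }
}
registerProcessor('audio-recorder-worklet', AudioRecorderProcessor);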
Or, if there is an easier way to do this, please suggest one.