The gpt-4o-realtime-preview model does not identify the voice in a recording, and I don't know what I'm doing wrong. The method `sendWsOpenAi_Text` works fine, but when I use the method `sendWsOpenAi_Audio` the AI answers me with this: "I'm sorry, but I can't identify the voice in a recording." What is happening?
/**
 * Opens the OpenAI Realtime WebSocket session and wires up its event handlers.
 *
 * @param {(text: string) => void} handleRealtimeText - Receives the accumulated
 *   assistant text / audio transcript as deltas arrive.
 * @param {(base64Delta: string) => void} handleRealtimeAudio - Receives each
 *   base64-encoded chunk of the assistant's audio response.
 *
 * Side effects: assigns `this.ws` and mutates `this.messagesText`.
 * NOTE(review): the (url, protocols, options)-with-headers WebSocket signature
 * is React Native's extension; browser WebSocket cannot set custom headers.
 */
function connectWsOpenAi(handleRealtimeText, handleRealtimeAudio) {
  this.ws = new WebSocket(
    this.urlWs,
    undefined,
    {
      headers: {
        Authorization: 'Bearer ' + this.apiKey,
        "OpenAI-Beta": "realtime=v1",
      },
    }
  );
  this.ws.onopen = () => {
    // Configure the session as soon as the socket opens.
    // NOTE(review): no input_audio_format is set, so the server assumes its
    // default (base64 of raw 16-bit PCM, 24 kHz, mono). If the client sends
    // audio in any other format the model effectively hears noise and may
    // reply that it cannot work with the recording — confirm the capture
    // format against the Realtime API session docs.
    this.ws.send(JSON.stringify({
      type: "session.update",
      session: {
        modalities: ["text", "audio"],
        instructions: "Hablas español mexicano , porfavor asiste al usuario.",
        voice: "alloy",
        input_audio_transcription: {
          model: "whisper-1"
        },
        // null disables server-side VAD: every response must be requested
        // explicitly with a `response.create` event.
        turn_detection: null
      }
    }));
  };
  this.ws.onmessage = (message) => {
    // Parse the incoming realtime event.
    const result = JSON.parse(message.data);
    switch (result.type) {
      // Text deltas and audio-transcript deltas are handled identically:
      // accumulate the partial message and forward it to the text callback.
      case 'response.text.delta':
      case 'response.audio_transcript.delta':
        this.messagesText = this.messagesText + result.delta;
        handleRealtimeText(this.messagesText);
        break;
      // Either "done" event marks the end of a response: reset the buffer.
      case 'response.text.done':
      case 'response.audio.done':
        this.messagesText = "";
        break;
      case 'response.audio.delta':
        // result.delta is a base64 audio chunk; forward it as-is.
        handleRealtimeAudio(result.delta);
        break;
    }
  };
  this.ws.onerror = (e) => {
    // Log the actual error object (bad API key, unreachable host, ...) so
    // failures are diagnosable, instead of a bare "ERROR" marker.
    console.log("ERROR", e);
  };
  this.ws.onclose = (e) => {
    // Include the close event for its code/reason.
    console.log("CLOSE", e);
  };
}
/**
 * Sends a user text prompt over the realtime socket and asks the model to
 * generate a response.
 *
 * @param {string} prompt - The user's message text.
 */
function sendWsOpenAi_Text(prompt) {
  const userMessage = {
    type: 'conversation.item.create',
    item: {
      type: 'message',
      role: 'user',
      content: [{ type: 'input_text', text: prompt }],
    },
  };
  // Queue the user message, then explicitly request a model response
  // (the session disables server-side turn detection).
  this.ws.send(JSON.stringify(userMessage));
  this.ws.send(JSON.stringify({ type: 'response.create' }));
}
/**
 * Sends a recorded audio file as a user message and requests a response.
 *
 * @param {string} uri - Path/URI of the audio file to send.
 * @throws {Error} If the file could not be converted to base64 data.
 *
 * NOTE(review): the Realtime API expects `audio` to be base64 of raw audio in
 * the session's input_audio_format (default: 16-bit PCM, 24 kHz, mono). If
 * `filePathToBase64` yields a container format (wav/m4a/...), the model
 * receives unusable audio and typically answers that it cannot work with the
 * recording — confirm what filePathToBase64 actually produces.
 */
async function sendWsOpenAi_Audio(uri) {
  const base64AudioData = await filePathToBase64(uri);
  // Fail fast with a clear error instead of silently sending an empty
  // payload, which yields a confusing refusal from the model.
  if (!base64AudioData) {
    throw new Error('sendWsOpenAi_Audio: could not read audio data from ' + uri);
  }
  const event = {
    type: 'conversation.item.create',
    item: {
      type: 'message',
      role: 'user',
      content: [
        {
          type: 'input_audio',
          audio: base64AudioData
        }
      ]
    }
  };
  this.ws.send(JSON.stringify(event));
  // Server-side turn detection is disabled, so the response must be
  // requested explicitly.
  this.ws.send(JSON.stringify({ type: 'response.create' }));
}