I’m experiencing an issue with my code for TTS generation. While the TTS output itself seems fine when I check it, the resulting audio after processing ends up as noise. I suspect the problem lies in the conversion of the TTS output to Base64 format:
async generate(gptReply, interactionCount) {
const { partialResponseIndex, partialResponse } = gptReply;
if (!partialResponse) { return; }
try {
const response = await fetch(
'https://api.openai.com/v1/audio/speech',
{
method: 'POST',
headers: {
'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
input: partialResponse,
model: 'tts-1',
voice: 'alloy',
response_format: 'opus',
}),
}
);
if (response.status === 200) {
try {
const audioArrayBuffer = await response.arrayBuffer();
const base64String = Buffer.from(audioArrayBuffer).toString('base64');
this.emit('speech', partialResponseIndex, base64String, partialResponse, interactionCount);
} catch (err) {
console.error('Error converting audio to Base64:', err);
}
} else {
console.error('OpenAI TTS error:', response);
}
} catch (err) {
console.error('Error occurred in TextToSpeech service:', err);
}
}
I would appreciate any advice or insights that can help resolve this issue.