Hello, I’m trying to use the speech API to stream PCM data from fetch request but when I play the chunks received they play scrambled noises alongside of the voice,
Could it be a bug with OpenAI PCM data? has someone experienced that?
Demo: Vimeo ID: 1039061425?share=copy (can’t paste links)
I’m using Dart/Flutter with soloud package to play buffered audio.
import 'dart:async';
import 'package:example/tts_service_web.dart';
import 'package:flutter/material.dart';
import 'package:flutter_soloud/flutter_soloud.dart';
void main() async {
WidgetsFlutterBinding.ensureInitialized();
/// Initialize the player.
await SoLoud.instance.init();
runApp(const MyApp());
}
class MyApp extends StatelessWidget {
const MyApp({super.key});
@override
Widget build(BuildContext context) {
return const MaterialApp(
home: AudioStreamScreen(),
);
}
}
class AudioStreamScreen extends StatefulWidget {
const AudioStreamScreen({super.key});
@override
State<AudioStreamScreen> createState() => _AudioStreamScreenState();
}
class _AudioStreamScreenState extends State<AudioStreamScreen> {
final openAIKey = 'OPEN_AI_KEY';
@override
void initState() {
super.initState();
}
@override
void dispose() {
unawaited(SoLoud.instance.disposeAllSources());
super.dispose();
}
Future<void> _fetchAndPlayAudio() async {
final stream = TTSServiceWeb(openAIKey).tts(
'https://api.openai.com/v1/audio/speech',
{
'model': 'tts-1',
'voice': 'alloy',
'speed': 1,
'input':
'''1. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.''',
'response_format': 'pcm',
"sample_rate": 16000,
'stream': true,
},
);
final currentSound = SoLoud.instance.setBufferStream(
maxBufferSize: 1024 * 1024 * 5, // 2 MB
sampleRate: 16000,
channels: Channels.mono,
pcmFormat: BufferPcmType.s16le,
onBuffering: (isBuffering, handle, time) async {
debugPrint('buffering');
},
);
int chunkNumber = 0;
stream.listen((chunk) async {
try {
SoLoud.instance.addAudioDataStream(
currentSound,
chunk,
);
if (chunkNumber == 0) {
await SoLoud.instance.play(currentSound);
}
chunkNumber++;
print('chunk number: $chunkNumber');
print('chunk length: ${chunk.length}');
} on SoLoudPcmBufferFullCppException {
debugPrint('pcm buffer full or stream already set '
'to be ended');
} catch (e) {
debugPrint(e.toString());
}
}, onDone: () {
SoLoud.instance.setDataIsEnded(currentSound);
});
}
@override
Widget build(BuildContext context) {
return Scaffold(
appBar: AppBar(
title: const Text('Audio Stream Example'),
),
body: Center(
child: ElevatedButton(
onPressed: _fetchAndPlayAudio,
child: const Text('Play Audio'),
),
),
);
}
}