The socket is already connected, and the server returns:
{"type":"transcription_session.created","event_id":"event_BcDKtTDUdeFyOyWMs47Ih","session":{"id":"sess_BcDKtax3dByqsWhB5qwDq","object":"realtime.transcription_session","expires_at":1748449227,"input_audio_noise_reduction":null,"turn_detection":{"type":"server_vad","threshold":0.5,"prefix_padding_ms":300,"silence_duration_ms":200},"input_audio_format":"pcm16","input_audio_transcription":null,"client_secret":null,"include":null}}
When I start recording and send the iOS microphone recording data, the server returns:
{"type":"input_audio_buffer.speech_started","event_id":"event_BcDT5OFY89Xu0CrTaunvl","audio_start_ms":31764,"item_id":"item_BcDT5FKyHr4KcuMi5mSYa"}
{"type":"input_audio_buffer.speech_stopped","event_id":"event_BcDT5psuRv8h46Z0LchIX","audio_end_ms":32352,"item_id":"item_BcDT5FKyHr4KcuMi5mSYa"}
{"type":"input_audio_buffer.committed","event_id":"event_BcDT5Mp1ButDL7IAWEf6U","previous_item_id":null,"item_id":"item_BcDT5FKyHr4KcuMi5mSYa"}
{"type":"conversation.item.created","event_id":"event_BcDT5YLLBURAxfBM0HrTz","previous_item_id":null,"item":{"id":"item_BcDT5FKyHr4KcuMi5mSYa","object":"realtime.item","type":"message","status":"completed","role":"user","content":[{"type":"input_audio","transcript":null}]}}
{"type":"input_audio_buffer.speech_started","event_id":"event_BcDT6uvclm1kRRQCkK6hA","audio_start_ms":33716,"item_id":"item_BcDT6pi2wZH30hzis5HIB"}
{"type":"input_audio_buffer.speech_stopped","event_id":"event_BcDT6NM13R6ud4WKTrWpA","audio_end_ms":34592,"item_id":"item_BcDT6pi2wZH30hzis5HIB"}
{"type":"input_audio_buffer.committed","event_id":"event_BcDT647bymtMuvkjtA26Q","previous_item_id":"item_BcDT5FKyHr4KcuMi5mSYa","item_id":"item_BcDT6pi2wZH30hzis5HIB"}
{"type":"conversation.item.created","event_id":"event_BcDT6Iu6bMtWhQutC71al","previous_item_id":"item_BcDT5FKyHr4KcuMi5mSYa","item":{"id":"item_BcDT6pi2wZH30hzis5HIB","object":"realtime.item","type":"message","status":"completed","role":"user","content":[{"type":"input_audio","transcript":null}]}}
The recording data conversion method is as follows:
// Tap bus 0 of the input node. Each captured buffer is converted with
// `downsampleAndConvertToPCM16`, base64-encoded, wrapped in an
// `input_audio_buffer.append` JSON event, and written to the socket as text.
inputNode.installTap(onBus: 0, bufferSize: 4096, format: format) { [weak self] buffer, _ in
    // Drop the buffer if the owner is gone or the conversion produced nothing.
    guard let owner = self,
          let pcmPayload = owner.downsampleAndConvertToPCM16(buffer: buffer, inputFormat: format)
    else { return }

    let appendEvent: [String: Any] = [
        "type": "input_audio_buffer.append",
        "audio": pcmPayload.base64EncodedString()
    ]

    // A serialization or UTF-8 decoding failure silently skips this buffer.
    guard let encodedEvent = try? JSONSerialization.data(withJSONObject: appendEvent),
          let eventText = String(data: encodedEvent, encoding: .utf8)
    else { return }

    owner.socket?.write(string: eventText)
}
/// Converts one captured audio buffer to 24 kHz, mono, interleaved 16-bit integer PCM.
///
/// The input buffer is handed to the converter exactly once. Returning it with
/// `.haveData` on every callback would make the converter keep pulling the same
/// samples until the output buffer is full, producing repeated audio.
///
/// NOTE(review): a new `AVAudioConverter` is created for every call, so no
/// resampler state is carried from one buffer to the next. Consider holding a
/// single converter in a stored property if boundary artifacts are audible.
///
/// - Parameters:
///   - buffer: The PCM buffer to convert.
///   - inputFormat: The format used to build the converter; presumably the same
///     format as `buffer` — confirm at the call site.
/// - Returns: The converted samples as raw bytes (2 bytes per frame), or `nil`
///   if the input is empty, the converter cannot be created, or conversion fails.
func downsampleAndConvertToPCM16(buffer: AVAudioPCMBuffer, inputFormat: AVAudioFormat) -> Data? {
    // Nothing to convert; also avoids sending an empty append event.
    guard buffer.frameLength > 0, inputFormat.sampleRate > 0 else {
        return nil
    }

    guard let outputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                           sampleRate: 24000,
                                           channels: 1,
                                           interleaved: true),
          let converter = AVAudioConverter(from: inputFormat, to: outputFormat) else {
        print("转换失败: unable to create AVAudioConverter for \(inputFormat)")
        return nil
    }

    // Size the output for this buffer only: input frames scaled by the rate
    // ratio, plus a little headroom for resampler rounding.
    let rateRatio = outputFormat.sampleRate / inputFormat.sampleRate
    let expectedFrames = (Double(buffer.frameLength) * rateRatio).rounded(.up)
    let outputCapacity = AVAudioFrameCount(expectedFrames) + 32

    guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat,
                                              frameCapacity: outputCapacity) else {
        print("转换失败: unable to allocate output buffer")
        return nil
    }

    var error: NSError?
    var hasSuppliedInput = false
    let inputBlock: AVAudioConverterInputBlock = { _, outStatus in
        // Supply the captured buffer once; afterwards report that this
        // (single-buffer) stream has ended so the converter stops asking.
        if hasSuppliedInput {
            outStatus.pointee = .endOfStream
            return nil
        }
        hasSuppliedInput = true
        outStatus.pointee = .haveData
        return buffer
    }

    let status = converter.convert(to: outputBuffer, error: &error, withInputFrom: inputBlock)
    guard status != .error, error == nil, let convertedData = outputBuffer.int16ChannelData else {
        print("转换失败: \(String(describing: error))")
        return nil
    }

    let frameLength = Int(outputBuffer.frameLength)
    guard frameLength > 0 else {
        return nil
    }

    // Mono interleaved Int16: channel 0 holds every sample.
    return Data(bytes: convertedData[0], count: frameLength * MemoryLayout<Int16>.size)
}
When I send input_audio_buffer.commit, the server returns:
{"type":"error","event_id":"event_BcDTA3vEhwpQYpbo9M6hR","error":{"type":"invalid_request_error","code":"input_audio_buffer_commit_empty","message":"Error committing input audio buffer: buffer too small. Expected at least 100ms of audio, but buffer only has 0.00ms of audio.","param":null,"event_id":null}}