I am using the ‘gpt-4o-realtime-preview’ API to create a realtime audio session using web socket for an iOS app. I get the following error:
nw_flow_service_reads [C1 <ip> failed parent-flow (satisfied (Path is satisfied), interface: en0[802.11], proxy, uses wifi)] No output handler
nw_flow_add_write_request [C1 <ip> failed parent-flow (satisfied (Path is satisfied), interface: en0[802.11], proxy, uses wifi)] cannot accept write requests
nw_write_request_report [C1] Send failed with error "Socket is not connected"
Error Domain=NSPOSIXErrorDomain Code=57 "Socket is not connected" UserInfo={NSErrorFailingURLStringKey=wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview, NSErrorFailingURLKey=wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview}
My code looks like this:
static let shared = RealtimeService()
private init() {}
func beginSession(topic: String,
prompt: String,
eta: TimeInterval,
onAudio: ((Data) -> Void)? = nil) async throws {
// Always start from a blank state.
stop()
try await openSocket()
// 1️⃣ wait for `session.created` ------------------------------------------------
try await waitFor(eventType: "session.created")
// 2️⃣ send `session.update` -----------------------------------------------------
let fullPrompt = <my prompt>
let sessionUpdate: [String: Any] = [
"event_id": UUID().uuidString,
"type": "session.update",
"session": [
"modalities": ["audio", "text"],
"voice": "alloy",
"instructions": fullPrompt,
"output_audio_format": "pcm16"
]
]
try await send(json: sessionUpdate)
// 3️⃣ wait for server confirmation (`session.updated`) --------------------------
try await waitFor(eventType: "session.updated")
// 4️⃣ send `response.create` ----------------------------------------------------
let responseCreate: [String: Any] = [
"event_id": UUID().uuidString,
"type": "response.create",
"response": [
"modalities": ["audio", "text"],
"voice": "alloy"
]
]
try await send(json: responseCreate)
// 5️⃣ start local audio engine --------------------------------------------------
let pcm16_mono_24k = AVAudioFormat(commonFormat: .pcmFormatInt16,
sampleRate: 24_000,
channels: 1,
interleaved: true)!
try Player.shared.startStreaming(format: pcm16_mono_24k,
title: topic)
// 6️⃣ background receive loop ---------------------------------------------------
Task.detached { [weak self] in
await self?.receiveLoop(tap: onAudio)
}
}
/// Gracefully end the current session, shut down audio, and release the
/// WebSocket.
func stop() {
webSocket?.cancel(with: .goingAway, reason: nil)
webSocket = nil
PodcastPlayer.shared.stop()
}
private var webSocket: URLSessionWebSocketTask?
private var isConnected: Bool { webSocket != nil }
/// Establish the authenticated WebSocket connection.
private func openSocket() async throws {
var comps = URLComponents(string: "wss://api.openai.com/v1/realtime")!
comps.queryItems = [URLQueryItem(name: "model", value: "gpt-4o-realtime-preview")]
var req = URLRequest(url: comps.url!)
req.addValue("Bearer \(API.openAIKey)", forHTTPHeaderField: "Authorization")
req.addValue("realtime=v1", forHTTPHeaderField: "OpenAI-Beta")
webSocket = URLSession.shared.webSocketTask(with: req)
webSocket?.resume()
}
/// Send any JSON dictionary as a *text* WebSocket frame
private func send(json object: [String: Any]) async throws {
let data = try JSONSerialization.data(withJSONObject: object)
guard let text = String(data: data, encoding: .utf8) else {
throw URLError(.cannotParseResponse)
}
try await webSocket?.send(.string(text))
}
/// Block until we receive a server event of the specified `type`.
private func waitFor(eventType wanted: String) async throws {
guard let ws = webSocket else { throw URLError(.badServerResponse) }
while true {
let message = try await ws.receive()
if let obj = try? decode(message), obj["type"] as? String == wanted {
return
}
}
}
private func receiveLoop(tap: ((Data) -> Void)?) async {
guard let ws = webSocket else { return }
// We stay in the loop until the socket errors or is intentionally
// cancelled by `stop()`.
do {
while true {
let message = try await ws.receive()
guard let obj = try? decode(message) else { continue }
// We only care about audio streaming events for now.
if obj["type"] as? String == "response.content_part.added",
let part = obj["part"] as? [String: Any],
part["type"] as? String == "audio",
let b64 = part["audio"] as? String,
let raw = Data(base64Encoded: b64) {
// AVAudioEngine must be touched on the main thread.
await MainActor.run {
try? Player.shared.playStreamChunk(data: raw)
tap?(raw)
}
}
}
} catch {
print("[RealtimeService] receiveLoop error:", error)
stop()
}
}
/// Convert the WebSocket message into a generic `[String:Any]`.
private func decode(_ message: URLSessionWebSocketTask.Message) throws -> [String: Any] {
switch message {
case .string(let text):
if let data = text.data(using: .utf8) {
return (try JSONSerialization.jsonObject(with: data)) as? [String: Any] ?? [:]
}
case .data(let data):
return (try JSONSerialization.jsonObject(with: data)) as? [String: Any] ?? [:]
default:
break
}
return [:]
}
I am running Xcode 16.3 with iOS 18.4/17.5 simulator.
Can someone please help me out here?