I saw that OpenAI published a text-to-speech endpoint, but the only samples I could find were one for Node.js and one for Python, and both needed extra installs on my Mac. I didn’t want that.
So, I wrote a bit of code in Swift that produces a file with the spoken text.
No guarantees, but it works for me!
- Please add your own error handling and remove the print statements!
- Please give the file a proper name; for now it keeps the generated name of the downloaded file.
- The saved file is the temporary download file. It should contain MP3 audio, since the request sets `response_format` to `mp3`.
- Or use the stream to play the resulting speech in your app.
- You can remove the organisation header if you are a one-person team.
import Foundation
/// Minimal client for OpenAI's text-to-speech endpoint.
///
/// `speak(_:)` POSTs the text as JSON to the endpoint configured in
/// `Constants.OpenAI`, downloads the response to a temporary file and moves
/// that file into the user's Documents directory. Progress and failures are
/// reported with `print`; replace that with your own error handling.
class OpenAITTS {
    /// Endpoint and credentials used for every request.
    private enum Constants {
        enum OpenAI {
            static let url = URL(string: "https://api.openai.com/v1/audio/speech")
            static let apiKey = "<your apiKey here>"
            static let organisation = "<your organisation ID here>"
        }
    }

    /// Session used for the download task; uses the default configuration.
    private let urlSession = URLSession(configuration: .default)

    /// Requests speech for `text` and stores the resulting audio file.
    ///
    /// The call returns immediately; the download finishes asynchronously.
    /// - Parameter text: The text to be spoken.
    func speak(_ text: String) {
        guard let request = request(text) else {
            print("No request")
            return
        }
        send(request: request)
    }

    /// Starts a download task for `request` and moves the downloaded file into
    /// the Documents directory once the server has answered successfully.
    ///
    /// - Parameter request: The fully configured speech request.
    private func send(request: URLRequest) {
        let task = urlSession.downloadTask(with: request) { urlOrNil, responseOrNil, errorOrNil in
            if let errorOrNil {
                print(errorOrNil)
                return
            }

            let httpResponse = responseOrNil as? HTTPURLResponse
            if let httpResponse {
                print(httpResponse.statusCode)
            }

            guard let fileURL = urlOrNil else { return }

            // A download task also "succeeds" for 4xx/5xx answers; the file then
            // holds the server's error message instead of audio, so do not keep it.
            if let httpResponse, !(200...299).contains(httpResponse.statusCode) {
                if let message = try? String(contentsOf: fileURL, encoding: .utf8) {
                    print("server error: \(message)")
                }
                return
            }

            do {
                let documentsURL = try FileManager.default.url(for: .documentDirectory,
                                                               in: .userDomainMask,
                                                               appropriateFor: nil,
                                                               create: false)
                // The temporary file is only valid inside this handler, so move it now.
                // The generated temporary name is kept as the file name.
                let savedURL = documentsURL.appendingPathComponent(fileURL.lastPathComponent)
                print(savedURL)
                try FileManager.default.moveItem(at: fileURL, to: savedURL)
            } catch {
                print("file error: \(error)")
            }
        }
        task.resume()
    }

    /// Builds the POST request that asks the endpoint to speak `text`.
    ///
    /// - Parameter text: The text to be spoken.
    /// - Returns: The configured request, or `nil` when the endpoint URL is
    ///   invalid or the JSON body cannot be encoded.
    private func request(_ text: String) -> URLRequest? {
        guard let baseURL = Constants.OpenAI.url else {
            return nil
        }

        let parameters: [String: Any] = [
            "model": "tts-1",
            "voice": "nova",
            "response_format": "mp3",
            "speed": 0.98, // sent as a JSON number. Range: 0.25 - 4.0, Default 1.0
            "input": text
        ]

        // Never send a request without a body: fail instead of ignoring the error.
        let body: Data
        do {
            body = try JSONSerialization.data(withJSONObject: parameters)
        } catch {
            print("json error: \(error)")
            return nil
        }

        var request = URLRequest(url: baseURL)
        request.httpMethod = "POST"
        request.httpBody = body
        request.setValue("Bearer \(Constants.OpenAI.apiKey)", forHTTPHeaderField: "Authorization")
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
        // Optional: the header is skipped when no organisation is configured.
        if !Constants.OpenAI.organisation.isEmpty {
            request.setValue(Constants.OpenAI.organisation, forHTTPHeaderField: "OpenAI-Organization")
        }
        return request
    }
}