You’ll need to update to the latest version of the `openai` Python library; then you can use the new client methods shown in the API reference (linked in the forum sidebar).
Here’s my start at making TTS a bit more object-oriented. It is more than an example, but not yet something to be your go-to ‘import tts_utils’.
'''
text-to-speech example with API, openai > 1.2.2
'''
import time
from pathlib import Path
from openai import OpenAI
class botDate:
    """Formatted wall-clock stamps plus a resettable stopwatch.

    .start() / .now() : creation time / current time, rendered with format_spec
    .set() / .get()   : restart the stopwatch / read elapsed seconds
    """

    def __init__(self, format_spec="%Y-%m-%d %H:%M%p"):
        # NOTE(review): the default combines 24-hour %H with the AM/PM
        # marker %p; left as-is because callers may rely on the default.
        self.format_spec = format_spec
        # Wall-clock epoch seconds, used only for display via start().
        self.created_time = time.time()
        # perf_counter reading, used only for elapsed-time measurement.
        self.start_time = time.perf_counter()

    def format_time(self, epoch_seconds):
        """Render epoch_seconds as local time using self.format_spec."""
        local_struct = time.localtime(epoch_seconds)
        return time.strftime(self.format_spec, local_struct)

    def start(self):
        """Return the formatted time at which this object was created."""
        return self.format_time(self.created_time)

    def now(self):
        """Return the formatted current time."""
        current_epoch = time.time()
        return self.format_time(current_epoch)

    def set(self):
        """Restart the stopwatch from the moment of this call."""
        self.start_time = time.perf_counter()

    def get(self):
        """Return seconds since creation or the last .set(), to 3 decimals."""
        return round(time.perf_counter() - self.start_time, 3)
class ttsFile:
    """Generate speech with the OpenAI TTS endpoint and save it to a file.

    The output file is written next to this script and is named
    ``{file_prefix}_{model}_{voice}_{YYYYmmdd_HHMMSS}.{response_format}``.

    The attributes ``voice``, ``response_format``, ``file_prefix``, ``model``
    and ``timeout`` may be reassigned between calls to ``tofile``; each call
    reads their current values.
    """

    # Per-character rate used for the cost estimate, keyed by model name.
    # The result is reported to the user as cents.
    _HD_RATE = 3 / 1000
    _STANDARD_RATE = 1.5 / 1000

    def __init__(self, text="x",
                 voice="alloy",
                 response_format="mp3",
                 file_prefix="tts",
                 hd=False
                 ):
        """Store the request settings and create the API client.

        Args:
            text: initial input text (replaced by the argument to ``tofile``).
            voice: voice name sent to the API.
            response_format: audio format, also used as the file extension.
            file_prefix: leading part of the generated file name.
            hd: select "tts-1-hd" when true, otherwise "tts-1".
        """
        self.timeout = 240  # handed to the OpenAI client as its timeout
        self.timer = botDate("%Y%m%d_%H%M%S")  # file-name stamp + stopwatch
        self.input = text
        self.voice = voice
        self.response_format = response_format
        self.file_prefix = file_prefix
        self.model = "tts-1-hd" if hd else "tts-1"
        self.client = OpenAI(timeout=self.timeout)

    def _setparams(self):
        """Snapshot the current settings into ``self.params`` for the API call."""
        self.params = {
            "voice": self.voice,
            "model": self.model,
            "response_format": self.response_format,
            "input": self.input,
        }

    def _estimate_cost(self):
        """Return the estimated cost of ``self.input``, rounded to 4 decimals."""
        rate = self._HD_RATE if self.model == "tts-1-hd" else self._STANDARD_RATE
        return round(len(self.input) * rate, 4)

    def tofile(self, text):
        """Synthesize *text* and stream the audio to a timestamped file.

        Args:
            text: the text to speak; stored as ``self.input``.

        Returns:
            dict with keys "time" (elapsed seconds for the request and file
            write), "length" (characters sent), "cost" (estimate),
            "filename" (name only, without directory) and "response" (the
            object returned by the API client).
        """
        self.input = text
        filename = (f"{self.file_prefix}_{self.model}_"
                    f"{self.voice}_{self.timer.now()}.{self.response_format}")
        out_file = Path(__file__).parent / filename
        self.timer.set()
        self._setparams()
        # Reuse the existing client instead of constructing a new one on
        # every call (which also overwrote any client the caller had
        # customised). with_options applies the current self.timeout, so
        # changing the attribute after construction still takes effect.
        request_client = self.client.with_options(timeout=self.timeout)
        response = request_client.audio.speech.create(**self.params)
        response.stream_to_file(out_file)
        # Alternate way to create the file:
        #   with open(out_file, "wb") as file:
        #       file.write(response.content)
        return {"time": self.timer.get(),  # just some stats
                "length": len(self.input),
                "cost": self._estimate_cost(),
                "filename": filename,
                "response": response}
if __name__ == "__main__":
    # Demo: synthesize one short prompt, then report timing and estimated cost.
    tts = ttsFile(hd=False)  # hd is twice the price, change to True
    voice = {  # my gender/heaviness rank from 1: male to 10: female
        1: 'nova',     # 10
        2: 'shimmer',  # 9
        3: 'fable',    # 6 (English accent)
        4: 'alloy',    # 6
        5: 'echo',     # 2
        6: 'onyx',     # 1
    }
    # Optional settings that can be changed before calling tofile():
    # tts.voice = voice[2]  # pick a number
    # tts.response_format = "flac"  # of 'mp3', 'opus', 'aac', 'flac'
    # tts.file_prefix = "tts"  # file naming: (prefix)_tts-1_alloy_20231113_102550.flac
    prompt = """
[pause]
The big black bug bit the brown bear.
"""
    # Flatten the prompt to a single line with a leading ". ".
    # Named speech_text so the builtin input() is not shadowed.
    speech_text = ". " + prompt.strip().replace('\n', " ")
    tts.voice = voice[2]
    results = tts.tofile(speech_text)
    print(results['filename'] + f" took {results['time']:0.2f} seconds")
    print(f"{results['length']} characters, cost {results['cost']} cents.")