AttributeError: type object 'Audio' has no attribute 'speech'

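That error means the installed `openai` package predates the text-to-speech endpoint. You’ll need to update to the latest version of the Python `openai` library; then you can use the new client methods shown in the API reference (linked in the forum sidebar).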

Here’s my start at making TTS a bit more object-oriented. It’s more than a bare example, but not yet polished enough to be your go-to ‘import tts_utils’.

'''
text-to-speech example with API, openai > 1.2.2
'''
import time
from pathlib import Path
from openai import OpenAI

class botDate:
    """Formatted wall-clock timestamps plus a resettable stopwatch.

    .start() / .now() : object creation time / current time, as formatted
                        local-time strings
    .set() / .get()   : restart the stopwatch / read elapsed seconds

    Args:
        format_spec: ``time.strftime`` pattern used by ``start``, ``now``
            and ``format_time``.
    """

    def __init__(self, format_spec: str = "%Y-%m-%d %I:%M%p") -> None:
        # The default uses %I (12-hour clock) because it is followed by %p;
        # the 24-hour %H next to an AM/PM marker yields text like "14:05PM".
        self.format_spec = format_spec
        self.created_time = time.time()        # epoch seconds at creation
        self.start_time = time.perf_counter()  # stopwatch origin

    def start(self) -> str:
        """Return the creation time of this object, formatted."""
        return self.format_time(self.created_time)

    def now(self) -> str:
        """Return the current time, formatted."""
        return self.format_time(time.time())

    def format_time(self, epoch_seconds: float) -> str:
        """Format *epoch_seconds* as local time using ``self.format_spec``."""
        return time.strftime(self.format_spec, time.localtime(epoch_seconds))

    def set(self) -> None:
        """Restart the stopwatch from the moment of this call."""
        self.start_time = time.perf_counter()

    def get(self) -> float:
        """Return seconds since creation, or since the last ``set`` call.

        The value is rounded to three decimal places.
        """
        return round(time.perf_counter() - self.start_time, 3)

class ttsFile:
    """Render text to a speech audio file via the OpenAI speech endpoint.

    The settings are plain attributes (``voice``, ``response_format``,
    ``file_prefix``, ``model``, ``timeout``) and may be changed between
    calls to ``tofile``.

    Args:
        text: initial input text; ``tofile`` overwrites it with its own
            argument.
        voice: voice name sent to the API and embedded in the file name.
        response_format: audio format sent to the API; also used as the
            file extension.
        file_prefix: leading part of the generated file name.
        hd: use model "tts-1-hd" instead of "tts-1".
    """

    def __init__(self, text="x",
                 voice="alloy",
                 response_format="mp3",
                 file_prefix="tts",
                 hd=False
                 ):
        self.timeout = 240  # request timeout handed to the OpenAI client
        # Compact timestamp format so the time can be part of a file name.
        self.timer = botDate("%Y%m%d_%H%M%S")
        self.input = text
        self.voice = voice
        self.response_format = response_format
        self.file_prefix = file_prefix
        self.model = "tts-1-hd" if hd else "tts-1"
        self.client = OpenAI(timeout=self.timeout)

    def _setparams(self):
        """Snapshot the current settings as keyword arguments for the request."""
        self.params = {
            "voice": self.voice,
            "model": self.model,
            "response_format": self.response_format,
            "input": self.input,
        }

    def tofile(self, text):
        """Synthesize *text* and save the audio next to this source file.

        The file is named
        ``{file_prefix}_{model}_{voice}_{timestamp}.{response_format}``.
        The timestamp has one-second resolution, so two calls within the
        same second using identical settings write to the same file.

        Returns:
            dict with keys ``"time"`` (elapsed seconds for request + save),
            ``"length"`` (characters sent), ``"cost"`` (estimate at 1.5 per
            1000 characters, 3 for "tts-1-hd"; presumably US cents),
            ``"filename"`` (file name only, without directory) and
            ``"response"`` (the API response object).
        """
        self.input = text
        filename = (f"{self.file_prefix}_{self.model}_"
                    f"{self.voice}_{self.timer.now()}.{self.response_format}")
        out_file = Path(__file__).parent / filename
        self.timer.set()
        self._setparams()
        # Reuse the client created in __init__ instead of constructing a new
        # one (and a new connection pool) on every call. The per-request
        # timeout keeps later changes to self.timeout effective.
        response = self.client.audio.speech.create(**self.params,
                                                   timeout=self.timeout)
        response.stream_to_file(out_file)
        # Alternate way to create the file:
        #     with open(out_file, "wb") as file:
        #         file.write(response.content)
        cost = round(len(self.input) *
                     (3/1000 if self.params['model'] == "tts-1-hd" else 1.5/1000), 4)
        return {"time": self.timer.get(),  # just some stats
                "length": len(self.input),
                "cost": cost,
                "filename": filename,
                "response": response}

def main():
    """Demo: speak a short tongue-twister to a file and print some stats."""
    tts = ttsFile(hd=False)  # hd is twice the price, change to True

    voice = {         # my gender/heaviness rank from 1: male to 10: female
        1: 'nova',     # 10
        2: 'shimmer',  # 9
        3: 'fable',    # 6 (English accent)
        4: 'alloy',    # 6
        5: 'echo',     # 2
        6: 'onyx',     # 1
    }
    # tts.response_format = "flac"  # of 'mp3', 'opus', 'aac', 'flac'
    # tts.file_prefix = "tts"  # file naming: (prefix)_tts-1_alloy_20231113_102550.flac

    prompt = """
    [pause]
    The big black bug bit the brown bear.
    """

    # Collapse newlines AND the literal's indentation into single spaces so
    # the source-code layout is not sent (and billed) as part of the text.
    # Named tts_text rather than `input` to avoid shadowing the builtin.
    tts_text = ". " + " ".join(prompt.split())

    tts.voice = voice[2]  # pick a number
    results = tts.tofile(tts_text)
    print(results['filename'] + f" took {results['time']:0.2f} seconds")
    print(f"{results['length']} characters, cost {results['cost']} cents.")


if __name__ == "__main__":
    main()
2 Likes