AttributeError: type object 'Audio' has no attribute 'speech'

Hi,
Nothing seems to solve the missing attribute speech. Is there something I’m doing wrong?

Below is my python.
#StartCut
import os
import openai
from pathlib import Path
from packaging import version

# Report which openai package is installed. The dunder underscores of
# __version__ were presumably eaten by the forum's markdown rendering.
current_version = version.parse(openai.__version__)
print(current_version)  # PRINTS 0.28.0
openai.api_key = os.getenv("OPENAI_API_KEY")
file = 'TextToSpeech.ipynb'  # in interactive we need this.
speech_file_path = Path(file).parent / "speech.mp3"

# Fails here with the AttributeError from the title: on the 0.28.0 package
# printed above, openai.Audio has no `speech` attribute.
response = openai.Audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Today is a wonderful day to build something people love!"
)
response.stream_to_file(speech_file_path)
#EndCut

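You’ll need to upgrade to the latest Python openai library (for example `pip install --upgrade openai`): the 0.28.0 release you have installed predates the text-to-speech endpoint, which is why `Audio` has no `speech` attribute. After upgrading, you can use the new client methods as shown in the API reference (linked in the sidebar of the forum).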

Here’s my start at making TTS a bit more object-oriented. More than an example, less than something to be your go-to ‘import tts_utils’ yet.

'''
text-to-speech example with API, openai > 1.2.2
'''
import time
from pathlib import Path
from openai import OpenAI

class botDate:
    """Formatted wall-clock timestamps plus a resettable stopwatch.

    start() / now() : creation time / current time, rendered with format_spec
    set() / get()   : restart the stopwatch / seconds elapsed on it
    """

    def __init__(self, format_spec="%Y-%m-%d %H:%M%p"):
        # time.time() is kept for display; perf_counter() drives the stopwatch.
        self.format_spec = format_spec
        self.created_time = time.time()
        self.start_time = time.perf_counter()

    def format_time(self, epoch_seconds):
        """Render epoch seconds as local time using this object's format_spec."""
        local_struct = time.localtime(epoch_seconds)
        return time.strftime(self.format_spec, local_struct)

    def start(self):
        """Return the formatted time at which this object was created."""
        return self.format_time(self.created_time)

    def now(self):
        """Return the formatted current time."""
        stamp = time.time()
        return self.format_time(stamp)

    def set(self):
        """Restart the stopwatch from the current moment."""
        self.start_time = time.perf_counter()

    def get(self):
        """Return seconds (3 decimals) since creation or the last set()."""
        return round(time.perf_counter() - self.start_time, 3)

class ttsFile:
    """Send text to the speech endpoint and save the audio beside this script.

    Attributes such as voice, response_format, file_prefix, model and
    timeout may be changed between calls; tofile() reads them each time.
    """

    def __init__(self, text="x",
                 voice="alloy",
                 response_format="mp3",
                 file_prefix="tts",
                 hd=False
                ):
        # Request settings.
        self.input = text
        self.voice = voice
        self.response_format = response_format
        if hd:
            self.model = "tts-1-hd"
        else:
            self.model = "tts-1"
        # Output naming; the timer's format doubles as the filename timestamp.
        self.file_prefix = file_prefix
        self.timer = botDate("%Y%m%d_%H%M%S")
        # API client, built with the default timeout value.
        self.timeout = 240
        self.client = OpenAI(timeout=self.timeout)

    def _setparams(self):
        """Snapshot the current settings into self.params for the API call."""
        self.params = dict(
            voice=self.voice,
            model=self.model,
            response_format=self.response_format,
            input=self.input,
        )

    def tofile(self, text):
        """Synthesize text, stream it to a timestamped file, and return stats.

        The file is written into the directory containing this script and is
        named (prefix)_(model)_(voice)_(timestamp).(response_format).

        Returns a dict with keys "time" (elapsed seconds), "length"
        (characters in text), "cost" (length times a per-model rate, rounded
        to 4 places), "filename" (name only, no directory) and "response".
        """
        self.input = text
        stamp = self.timer.now()
        filename = (f"{self.file_prefix}_{self.model}_{self.voice}_{stamp}"
                    f".{self.response_format}")
        out_file = Path(__file__).parent / filename

        # Start timing just before the request is prepared and sent.
        self.timer.set()
        self._setparams()
        # A fresh client is built per call using the current self.timeout.
        self.client = OpenAI(timeout=self.timeout)
        response = self.client.audio.speech.create(**self.params)
        response.stream_to_file(out_file)
        # Alternate way to create the file: open out_file in "wb" mode and
        # write response.content to it.

        if self.params['model'] == "tts-1-hd":
            per_char = 3/1000
        else:
            per_char = 1.5/1000
        cost = round(len(self.input) * per_char, 4)

        stats = {"time": self.timer.get(),  # just some stats
                 "length": len(self.input),
                 "cost": cost,
                 "filename": filename,
                 "response": response}
        return stats

if __name__ == "__main__":
    # Demo: synthesize one short prompt and report timing and cost.
    tts = ttsFile(hd=False)  # hd is twice the price, change to True

    voice = {         # my gender/heaviness rank from 1: male to 10: female
        1: 'nova',     # 10
        2: 'shimmer',  # 9
        3: 'fable',    # 6 (English accent)
        4: 'alloy',    # 6
        5: 'echo',     # 2
        6: 'onyx',     # 1
        }
    # Optional settings, shown with example values:
    # tts.voice = voice[2]  # pick a number
    # tts.response_format = "flac"  # of 'mp3', 'opus', 'aac', 'flac'
    # tts.file_prefix = "tts"  # file naming: (prefix)_tts-1_alloy_20231113_102550.flac

    prompt = """
    [pause]
    The big black bug bit the brown bear.
    """

    # Collapse every whitespace run (newlines plus the literal's indentation)
    # to a single space: the cost is computed per character, so the indent
    # spaces would otherwise be counted. Named `text` rather than `input` so
    # the builtin is not shadowed.
    text = ". " + " ".join(prompt.split())

    tts.voice = voice[2]
    results = tts.tofile(text)
    print(f"{results['filename']} took {results['time']:0.2f} seconds")
    print(f"{results['length']} characters, cost {results['cost']} cents.")
2 Likes

You have to make sure the right version actually gets installed; keep trying until it does.
Run the following from the Visual Studio terminal, in the directory where your Jupyter (.ipynb) file lives:
pip install openai==1.3.7

Thank you, this did help me because I realized that it had to be the openai version.

from pathlib import Path
from openai import OpenAI

client = OpenAI()

# Save speech.mp3 next to this script. __file__ is not defined in an
# interactive/notebook session; use a literal path there instead.
speech_file_path = Path(__file__).parent / "speech.mp3"
response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Today is a wonderful day to build something people love!",
)

response.stream_to_file(speech_file_path)