Hey, I'm working with the OpenAI API and I'm trying to convert this script to use the Whisper API, but I can't figure out how to make it function the same way. By "functioning the same" I mean always listening via speech_recognition (r.listen()), so I don't have to press a button or manually trigger a recording to start talking to the bot.
Code:
"""Always-listening voice chatbot.

Continuously captures microphone audio with speech_recognition, transcribes
it via Google's free STT, sends the text to the Chat Completions API, and
speaks the reply back with the TTS endpoint. Say "quit" to exit.
"""
from openai import OpenAI
import os
import pyaudio
import speech_recognition as sr
from pathlib import Path
from playsound import playsound

client = OpenAI(
    api_key='nicetry;)'  # NOTE(review): don't hard-code keys; load from an env var
)

# Conversation history, seeded with the system prompt.
messages1 = [{'role': 'system', 'content': 'You are a person texting. Try to keep the responses short.'}]

# Create the recognizer and microphone ONCE, not on every loop iteration —
# reopening the audio device each pass is wasteful and can glitch capture.
r = sr.Recognizer()
mic = sr.Microphone()

while True:
    with mic as source:
        print("[listening]")
        try:
            audio = r.listen(source, timeout=5)  # Adjust timeout as needed
            prompt = r.recognize_google(audio)
            print("you said: " + prompt)
        except sr.WaitTimeoutError:
            # No speech started within the timeout window; keep listening.
            print("[listening]")
            continue
        except sr.UnknownValueError:
            # Audio captured but not transcribable; keep listening.
            print("[listening]")
            continue
    if prompt.lower() == 'quit':
        break
    usrmsg = {'role': 'user', 'content': prompt + ' '}
    messages1.append(usrmsg)
    print("[loading. . .]")
    completion = client.chat.completions.create(
        model='gpt-3.5-turbo', messages=messages1
    )
    text = completion.choices[0].message.content
    # Synthesize the reply as speech.
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=text,
    )
    # Write the generated audio to a temporary mp3, play it, then clean up.
    output_file_path = Path("output.mp3")
    with output_file_path.open("wb") as file:
        file.write(response.content)
    print(text)
    playsound(str(output_file_path))
    output_file_path.unlink(missing_ok=True)  # replaces os.path.exists + os.remove
    gptmsg = {'role': 'assistant', 'content': text + ' '}
    messages1.append(gptmsg)
print("Bye! See you later!")
Whisper Code (from docs):
"""Minimal Whisper transcription example (adapted from the OpenAI docs)."""
from openai import OpenAI

client = OpenAI()

# Open the audio in binary mode; the context manager guarantees the file
# handle is closed after the upload (the original leaked it).
with open("/path/to/file/audio.mp3", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )