I’m trying to first just get transcriptions working, and eventually deploy to a cloud service, but for now, simply test transcriptions. The idea is to go, grab the first podcast, from the RSS feed, and transcribe that. I’ve tried using GPT4 to help with the code, and I’ve tried Code Interpreter (or Advanced Data Analysis) and just can’t seem to get anywhere. What could I be doing wrong?
Here is the code I’m using:
import base64
import feedparser
import openai
import os
import requests
import ssl
import urllib.request
# If you're facing issues with SSL certificate verification, use this:
ssl._create_default_https_context = ssl._create_unverified_context
def download_episode(episode_url, local_path):
"""
Download the podcast episode from the provided URL and save it to the local path.
"""
local_audio_file = f"{local_path}/episode.mp3"
with requests.get(episode_url, stream=True) as r:
r.raise_for_status()
with open(local_audio_file, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
return local_audio_file
def transcribe_podcast(rss_url, local_path):
# Parse the RSS feed
intelligence_feed = feedparser.parse(rss_url, request_headers={"User-Agent": "Mozilla/5.0"})
if not intelligence_feed.entries:
print("No entries found in the RSS feed.")
return
# Get the first episode's audio URL
episode_url = intelligence_feed.entries[0].links[0].href
# Download the episode
local_audio_file = download_episode(episode_url, local_path)
# Convert audio to base64
with open(local_audio_file, "rb") as f:
base64_audio = base64.b64encode(f.read()).decode('utf-8')
# Make a direct API call to the OpenAI Whisper endpoint
headers = {
'Authorization': f'OPENAI_API_KEY', # Make sure to replace with your API key
'Content-Type': 'application/json',
}
data = {
'audio': base64_audio
}
response = requests.post('https://api.openai.com/v1/whisper/asr', headers=headers, json=data)
if response.status_code == 200:
transcription = response.json().get('transcription', '')
print("Transcription:", transcription)
else:
print("Error:", response.text)
print(response.json())
if __name__ == "__main__":
print("Starting Podcast Transcription Function")
rss_url = "https://feeds.simplecast.com/uSa3prhz"
local_path = "./"
transcribe_podcast(rss_url, local_path)
And here is the error I’m getting when I try to run it:
Starting Podcast Transcription Function
Error: {
“error”: {
“message”: “Invalid URL (POST /v1/whisper/asr)”,
“type”: “invalid_request_error”,
“param”: null,
“code”: null
}
}
{‘error’: {‘message’: ‘Invalid URL (POST /v1/whisper/asr)’, ‘type’: ‘invalid_request_error’, ‘param’: None, ‘code’: None}}