Hi guys!
I need your help as I am currently working on a project where I want to transcribe multiple files using the Whisper API regardless of their size.
The code works for all files below 25 MB, but if a file is larger than 25 MB I get an APIError (API disconnect), even after splitting the file into smaller chunks.
I have read several discussions here and tried several approaches to work around this, but I run into the same problem each time:
- Converting the file to .ogg (which produced a file of about 4 MB instead of 50 MB) and transcribing that file resulted in the same problem.
- Adding a 1-second pause at the end of the script, between API requests, didn’t help.
- (Current approach, shown in the code below) Splitting audio files larger than 20 MB into chunks of at most 20 MB each, transcribing each chunk and appending its text to a temporary variable before moving on to the next chunk, then writing the contents of that variable to the transcription file → doesn’t work either.
I honestly don’t know what to do. I want to do this project for a potential client, but neither ChatGPT nor I know how to resolve this problem.
Here’s my code:
def split_audio(file_path, target_size_mb=20, format='mp3', bitrate_kbps=128):
    """Split an audio file into consecutive chunks of roughly bounded size.

    The maximum chunk duration is derived from ``target_size_mb`` and
    ``bitrate_kbps``; every chunk is exported at that same bitrate so the
    size estimate and the encoder setting agree. Chunk files are written
    next to the source file as ``<file_path>_part<N>.<format>``.

    Args:
        file_path: Path of the audio file to split.
        target_size_mb: Approximate upper bound for each chunk, in MiB.
        format: Container/codec name used both to read the input and to
            write the chunks.
        bitrate_kbps: Bitrate, in kbit/s, used for the size estimate and
            passed to the exporter.

    Returns:
        List of chunk file paths, in playback order.

    Raises:
        ValueError: If ``target_size_mb`` or ``bitrate_kbps`` is not positive.
    """
    # Validate first: a zero step would otherwise surface later as an opaque
    # "range() arg 3 must not be zero" error.
    if target_size_mb <= 0 or bitrate_kbps <= 0:
        raise ValueError("target_size_mb and bitrate_kbps must be positive")

    audio = AudioSegment.from_file(file_path, format=format)

    # 1 byte = 8 bits, so e.g. 128 kbps = 128000 bits/s = 16000 bytes/s.
    bytes_per_second = bitrate_kbps * 1000 / 8
    max_segment_duration_ms = max(
        1, int(target_size_mb * 1024 * 1024 / bytes_per_second * 1000)
    )

    chunks = []
    # len(audio) and slicing are in milliseconds (assumes AudioSegment is
    # pydub's, as the original duration math implies).
    for index, start_ms in enumerate(range(0, len(audio), max_segment_duration_ms)):
        chunk = audio[start_ms:start_ms + max_segment_duration_ms]
        chunk_name = f"{file_path}_part{index}.{format}"
        # Pin the encoder bitrate; without it the exporter's default decides
        # the real size and the estimate above is only a guess.
        chunk.export(chunk_name, format=format, bitrate=f"{bitrate_kbps}k")
        chunks.append(chunk_name)
    return chunks
def transcribe_audio(file_path):
    """Transcribe a single audio file and return the recognised text.

    The file is opened in binary mode and handed to
    ``client.audio.transcriptions.create`` with the ``whisper-1`` model.
    ``client`` is a module-level object defined outside this function
    (presumably an OpenAI API client -- confirm against the file's setup code).

    Args:
        file_path: Path of the audio file to upload.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    with open(file_path, "rb") as audio_file:
        result = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    # Read the text only after the upload has finished and the handle is closed.
    return result.text
def process_directory(directory_path):
    """Transcribe every ``.mp3`` file in a directory to a ``.txt`` file.

    Transcripts are written to a ``transcriptions`` sub-directory of
    ``directory_path`` (created if missing), one text file per audio file.
    Files larger than 20 MiB are first split with ``split_audio``; each part
    is transcribed with ``transcribe_audio`` and the texts are joined with
    single spaces. Smaller files are transcribed directly.

    Args:
        directory_path: Directory that contains the ``.mp3`` files. Only its
            direct entries are examined.
    """
    output_dir = os.path.join(directory_path, "transcriptions")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    size_limit_bytes = 20 * 1024 * 1024
    for file_name in os.listdir(directory_path):
        if not file_name.endswith('.mp3'):
            continue

        file_path = os.path.join(directory_path, file_name)
        if os.path.getsize(file_path) > size_limit_bytes:
            parts = split_audio(file_path)
            try:
                complete_transcription = " ".join(
                    transcribe_audio(part) for part in parts
                )
            finally:
                # Remove the temporary chunk files even when a request fails,
                # so a crashed run does not leave them in the audio folder.
                for part in parts:
                    if os.path.exists(part):
                        os.remove(part)
        else:
            complete_transcription = transcribe_audio(file_path)

        # splitext swaps only the final extension; str.replace would also
        # rewrite any other ".mp3" occurring inside the name.
        output_name = os.path.splitext(file_name)[0] + '.txt'
        output_path = os.path.join(output_dir, output_name)
        # Explicit UTF-8 so non-ASCII transcripts do not depend on the
        # platform's default encoding.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(complete_transcription)
# Path to the folder that contains the audio files.
# NOTE(review): the author left the actual value out of the post; a directory
# path must be assigned on the next line before this script can run.
directory_path = # hidden/ not relevant for you
# Transcribe the .mp3 files found directly inside that folder.
process_directory(directory_path)