Whether whisper or o1, this always comes down to the cloud host provider hanging up on inactive connections after 60 seconds or so. It is not OpenAI.
However, I do have to say that Whisper is underperforming.
Let’s say, for example, I take an audio file that is over three and a half hours long and compress it with Opus into an Ogg file of 19,999,886 bytes — just under the 20MB cutoff. Sounds ridiculous, but it has been successful before.
Run a Node.js Whisper script that can normally process a minute of audio in a few seconds. Override the Node.js library timeout to 15 minutes (900 seconds). Wait. Wait…
Starting audio translation...
Translation failed after 931.580 seconds
Error message: Connection error.
Error details: APIConnectionError: Connection error.
at OpenAI.makeRequest (file:///C:/chat/js/node_modules/openai/core.mjs:316:19)
at process.processTicksAndRejections (node:internal/process/task_queues:105:5)
at async translateAudio (file:///C:/chat/js/whisper.js:30:25) {
status: undefined,
headers: undefined,
request_id: undefined,
error: undefined,
code: undefined,
param: undefined,
type: undefined,
cause: FetchError: request to https://api.openai.com/v1/audio/translations failed, reason: read ECONNRESET
at ClientRequest.<anonymous> (C:\chat\js\node_modules\node-fetch\lib\index.js:1501:11)
at ClientRequest.emit (node:events:518:28)
at emitErrorEvent (node:_http_client:103:11)
at TLSSocket.socketErrorListener (node:_http_client:506:5)
at TLSSocket.emit (node:events:530:35)
at emitErrorNT (node:internal/streams/destroy:170:8)
at emitErrorCloseNT (node:internal/streams/destroy:129:3)
at process.processTicksAndRejections (node:internal/process/task_queues:90:21) {
type: 'system',
errno: 'ECONNRESET',
code: 'ECONNRESET'
}
}
The important part is that I wasn’t disconnected after a minute. There was no response at all for fifteen minutes while it was chugging away. (Did they not leave 2.2GB to unpack the audio?)
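Here is a script that reports how long the request ran before it failed (so you can see whether it was the telltale minute) and that saves the transcript to the same directory as the input audio file. The input file name is set near the bottom of the script.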
// whisper.js
import fs from 'fs';
import dotenv from 'dotenv';
import OpenAI from 'openai';
import path from 'path';
// Load environment variables (notably OPENAI_API_KEY) from the .env file
dotenv.config();
const API_KEY = process.env.OPENAI_API_KEY;
// Create an instance of the OpenAI API client
const openai = new OpenAI({
  // Passed explicitly so it is obvious which credential the client uses.
  apiKey: API_KEY,
  // 900 seconds, expressed in milliseconds.
  timeout: 900 * 1000,
  // The SDK retries connection errors and timeouts by default. Disable that
  // so the elapsed time reported on failure reflects a single attempt rather
  // than the sum of several silent re-uploads.
  maxRetries: 0,
});
/**
 * Translate an audio file into English text with OpenAI's `whisper-1` model
 * and save the result next to the input file as `<name>-transcript.txt`.
 *
 * The elapsed wall-clock time is logged on both success and failure, so a
 * failure after roughly a minute can be told apart from one that happens
 * much later.
 *
 * Failures are logged rather than thrown; `process.exitCode` is set to 1 so
 * the failure is still visible to the calling shell.
 *
 * @param {string} filename - Path to the audio file, absolute or relative to
 *   the current working directory.
 * @returns {Promise<void>} Settles once the transcript has been written or
 *   the failure has been logged.
 */
async function translateAudio(filename) {
  // Resolve the full path of the audio file
  const filePath = path.resolve(filename);
  console.log('Starting audio translation...');
  const startTime = Date.now();
  // Seconds since the request started, formatted with millisecond precision.
  const elapsedSeconds = () => ((Date.now() - startTime) / 1000).toFixed(3);

  let audioStream;
  try {
    // Create a read stream of the audio file
    audioStream = fs.createReadStream(filePath);
    // Send the audio file to OpenAI for translation. The translations
    // endpoint always produces English, so no `language` field is sent.
    const translation = await openai.audio.translations.create({
      file: audioStream,
      model: 'whisper-1',
      prompt: 'Our Radio show begins!',
      response_format: 'json',
    });
    console.log(`Translation succeeded in ${elapsedSeconds()} seconds`);

    // Extract the transcript text from the response
    const transcriptText = translation.text;
    // Output name: the input's base name (last extension removed) plus '-transcript.txt'
    const transcriptFilename = `${path.basename(filename, path.extname(filename))}-transcript.txt`;
    const transcriptFilePath = path.join(path.dirname(filePath), transcriptFilename);
    // Save the transcript text to a file
    fs.writeFileSync(transcriptFilePath, transcriptText, 'utf8');
    console.log(`Transcript saved to ${transcriptFilePath}`);

    // Print the first 160 characters of the transcript for confirmation
    const previewLength = 160;
    const transcriptPreview = transcriptText.substring(0, previewLength);
    console.log(`Transcript Preview (first ${previewLength} characters):\n${transcriptPreview}`);
  } catch (error) {
    console.error(`Translation failed after ${elapsedSeconds()} seconds`);
    console.error('Error message:', error.message);
    // API errors carry an HTTP status; connection-level failures leave it undefined.
    if (error.status !== undefined) {
      console.error('HTTP status:', error.status);
    }
    // Connection failures wrap the low-level error (e.g. ECONNRESET) in `cause`.
    const causeCode = error.cause?.code;
    if (causeCode) {
      console.error('Underlying cause:', causeCode);
    }
    console.error('Error details:', error);
    // Report the failure to the shell without aborting any pending output.
    process.exitCode = 1;
  } finally {
    // Release the file handle even when the request fails part-way through.
    audioStream?.destroy();
  }
}
// Audio file to translate: the first command-line argument when one is given,
// otherwise the example file name used by default.
const filename = process.argv[2] || 'audiofile.opus.ogg';
// Catch anything that escapes translateAudio so a rejection is never left
// unhandled and the shell still sees a failing exit code.
translateAudio(filename).catch((error) => {
  console.error('Unexpected failure:', error);
  process.exitCode = 1;
});
(I’ve been waiting… No $1.30 bill yet, just the little trials, so perhaps a platform problem on starting the large files)