I want to convert audio to text with the Whisper model, but I can't get it to work. Here is my Django REST Framework view:
import io

import openai
import speech_recognition as sr
from decouple import config
from pydub import AudioSegment
from rest_framework.response import Response
from rest_framework.views import APIView


class TextView(APIView):
    def post(self, request, *args, **kwargs):
        if 'audio_file' in request.data:
            audio_blob = request.data.get('audio_file')
            input_text = self.generate_text(audio_blob)
            return Response({'input_text': input_text})
        return Response({'error': 'audio_file missing'}, status=400)

    def generate_text(self, inp_audio):
        model_id = 'whisper-1'
        response = openai.Audio.transcribe(
            api_key=config('OPENAI_API_KEY'),
            model=model_id,
            file=inp_audio,
        )
        return response['text']
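For reference, the documented pre-1.0 openai SDK usage I am trying to adapt reads the audio from a file on disk ('sample.wav' here is just a placeholder name):

import openai

# Documented pre-1.0 usage: pass an open binary file handle.
# 'sample.wav' is a placeholder; any local audio file works.
with open('sample.wav', 'rb') as audio_file:
    transcript = openai.Audio.transcribe('whisper-1', audio_file)
print(transcript['text'])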
When I used the process_audio_blob function below instead (SpeechRecognition with the Google recognizer), I got accurate results:
def process_audio_blob(self, audio_blob):
    recognizer = sr.Recognizer()
    audio_bytes = audio_blob.read()
    # Decode whatever container the browser sent and re-encode it as WAV.
    audio_segment = AudioSegment.from_file(io.BytesIO(audio_bytes))
    wav_data = audio_segment.export(format="wav").read()
    try:
        with io.BytesIO(wav_data) as wav_io:
            audio_data = sr.AudioData(
                wav_io.read(),
                sample_rate=audio_segment.frame_rate,
                sample_width=audio_segment.sample_width,
            )
            text = recognizer.recognize_google(audio_data, language='en-US')
            return text
    except Exception:
        return None
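Switching between the two paths is a one-line change in post; nothing else in the view differs:

# Inside TextView.post; only the helper call changes.
input_text = self.process_audio_blob(audio_blob)   # works, returns text
# input_text = self.generate_text(audio_blob)      # fails, see below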
Following the same approach as process_audio_blob, I tried Whisper as shown below, but it still doesn't work; this version never produces any text:
def generate_text(self, audio_blob):
    model_id = 'whisper-1'
    audio_bytes = audio_blob.read()
    audio_segment = AudioSegment.from_file(io.BytesIO(audio_bytes))
    wav_data = audio_segment.export(format="wav").read()
    try:
        with io.BytesIO(wav_data) as wav_io:
            audio_data = sr.AudioData(
                wav_io.read(),
                sample_rate=audio_segment.frame_rate,
                sample_width=audio_segment.sample_width,
            )
            response = openai.Audio.transcribe(
                api_key=config('OPENAI_API_KEY'),
                model=model_id,
                file=audio_data,
            )
            print(response['text'])
            return response['text']
    except Exception as e:  # catch any exception raised during transcription
        print(f"Error during transcription: {e}")
        return None
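My current suspicion is that openai.Audio.transcribe wants an actual file-like object with a filename it can infer the format from, not an sr.AudioData container. A sketch of what I plan to try next, assuming the pre-1.0 SDK reads the buffer's .name attribute for format detection:

def generate_text(self, audio_blob):
    audio_bytes = audio_blob.read()
    audio_segment = AudioSegment.from_file(io.BytesIO(audio_bytes))
    # Re-encode to WAV into an in-memory buffer instead of sr.AudioData.
    wav_io = io.BytesIO()
    audio_segment.export(wav_io, format="wav")
    wav_io.seek(0)
    wav_io.name = 'audio.wav'  # fake filename so the SDK can detect the format
    response = openai.Audio.transcribe(
        api_key=config('OPENAI_API_KEY'),
        model='whisper-1',
        file=wav_io,
    )
    return response['text']

If that is not the issue, the difference must lie in how the blob is read, since process_audio_blob consumes the same input successfully.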
I recorded the audio with react-voice-visualizer; the recording component is below:
import React, { useEffect } from "react";
import { useVoiceVisualizer, VoiceVisualizer } from "react-voice-visualizer";
import Product from "services/Product";
const Record = ({
setLoading,
setSpeech,
setSearchedKeyWord,
setProductList,
}) => {
const recorderControls = useVoiceVisualizer();
const { stopRecording, recordedBlob, error, audioRef } = recorderControls;
useEffect(() => {
const sendAudioToAPI = async () => {
stopRecording();
if (recordedBlob) {
setLoading(true);
try {
let formData = new FormData();
formData.append("audio_file", recordedBlob);
const response = await Product.audioProduct(formData);
if (response.status === 200) {
setLoading(false);
setSpeech(response?.input_text);
// setSearchedKeyWord(response?.products);
setProductList(response?.results);
console.log("Audio sent successfully!");
} else {
setProductList([]);
setLoading(false);
console.error("Failed to send audio.");
}
        } catch (error) {
          setLoading(false); // clear the spinner on failure too
          console.error("Error sending audio:", error);
        }
}
};
if (recordedBlob) {
sendAudioToAPI();
}
if (error) {
console.log("Error:", error);
}
}, [recordedBlob]);
// useEffect(() => {
// let interval = setTimeout(() => {
// stopRecording();
// }, 30000);
// return () => clearTimeout(interval);
// }, []);
return (
<div className='w-full relative'>
<div className=''>
<VoiceVisualizer
// onlyRecording={true}
// backgroundColor="white"
mainBarColor='green'
secondaryBarColor='red'
controls={recorderControls}
speed={5}
ref={audioRef}
// animateCurrentPick={true}
defaultAudioWaveIconColor='green'
defaultMicrophoneIconColor='red'
isControlPanelShown={true}
barWidth={2}
// isDefaultUIShown={false}
isProgressIndicatorShown={true}
canvasContainerClassName='mb-5'
progressIndicatorClassName='className'
fullscreen={true}
controlButtonsClassName=''
progressIndicatorTimeClassName='hidden'
recordingTime={30}
duration={30}
isProgressIndicatorTimeShown={false}
/>
</div>
</div>
);
};
export default Record;
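One detail that may matter: since formData.append("audio_file", recordedBlob) passes no filename, I believe the browser sends the part with the generic name "blob", so on the Django side the upload has no useful extension. A quick (hypothetical) way to check what actually arrives in the view:

# Hypothetical debug lines at the top of TextView.post.
uploaded = request.data.get('audio_file')
print(type(uploaded))                          # e.g. InMemoryUploadedFile
print(getattr(uploaded, 'name', None))         # filename the browser sent
print(getattr(uploaded, 'content_type', None)) # MIME type of the upload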