omg… that was back in the jQuery days…
import $ from "jquery";
class AudioProcessor extends AudioWorkletProcessor {
  process(inputs, outputs) {
    const input = inputs[0];
    const output = outputs[0];
    // Guard: the node may be processed before any input is connected
    if (!input || input.length === 0) {
      return true;
    }
    // Pass the audio through unchanged
    const channelCount = Math.min(input.length, output.length);
    for (let channel = 0; channel < channelCount; ++channel) {
      output[channel].set(input[channel]);
    }
    // Post the first half of channel 0 to the main thread
    if (input[0].length > 0) {
      const bufferData = input[0].slice(0, input[0].length / 2);
      this.port.postMessage(bufferData);
    }
    return true;
  }
}

// registerProcessor only exists in the AudioWorkletGlobalScope, so this class
// and the call below have to live in their own module file, loaded via
// audioContext.audioWorklet.addModule():
registerProcessor('audio-processor', AudioProcessor);
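
// Hypothetical glue code (not in the original): nothing in the widget below
// actually loads or connects the processor. Wiring it up would look roughly
// like this, assuming the class above is saved as "audio-processor.js":
async function connectAudioProcessor(audioContext, source) {
  await audioContext.audioWorklet.addModule("audio-processor.js");
  const workletNode = new AudioWorkletNode(audioContext, "audio-processor");
  // Receives the half-buffers posted from AudioProcessor.process()
  workletNode.port.onmessage = (event) => console.log("worklet chunk:", event.data);
  source.connect(workletNode).connect(audioContext.destination);
  return workletNode;
}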
$.widget("custom.audioRecorder", {
  options: {
    recordButton: "#recordButton",
    decibelLevel: "#decibelLevel",
    numberOfCanvases: 6,
    silenceTimeout: 3000,
    wordEndThreshold: 10,
    canvasIdPrefix: "canvas"
  },
  _create: function() {
    this.isRecording = false;
    this.audioContext = null;
    this.mediaRecorder = null;
    this.audioChunks = [];
    this.canvases = [];
    this.canvasContexts = [];
    this._initializeCanvasElements();
    $(this.options.recordButton).on("click", this._toggleRecording.bind(this));
  },
  _initializeCanvasElements: function() {
    // Collect the pre-existing <canvas id="canvas1"> … <canvas id="canvas6"> elements
    for (let i = 1; i <= this.options.numberOfCanvases; i++) {
      const canvasElement = document.getElementById(`${this.options.canvasIdPrefix}${i}`);
      this.canvases.push(canvasElement);
      this.canvasContexts.push(canvasElement.getContext("2d"));
    }
  },
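  // Assumed markup (reconstructed from the selectors above, not from the original page):
  // <button id="recordButton"><i class="fas fa-microphone"></i> Aufnahme starten</button>
  // <div id="decibelLevel" class="hidden"></div>
  // <canvas id="canvas1"></canvas> … <canvas id="canvas6"></canvas>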
  _toggleRecording: async function() {
    if (this.isRecording) {
      this.isRecording = false;
      this._stopRecording();
      $(this.options.recordButton).html('<i class="fas fa-microphone"></i> Aufnahme starten');
      $(this.options.decibelLevel).addClass("hidden");
    } else {
      // Flip the flag before starting; the draw/update loops below bail out
      // while !isRecording, so flipping it afterwards kills them immediately
      this.isRecording = true;
      await this._startRecording();
      $(this.options.recordButton).html('<i class="fas fa-microphone-slash"></i> Aufnahme stoppen');
      $(this.options.decibelLevel).removeClass("hidden");
    }
  },
  _startRecording: async function() {
    this.audioContext = new AudioContext();
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    this._setupMediaRecorder(stream);
    this._setupAudioAnalyser(stream);
  },
  _setupMediaRecorder: function(stream) {
    this.mediaRecorder = new MediaRecorder(stream);
    // Reuses silenceTimeout as the timeslice, so a dataavailable event fires
    // every 3 s while recording
    this.mediaRecorder.start(this.options.silenceTimeout);
    this.mediaRecorder.addEventListener("dataavailable", (e) => {
      this.audioChunks.push(e.data);
    });
    this.mediaRecorder.addEventListener("stop", async () => {
      await this._handleDataTransfer();
      // Restart if we are still supposed to be recording
      if (this.isRecording) {
        this.mediaRecorder.start(this.options.silenceTimeout);
      }
    });
  },
  _setupAudioAnalyser: function(stream) {
    const source = this.audioContext.createMediaStreamSource(stream);
    const analyser = this.audioContext.createAnalyser();
    analyser.fftSize = 2048;
    source.connect(analyser);
    this._visualizeAudio(analyser);
    this._updateDecibelLevel(analyser);
  },
  _stopRecording: function() {
    if (this.mediaRecorder && this.mediaRecorder.state !== 'inactive') {
      this.mediaRecorder.stop();
    }
    if (this.audioContext) {
      this.audioContext.close();
      this.audioContext = null;
    }
  },
  _handleDataTransfer: async function() {
    if (this.audioChunks.length > 0) {
      const wavFile = await this._createWavFile(this.audioChunks);
      await this._sendWavFile(wavFile);
      this.audioChunks = [];
    }
  },
  _sendWavFile: async function(wavFile) {
    const response = await fetch('/sound/save', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/octet-stream',
      },
      body: wavFile.buffer,
    });
    if (response.ok) {
      console.log('WAV file sent successfully');
    } else {
      console.error('Failed to send WAV file:', response.statusText);
    }
  },
  _createWavFile: async function(audioChunks) {
    // Note: this only concatenates the recorded chunks; MediaRecorder usually
    // produces webm/opus, so labelling the Blob audio/wav does not transcode it
    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(new Uint8Array(reader.result));
      reader.onerror = () => reject(reader.error);
      reader.readAsArrayBuffer(audioBlob);
    });
  },
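  // Hypothetical helper, not part of the original widget: since _createWavFile
  // above only relabels the container, producing a real WAV would mean decoding
  // to PCM first and writing a RIFF header, roughly like this (16-bit mono):
  _encodePcmAsWav: function(float32Samples, sampleRate) {
    const numSamples = float32Samples.length;
    const buffer = new ArrayBuffer(44 + numSamples * 2);
    const view = new DataView(buffer);
    const writeString = (offset, s) => {
      for (let i = 0; i < s.length; i++) view.setUint8(offset + i, s.charCodeAt(i));
    };
    writeString(0, 'RIFF');
    view.setUint32(4, 36 + numSamples * 2, true); // remaining chunk size
    writeString(8, 'WAVE');
    writeString(12, 'fmt ');
    view.setUint32(16, 16, true);  // fmt chunk size
    view.setUint16(20, 1, true);   // PCM
    view.setUint16(22, 1, true);   // mono
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, sampleRate * 2, true); // byte rate
    view.setUint16(32, 2, true);   // block align
    view.setUint16(34, 16, true);  // bits per sample
    writeString(36, 'data');
    view.setUint32(40, numSamples * 2, true);
    // Clamp each float sample to [-1, 1] and scale to signed 16-bit
    for (let i = 0; i < numSamples; i++) {
      const s = Math.max(-1, Math.min(1, float32Samples[i]));
      view.setInt16(44 + i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
    }
    return new Uint8Array(buffer);
  },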
  _visualizeAudio: function(analyser) {
    const bufferLength = analyser.frequencyBinCount;
    const dataArray = new Uint8Array(bufferLength);
    for (let i = 0; i < this.options.numberOfCanvases; i++) {
      this.canvases[i].width = 30;
      this.canvases[i].height = 100;
    }
    const draw = () => {
      if (!this.isRecording) {
        return;
      }
      analyser.getByteFrequencyData(dataArray);
      // Each canvas shows one of the first six (lowest) frequency bins as a bar
      for (let i = 0; i < this.options.numberOfCanvases; i++) {
        const canvasContext = this.canvasContexts[i];
        canvasContext.clearRect(0, 0, 30, 100);
        const barHeight = dataArray[i] * 0.5;
        canvasContext.fillStyle = `rgb(${barHeight + 100}, 50, 50)`;
        canvasContext.fillRect(0, 100 - barHeight, 30, barHeight);
      }
      requestAnimationFrame(draw);
    };
    draw();
  },
  _updateDecibelLevel: function(analyser) {
    const dataArray = new Uint8Array(analyser.frequencyBinCount);
    let inWord = false;
    let lastSoundTime = Date.now();
    const update = () => {
      if (!this.isRecording) {
        return;
      }
      analyser.getByteFrequencyData(dataArray);
      // Mean of the byte frequency data – a rough loudness proxy, not real decibels
      const sum = dataArray.reduce((a, b) => a + b, 0);
      const average = sum / dataArray.length;
      if (average > this.options.wordEndThreshold) {
        lastSoundTime = Date.now();
        inWord = true;
      } else if (inWord && Date.now() - lastSoundTime >= this.options.silenceTimeout) {
        // Silence lasted long enough after a word: flush the buffered chunks
        inWord = false;
        this._handleDataTransfer().catch(console.error);
      }
      $(this.options.decibelLevel).text(`Decibel Level: ${average.toFixed(1)}`);
      requestAnimationFrame(update);
    };
    update();
  }
});
$(document).ready(function() {
  $("body").audioRecorder();
});
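// The widget factory also accepts option overrides, e.g. (made-up values):
// $("body").audioRecorder({ numberOfCanvases: 8, silenceTimeout: 2000 });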
and then I had a shell script…
#!/bin/bash
# Iterate over each webm file starting with "recording_"
for file in recording_*.webm; do
    # Convert .webm to 16-bit mono 44.1 kHz .wav
    OUTPUT_WAV="${file%.webm}.wav"
    ffmpeg -y -i "$file" -acodec pcm_s16le -ac 1 -ar 44100 "$OUTPUT_WAV"

    # Detect silence (below -30 dB for at least 0.5 s)
    SILENCE_OUTPUT=$(ffmpeg -i "$OUTPUT_WAV" -af silencedetect=n=-30dB:d=0.5 -f null - 2>&1)

    # Print the entire SILENCE_OUTPUT for debugging
    echo "$SILENCE_OUTPUT"

    # End of the leading silence and start of the trailing silence
    FIRST_SILENCE_END=$(echo "$SILENCE_OUTPUT" | grep "silence_end" | awk -F': ' '{print $2}' | awk -F' \\|' '{print $1}' | head -1)
    SECOND_SILENCE_START=$(echo "$SILENCE_OUTPUT" | grep "silence_start" | awk -F': ' '{print $2}' | tail -1)

    # Debug
    echo "Debug: FIRST_SILENCE_END=$FIRST_SILENCE_END"
    echo "Debug: SECOND_SILENCE_START=$SECOND_SILENCE_START"

    # Calculate duration of the non-silent part
    if [ -n "$FIRST_SILENCE_END" ] && [ -n "$SECOND_SILENCE_START" ]; then
        DURATION=$(echo "$SECOND_SILENCE_START - $FIRST_SILENCE_END" | bc)
    else
        DURATION=0
    fi

    # Debug
    echo "Debug: DURATION=$DURATION"

    # Decide how to process the file
    if (( $(echo "$DURATION <= 0" | bc) )); then
        # No usable silence boundaries detected: copy the file with a clean_ prefix
        cp "$file" "clean_$file"
    else
        # Cut out the non-silent part between the two silences
        ffmpeg -y -i "$file" -ss "$FIRST_SILENCE_END" -t "$DURATION" "clean_$file"
    fi
done
Maybe you can use something from it or at least take some ideas from it…
I think it also did some sort of frequency analysis… to display pitch and volume or something…
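If I had to reconstruct it, it was probably something like this (a rough sketch, not the original code; RMS for volume, naive autocorrelation for pitch):
function analyseFrame(samples, sampleRate) {
  // samples: Float32Array filled via analyser.getFloatTimeDomainData(samples)
  // Volume: root mean square of the time-domain samples
  let sumSquares = 0;
  for (const s of samples) sumSquares += s * s;
  const rms = Math.sqrt(sumSquares / samples.length);
  // Pitch: pick the lag with the strongest self-similarity
  let bestLag = 0;
  let bestCorrelation = 0;
  for (let lag = 40; lag < samples.length / 2; lag++) {
    let correlation = 0;
    for (let i = 0; i < samples.length - lag; i++) {
      correlation += samples[i] * samples[i + lag];
    }
    if (correlation > bestCorrelation) {
      bestCorrelation = correlation;
      bestLag = lag;
    }
  }
  const pitchHz = bestLag > 0 ? sampleRate / bestLag : 0;
  return { rms, pitchHz };
}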
I was working on speaker diarization in JavaScript for a couple of days, but gave up at some point after realizing how much work that would mean…