Files
voice_recognition/whisper/assets/js/hooks/vad.js

80 lines
2.7 KiB
JavaScript

import { Socket } from "phoenix";
// Browser bundles fetched from the CDN when the hook mounts.
// Order matters: onnxruntime is loaded first, then vad-web.
const VAD_SCRIPT_URLS = [
  "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/ort.js",
  "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.22/dist/bundle.min.js",
];

// NOTE(review): hard-coded local endpoint — presumably a development value;
// confirm before deploying anywhere else.
const VAD_SOCKET_URL = "ws://localhost:4003/socket";
const VAD_CHANNEL_TOPIC = "audio:lobby";

// One load promise per URL so remounting the hook does not append the same
// <script> tag again.
const vadScriptLoads = new Map();

/**
 * Appends a <script> for `src` to <body> (once per URL) and resolves when it
 * has loaded; rejects if the browser reports a load error.
 */
function loadVadScript(src) {
  let pending = vadScriptLoads.get(src);
  if (pending === undefined) {
    pending = new Promise((resolve, reject) => {
      const script = document.createElement("script");
      script.src = src;
      script.onload = () => resolve();
      script.onerror = () => {
        // Forget the failed attempt so a later mount can retry.
        vadScriptLoads.delete(src);
        script.remove();
        reject(new Error(`Failed to load script: ${src}`));
      };
      document.body.appendChild(script);
    });
    vadScriptLoads.set(src, pending);
  }
  return pending;
}

/**
 * Halts a running MicVAD without discarding it.
 *
 * NOTE(review): the vad-web docs describe `start()` / `pause()`; `stop()` is
 * tried first only to preserve the original call — confirm which one the
 * pinned 0.0.22 bundle actually exposes.
 */
function haltVad(micVad) {
  const halt = typeof micVad.stop === "function" ? micVad.stop : micVad.pause;
  return halt.call(micVad);
}

/** Releases a MicVAD for good: `destroy()` when available, otherwise just halt it. */
function releaseVad(micVad) {
  try {
    if (typeof micVad.destroy === "function") {
      micVad.destroy();
    } else {
      haltVad(micVad);
    }
  } catch (err) {
    console.error("Failed to release VAD", err);
  }
}

/**
 * Loads the CDN bundles, joins the audio channel and prepares (but does not
 * start) the microphone VAD.
 *
 * Stores `socket`, `channel` and `myvad` on `hook`. Resolves to the MicVAD
 * instance, or to null when the hook was destroyed while setup was in flight.
 */
async function initVad(hook, setStatus) {
  for (const url of VAD_SCRIPT_URLS) {
    await loadVadScript(url);
  }
  if (hook.vadTornDown) {
    return null;
  }

  // Set up the Phoenix channel.
  hook.socket = new Socket(VAD_SOCKET_URL);
  hook.socket.connect();
  hook.channel = hook.socket.channel(VAD_CHANNEL_TOPIC);
  // `receive` registers a callback and returns the push itself, not a promise,
  // so there is nothing to await here; failures are reported via callbacks.
  hook.channel
    .join()
    .receive("ok", () => {
      console.log("✅ Canal audio:lobby unido.");
    })
    .receive("error", (reason) => {
      console.error("Failed to join audio:lobby", reason);
    })
    .receive("timeout", () => {
      console.error("Timed out joining audio:lobby");
    });

  // Prepare the VAD but do not start it yet. `vad` is the global defined by
  // the vad-web bundle loaded above.
  const micVad = await vad.MicVAD.new({
    onSpeechStart: () => {
      setStatus("🎤 Voz detectada...");
    },
    onSpeechEnd: async (float32Audio) => {
      setStatus("✅ Voz finalizada. Enviando audio...");
      try {
        await sendAudioChunk(float32Audio, hook.channel);
        hook.channel.push("stop_audio", {});
      } catch (err) {
        // Without this the failure would only show up as an unhandled rejection.
        console.error("Failed to send audio", err);
      }
    },
  });

  if (hook.vadTornDown) {
    // The hook went away while the microphone was being opened.
    releaseVad(micVad);
    return null;
  }
  hook.myvad = micVad;
  return micVad;
}

/**
 * LiveView hook that runs voice-activity detection on the microphone and
 * pushes each detected speech segment over the "audio:lobby" channel.
 *
 * Server events handled: "init-vad" starts listening, "stop-vad" halts it.
 * Status text is written to the element with id "vad-status" when present.
 */
export const VadHook = {
  /**
   * Registers the event handlers immediately, then performs the asynchronous
   * setup. The returned promise settles once setup has finished or failed.
   */
  async mounted() {
    const statusDiv = document.getElementById("vad-status");
    const setStatus = (text) => {
      if (statusDiv) {
        statusDiv.textContent = text;
      }
    };

    this.vadTornDown = false;

    // Never rejects: a setup failure is reported and collapses to null so the
    // handlers below can simply skip their work.
    const vadReady = initVad(this, setStatus).catch((err) => {
      console.error("VAD setup failed", err);
      setStatus("❌ No se pudo inicializar el VAD.");
      return null;
    });

    // Handlers are registered before setup completes so that an event sent
    // right after mount is not lost; each one waits for setup first.
    this.handleEvent("init-vad", async () => {
      const micVad = await vadReady;
      if (!micVad || this.vadTornDown) {
        return;
      }
      try {
        await micVad.start();
        setStatus("🚀 VAD iniciado.");
      } catch (err) {
        console.error("Failed to start VAD", err);
      }
    });

    this.handleEvent("stop-vad", async () => {
      const micVad = await vadReady;
      if (!micVad || this.vadTornDown) {
        return;
      }
      try {
        await haltVad(micVad);
        setStatus("🛑 VAD detenido.");
      } catch (err) {
        console.error("Failed to stop VAD", err);
      }
    });

    await vadReady;
  },

  /** Releases the microphone VAD, leaves the channel and closes the socket. */
  destroyed() {
    this.vadTornDown = true;
    if (this.myvad) {
      releaseVad(this.myvad);
      this.myvad = null;
    }
    if (this.channel) {
      this.channel.leave();
      this.channel = null;
    }
    if (this.socket) {
      this.socket.disconnect();
      this.socket = null;
    }
  },
};
/**
 * Converts floating-point audio samples to 16-bit signed PCM.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000 and
 * non-negative values by 0x7FFF, so -1 maps to -32768 and 1 maps to 32767.
 * Fractional results are truncated toward zero when stored in the Int16Array.
 *
 * @param {Float32Array} float32Array - Samples to convert.
 * @returns {Int16Array} A new array with one PCM value per input sample.
 */
function float32ToInt16(float32Array) {
  return Int16Array.from(float32Array, (sample) => {
    const clamped = Math.min(1, Math.max(-1, sample));
    const fullScale = clamped < 0 ? 0x8000 : 0x7fff;
    return clamped * fullScale;
  });
}
/**
 * Packs one speech segment into a single binary frame and pushes it on the
 * channel.
 *
 * Frame layout:
 *   bytes 0-1 : header length as an unsigned 16-bit integer, little-endian
 *   next N    : UTF-8 JSON header; the sample rate is hard-coded to 16000
 *   remainder : raw bytes of the Int16Array produced by float32ToInt16
 *               (copied as-is, i.e. in the platform's byte order)
 *
 * @param {Float32Array} float32Audio - Samples of the detected speech segment.
 * @param {object} channel - Channel to push the frame on.
 * @param {string} [event="audio_chunk"] - Event name used when the channel has
 *   no `pushBinary` method and the frame is sent with `channel.push` instead.
 *   NOTE(review): the default name is an assumption — confirm it against the
 *   server channel's `handle_in` clauses.
 * @returns {Promise<void>} Settles once the frame has been handed to the channel.
 */
async function sendAudioChunk(float32Audio, channel, event = "audio_chunk") {
  const pcm16 = float32ToInt16(float32Audio);
  const headerBytes = new TextEncoder().encode(
    JSON.stringify({ sample_rate: 16000 }),
  );

  const pcmOffset = 2 + headerBytes.length;
  const frame = new Uint8Array(pcmOffset + pcm16.byteLength);
  new DataView(frame.buffer).setUint16(0, headerBytes.length, true);
  frame.set(headerBytes, 2);
  // The PCM section can start at an odd offset, so it is copied byte-wise.
  // byteOffset/byteLength are honoured in case pcm16 is a view into a larger
  // buffer.
  frame.set(
    new Uint8Array(pcm16.buffer, pcm16.byteOffset, pcm16.byteLength),
    pcmOffset,
  );

  if (typeof channel.pushBinary === "function") {
    // Preserve the original call for channel objects that provide it.
    channel.pushBinary(frame.buffer);
  } else {
    // The phoenix.js Channel API documents `push(event, payload)`, which
    // accepts an ArrayBuffer payload for binary messages.
    channel.push(event, frame.buffer);
  }
}