whisper.cpp in real time with the tiny model

2025-06-19 15:50:54 -03:00
parent 9908d84b8c
commit 77f87c3655
5 changed files with 227 additions and 93 deletions


@@ -7,114 +7,137 @@ defmodule Recognition_VADWeb.Stt.TestWithChannel do
   def render(assigns) do
     ~H"""
     <div id="container">
       <div id="status">Presioná "Start Recording"…</div>
       <button id="startButton">Start Recording</button>
       <button id="stopButton" disabled>Stop Recording</button>

       <div id="transcriptionContainer">
         <div id="transcription" class="realtime"></div>
       </div>

-      <div id="fullTextContainer">
-        <div id="fullText"></div>
-      </div>
-
       <script type="module">
         import { Socket } from "https://cdn.skypack.dev/phoenix";

         const statusDiv = document.getElementById("status");
         const transcriptionDiv = document.getElementById("transcription");
-        const fullTextDiv = document.getElementById("fullText");
         const startButton = document.getElementById("startButton");
         const stopButton = document.getElementById("stopButton");

         let socket, channel;
         let audioContext, mediaStream, mediaProcessor;

         async function startRecording() {
           startButton.disabled = true;
           stopButton.disabled = false;
-          statusDiv.textContent = "Recording…";
-          transcriptionDiv.textContent = "";
-          fullTextDiv.textContent = "";
+          statusDiv.textContent = "🎙 Grabando…";
+          transcriptionDiv.innerHTML = "";

           socket = new Socket("ws://localhost:4000/socket");
           socket.connect();

           channel = socket.channel("data:lobby");
           channel.join()
             .receive("ok", () => {
-              statusDiv.textContent = "🎙 Conectado a Phoenix STT";
+              statusDiv.textContent = "✅ Conectado a Phoenix STT";
               console.log("Canal conectado");
             })
             .receive("error", () => {
-              statusDiv.textContent = "❌ Error al conectar";
+              statusDiv.textContent = "❌ Error al conectar canal";
               console.error("Error al conectar canal");
             });

+          // Realtime parcial (palabras mientras habla)
+          let partialTranscript = "";
           channel.on("realtime", payload => {
             const words = payload.text.split(" ");
             const lastWord = words.pop();
-            transcriptionDiv.innerHTML = `${words.join(" ")} <span class="last-word">${lastWord}</span>`;
+            const rest = words.join(" ");
+            if (rest.length > 0) {
+              partialTranscript += rest + " ";
+            }
+            transcriptionDiv.innerHTML = `
+              ${partialTranscript}<span class="last-word">${lastWord}</span>
+            `;
           });

-          channel.on("fullSentence", payload => {
-            fullTextDiv.innerHTML += payload.text + " ";
-          });
+          // Frase completa (después de procesar chunks)
+          channel.on("transcription", payload => {
+            const sentence = payload.text.trim();
+            if (sentence.length > 0) {
+              partialTranscript = ""; // reseteamos el parcial
+              const span = document.createElement("div");
+              span.className = "sentence";
+              span.textContent = sentence;
+              transcriptionDiv.appendChild(span);
+              transcriptionDiv.innerHTML += "<br />";
+            }
+          });

+          // Audio setup
           audioContext = new AudioContext();
           mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
           const input = audioContext.createMediaStreamSource(mediaStream);

           mediaProcessor = audioContext.createScriptProcessor(1024, 1, 1);
           mediaProcessor.onaudioprocess = (event) => {
             const float32Array = event.inputBuffer.getChannelData(0);
             const int16Array = new Int16Array(float32Array.length);
             for (let i = 0; i < float32Array.length; i++) {
-              int16Array[i] = Math.max(-1, Math.min(1, float32Array[i])) * 0x7FFF;
+              const s = Math.max(-1, Math.min(1, float32Array[i]));
+              int16Array[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
             }
             const base64Audio = btoa(String.fromCharCode(...new Uint8Array(int16Array.buffer)));
             channel.push("audio_chunk", {
               data: base64Audio,
               sample_rate: audioContext.sampleRate
             });
           };
           input.connect(mediaProcessor);
           mediaProcessor.connect(audioContext.destination);
         }

         function stopRecording() {
           stopButton.disabled = true;
           startButton.disabled = false;
           statusDiv.textContent = "🛑 Grabación detenida.";

-          // ✅ Enviamos evento especial para guardar
-          if (channel) {
-            channel.push("save_audio", {});
-          }
-
           if (mediaProcessor) mediaProcessor.disconnect();
           if (audioContext) audioContext.close();
           if (mediaStream) mediaStream.getTracks().forEach(track => track.stop());
           if (channel) channel.leave();
           if (socket) socket.disconnect();
         }

         document.getElementById("startButton").onclick = startRecording;
         document.getElementById("stopButton").onclick = stopRecording;
       </script>

       <style>
         .last-word {
           font-weight: bold;
           color: orange;
         }
+        #transcriptionContainer {
+          margin-top: 1rem;
+          font-family: sans-serif;
+          font-size: 1.1rem;
+        }
+        .sentence {
+          margin-bottom: 0.5rem;
+        }
       </style>
     </div>
     """
   end
 end
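
The hunk above only touches the browser side: it captures microphone audio with a ScriptProcessorNode, converts each Float32 buffer to 16-bit PCM, and pushes base64-encoded chunks to the "data:lobby" channel, expecting "realtime" (partial words) and "transcription" (finished sentence) events back. The Phoenix channel that feeds those chunks to whisper.cpp is not part of this hunk, so the following is only a minimal sketch under stated assumptions: the module name, chunk size, WAV wrapping, binary path, and flags are placeholders, and resampling to the 16 kHz input whisper.cpp expects is omitted.

defmodule Recognition_VADWeb.DataChannel do
  # Hypothetical sketch of the server side of "data:lobby"; the real channel
  # module and its whisper.cpp integration are not shown in this commit.
  use Phoenix.Channel

  # ~1 s of 16 kHz mono 16-bit PCM before running the model (assumed value).
  @chunk_bytes 16_000 * 2

  def join("data:lobby", _params, socket) do
    {:ok, Phoenix.Socket.assign(socket, :pcm, <<>>)}
  end

  def handle_in("audio_chunk", %{"data" => base64, "sample_rate" => _rate}, socket) do
    pcm = socket.assigns.pcm <> Base.decode64!(base64)

    socket =
      if byte_size(pcm) >= @chunk_bytes do
        text = transcribe(pcm)
        push(socket, "realtime", %{text: text})       # partial words while speaking
        push(socket, "transcription", %{text: text})  # finished sentence for this chunk
        Phoenix.Socket.assign(socket, :pcm, <<>>)
      else
        Phoenix.Socket.assign(socket, :pcm, pcm)
      end

    {:noreply, socket}
  end

  # Placeholder: writes the buffered PCM to a temporary WAV file and shells out
  # to the whisper.cpp binary with the tiny model. Paths and flags are assumptions.
  # Assumes the PCM is already 16 kHz mono; resampling from the browser's rate is omitted.
  defp transcribe(pcm) do
    wav = Path.join(System.tmp_dir!(), "chunk.wav")
    File.write!(wav, wav_header(byte_size(pcm), 16_000) <> pcm)

    {out, 0} = System.cmd("./main", ["-m", "models/ggml-tiny.bin", "-f", wav, "--no-timestamps"])
    String.trim(out)
  end

  # Minimal 16-bit mono PCM WAV header.
  defp wav_header(data_size, rate) do
    <<"RIFF", (data_size + 36)::little-size(32), "WAVE",
      "fmt ", 16::little-size(32), 1::little-size(16), 1::little-size(16),
      rate::little-size(32), (rate * 2)::little-size(32), 2::little-size(16), 16::little-size(16),
      "data", data_size::little-size(32)>>
  end
end

Two details in the client-side diff line up with this flow: the "realtime" and "transcription" handlers stay separate so partial words can be overwritten while finished sentences accumulate, and the new sample conversion scales negative samples by 0x8000 and positive ones by 0x7FFF so -1.0 maps exactly to -32768 instead of the symmetric ±0x7FFF scaling used before.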