Real-time whisper.cpp with the tiny model
@@ -6,8 +6,15 @@ defmodule Recognition_VADWeb.DataChannel do
     {:ok, socket}
   end
 
+  # Partial
+  def handle_info({:realtime, msg}, socket) do
+    push(socket, "realtime", msg)
+    {:noreply, socket}
+  end
+
+  # Complete
   def handle_info({:broadcast_audio, msg}, socket) do
-    push(socket, "transcription", Jason.decode!(msg))
+    push(socket, "transcription", msg)
     {:noreply, socket}
   end
 
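These handle_info/2 clauses imply that some other process delivers {:realtime, msg} and {:broadcast_audio, msg} tuples to the channel process, but the commit does not show that producer. A minimal sketch of one common pattern, assuming a Phoenix.PubSub subscription in join/3 with an invented topic name ("stt:results") and the app's default PubSub server; none of these names come from this commit:

  # Hypothetical wiring, not part of this commit: subscribe on join so PubSub
  # broadcasts arrive as handle_info messages in this channel process.
  def join("data:lobby", _params, socket) do
    Phoenix.PubSub.subscribe(Recognition_VAD.PubSub, "stt:results")
    {:ok, socket}
  end

  # Producer side (e.g. in the whisper streaming process), with placeholder text values:
  def publish_results(partial_text, full_text) do
    Phoenix.PubSub.broadcast(Recognition_VAD.PubSub, "stt:results", {:realtime, %{text: partial_text}})
    Phoenix.PubSub.broadcast(Recognition_VAD.PubSub, "stt:results", {:broadcast_audio, %{text: full_text}})
  end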
@@ -15,7 +22,7 @@ defmodule Recognition_VADWeb.DataChannel do
   def handle_in("audio_chunk", %{"data" => base64_chunk, "sample_rate" => sample_rate}, socket) do
     case Base.decode64(base64_chunk) do
       {:ok, binary_audio} ->
-        GenServer.cast(Recognition_VAD.AudioProcessor, {:chunk, binary_audio, sample_rate})
+        Recognition_VAD.WhisperStreamer.push_chunk(binary_audio, sample_rate)
         {:noreply, socket}
 
       :error ->
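Recognition_VAD.WhisperStreamer is called here but not included in the diff. A minimal sketch of the push_chunk/2 entry point the channel relies on, assuming a GenServer that buffers the raw PCM; the internals are assumptions, not the project's actual implementation:

defmodule Recognition_VAD.WhisperStreamer do
  use GenServer

  # Public API assumed by the channel: fire-and-forget so handle_in never blocks.
  def push_chunk(binary_audio, sample_rate) do
    GenServer.cast(__MODULE__, {:chunk, binary_audio, sample_rate})
  end

  def start_link(opts), do: GenServer.start_link(__MODULE__, opts, name: __MODULE__)

  @impl true
  def init(_opts), do: {:ok, %{buffer: <<>>}}

  @impl true
  def handle_cast({:chunk, binary_audio, _sample_rate}, state) do
    # Accumulate raw PCM; a real implementation would resample to 16 kHz
    # and periodically run whisper.cpp (tiny model) over the buffer.
    {:noreply, %{state | buffer: state.buffer <> binary_audio}}
  end
end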
@@ -7,114 +7,137 @@ defmodule Recognition_VADWeb.Stt.TestWithChannel do
 
   def render(assigns) do
     ~H"""
-    <div id="container">
-      <div id="status">Presioná "Start Recording"…</div>
-      <button id="startButton">Start Recording</button>
-      <button id="stopButton" disabled>Stop Recording</button>
-
-      <div id="transcriptionContainer">
-        <div id="transcription" class="realtime"></div>
-      </div>
-
-      <div id="fullTextContainer">
-        <div id="fullText"></div>
-      </div>
-
-      <script type="module">
-        import { Socket } from "https://cdn.skypack.dev/phoenix";
-
-        const statusDiv = document.getElementById("status");
-        const transcriptionDiv = document.getElementById("transcription");
-        const fullTextDiv = document.getElementById("fullText");
-        const startButton = document.getElementById("startButton");
-        const stopButton = document.getElementById("stopButton");
-        let socket, channel;
-        let audioContext, mediaStream, mediaProcessor;
-
-        async function startRecording() {
-          startButton.disabled = true;
-          stopButton.disabled = false;
-          statusDiv.textContent = "Recording…";
-          transcriptionDiv.textContent = "";
-          fullTextDiv.textContent = "";
-
-          socket = new Socket("ws://localhost:4000/socket");
-          socket.connect();
-
-          channel = socket.channel("data:lobby");
-          channel.join()
-            .receive("ok", () => {
-              statusDiv.textContent = "🎙 Conectado a Phoenix STT";
-              console.log("Canal conectado");
-            })
-            .receive("error", () => {
-              statusDiv.textContent = "❌ Error al conectar";
-              console.error("Error al conectar canal");
-            });
-
-          channel.on("realtime", payload => {
-            const words = payload.text.split(" ");
-            const lastWord = words.pop();
-            transcriptionDiv.innerHTML = `${words.join(" ")} <span class="last-word">${lastWord}</span>`;
-          });
-
-          channel.on("fullSentence", payload => {
-            fullTextDiv.innerHTML += payload.text + " ";
-          });
-
-          audioContext = new AudioContext();
-          mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
-          const input = audioContext.createMediaStreamSource(mediaStream);
-
-          mediaProcessor = audioContext.createScriptProcessor(1024, 1, 1);
-          mediaProcessor.onaudioprocess = (event) => {
-            const float32Array = event.inputBuffer.getChannelData(0);
-            const int16Array = new Int16Array(float32Array.length);
-            for (let i = 0; i < float32Array.length; i++) {
-              int16Array[i] = Math.max(-1, Math.min(1, float32Array[i])) * 0x7FFF;
-            }
-
-            const base64Audio = btoa(String.fromCharCode(...new Uint8Array(int16Array.buffer)));
-            channel.push("audio_chunk", {
-              data: base64Audio,
-              sample_rate: audioContext.sampleRate
-            });
-          };
-
-          input.connect(mediaProcessor);
-          mediaProcessor.connect(audioContext.destination);
-        }
-
-        function stopRecording() {
-          stopButton.disabled = true;
-          startButton.disabled = false;
-          statusDiv.textContent = "🛑 Grabación detenida.";
-
-          // ✅ Send a special event to save the audio
-          if (channel) {
-            channel.push("save_audio", {});
-          }
-
-          if (mediaProcessor) mediaProcessor.disconnect();
-          if (audioContext) audioContext.close();
-          if (mediaStream) mediaStream.getTracks().forEach(track => track.stop());
-          if (channel) channel.leave();
-          if (socket) socket.disconnect();
-        }
-
-        document.getElementById("startButton").onclick = startRecording;
-        document.getElementById("stopButton").onclick = stopRecording;
-      </script>
-
-      <style>
-        .last-word {
-          font-weight: bold;
-          color: orange;
-        }
-      </style>
-    </div>
+    <div id="container">
+      <div id="status">Presioná "Start Recording"…</div>
+      <button id="startButton">Start Recording</button>
+      <button id="stopButton" disabled>Stop Recording</button>
+
+      <div id="transcriptionContainer">
+        <div id="transcription" class="realtime"></div>
+      </div>
+
+      <script type="module">
+        import { Socket } from "https://cdn.skypack.dev/phoenix";
+
+        const statusDiv = document.getElementById("status");
+        const transcriptionDiv = document.getElementById("transcription");
+        const startButton = document.getElementById("startButton");
+        const stopButton = document.getElementById("stopButton");
+
+        let socket, channel;
+        let audioContext, mediaStream, mediaProcessor;
+
+        async function startRecording() {
+          startButton.disabled = true;
+          stopButton.disabled = false;
+          statusDiv.textContent = "🎙 Grabando…";
+          transcriptionDiv.innerHTML = "";
+
+          socket = new Socket("ws://localhost:4000/socket");
+          socket.connect();
+
+          channel = socket.channel("data:lobby");
+
+          channel.join()
+            .receive("ok", () => {
+              statusDiv.textContent = "✅ Conectado a Phoenix STT";
+              console.log("Canal conectado");
+            })
+            .receive("error", () => {
+              statusDiv.textContent = "❌ Error al conectar canal";
+              console.error("Error al conectar canal");
+            });
+
+          // Realtime partial (words while the user is speaking)
+          let partialTranscript = "";
+
+          channel.on("realtime", payload => {
+            const words = payload.text.split(" ");
+            const lastWord = words.pop();
+            const rest = words.join(" ");
+
+            if (rest.length > 0) {
+              partialTranscript += rest + " ";
+            }
+
+            transcriptionDiv.innerHTML = `
+              ${partialTranscript}<span class="last-word">${lastWord}</span>
+            `;
+          });
+
+          // Full sentence (after the chunks are processed)
+          channel.on("transcription", payload => {
+            const sentence = payload.text.trim();
+            if (sentence.length > 0) {
+              partialTranscript = ""; // reset the partial transcript
+              const span = document.createElement("div");
+              span.className = "sentence";
+              span.textContent = sentence;
+              transcriptionDiv.appendChild(span);
+              transcriptionDiv.innerHTML += "<br />";
+            }
+          });
+
+          // Audio setup
+          audioContext = new AudioContext();
+          mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+          const input = audioContext.createMediaStreamSource(mediaStream);
+
+          mediaProcessor = audioContext.createScriptProcessor(1024, 1, 1);
+          mediaProcessor.onaudioprocess = (event) => {
+            const float32Array = event.inputBuffer.getChannelData(0);
+            const int16Array = new Int16Array(float32Array.length);
+            for (let i = 0; i < float32Array.length; i++) {
+              const s = Math.max(-1, Math.min(1, float32Array[i]));
+              int16Array[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+            }
+
+            const base64Audio = btoa(String.fromCharCode(...new Uint8Array(int16Array.buffer)));
+            channel.push("audio_chunk", {
+              data: base64Audio,
+              sample_rate: audioContext.sampleRate
+            });
+          };
+
+          input.connect(mediaProcessor);
+          mediaProcessor.connect(audioContext.destination);
+        }
+
+        function stopRecording() {
+          stopButton.disabled = true;
+          startButton.disabled = false;
+          statusDiv.textContent = "🛑 Grabación detenida.";
+
+          if (mediaProcessor) mediaProcessor.disconnect();
+          if (audioContext) audioContext.close();
+          if (mediaStream) mediaStream.getTracks().forEach(track => track.stop());
+          if (channel) channel.leave();
+          if (socket) socket.disconnect();
+        }
+
+        document.getElementById("startButton").onclick = startRecording;
+        document.getElementById("stopButton").onclick = stopRecording;
+      </script>
+
+      <style>
+        .last-word {
+          font-weight: bold;
+          color: orange;
+        }
+        #transcriptionContainer {
+          margin-top: 1rem;
+          font-family: sans-serif;
+          font-size: 1.1rem;
+        }
+        .sentence {
+          margin-bottom: 0.5rem;
+        }
+      </style>
+    </div>
     """
   end
 
 end
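The template above ships base64-encoded little-endian 16-bit PCM at the browser's native sample rate (usually 44.1 or 48 kHz), while whisper.cpp works on 16 kHz mono float samples. A rough sketch of the server-side conversion that implies, with assumed module and function names that are not part of this commit:

defmodule Recognition_VAD.Pcm do
  # Decode little-endian signed 16-bit PCM into floats in [-1.0, 1.0).
  def to_floats(binary) do
    for <<sample::little-signed-integer-size(16) <- binary>>, do: sample / 32_768
  end

  # Very naive decimation for integer ratios (e.g. 48_000 -> 16_000).
  # A real pipeline should low-pass filter and resample properly.
  def naive_resample(samples, from_rate, to_rate \\ 16_000)
      when rem(from_rate, to_rate) == 0 do
    Enum.take_every(samples, div(from_rate, to_rate))
  end
end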