Real-time whisper.cpp with the tiny model
@@ -6,8 +6,15 @@ defmodule Recognition_VADWeb.DataChannel do
     {:ok, socket}
   end
 
+  # Partial
+  def handle_info({:realtime, msg}, socket) do
+    push(socket, "realtime", msg)
+    {:noreply, socket}
+  end
+
+  # Complete
   def handle_info({:broadcast_audio, msg}, socket) do
-    push(socket, "transcription", Jason.decode!(msg))
+    push(socket, "transcription", msg)
     {:noreply, socket}
   end
 
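These handle_info/2 clauses imply that some other process delivers {:realtime, msg} and {:broadcast_audio, msg} tuples to the channel process, but the commit does not show that producer. A minimal sketch of one common pattern, assuming a Phoenix.PubSub subscription in join/3 with an invented topic name ("stt:results") and the app's default PubSub server; none of these names come from this commit:

  # Hypothetical wiring, not part of this commit: subscribe on join so PubSub
  # broadcasts arrive as handle_info messages in this channel process.
  def join("data:lobby", _params, socket) do
    Phoenix.PubSub.subscribe(Recognition_VAD.PubSub, "stt:results")
    {:ok, socket}
  end

  # Producer side (e.g. in the whisper streaming process), with placeholder text values:
  def publish_results(partial_text, full_text) do
    Phoenix.PubSub.broadcast(Recognition_VAD.PubSub, "stt:results", {:realtime, %{text: partial_text}})
    Phoenix.PubSub.broadcast(Recognition_VAD.PubSub, "stt:results", {:broadcast_audio, %{text: full_text}})
  end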
@@ -15,7 +22,7 @@ defmodule Recognition_VADWeb.DataChannel do
   def handle_in("audio_chunk", %{"data" => base64_chunk, "sample_rate" => sample_rate}, socket) do
     case Base.decode64(base64_chunk) do
       {:ok, binary_audio} ->
-        GenServer.cast(Recognition_VAD.AudioProcessor, {:chunk, binary_audio, sample_rate})
+        Recognition_VAD.WhisperStreamer.push_chunk(binary_audio, sample_rate)
         {:noreply, socket}
 
       :error ->
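Recognition_VAD.WhisperStreamer is called here but not included in the diff. A minimal sketch of the push_chunk/2 entry point the channel relies on, assuming a GenServer that buffers the raw PCM; the internals are assumptions, not the project's actual implementation:

defmodule Recognition_VAD.WhisperStreamer do
  use GenServer

  # Public API assumed by the channel: fire-and-forget so handle_in never blocks.
  def push_chunk(binary_audio, sample_rate) do
    GenServer.cast(__MODULE__, {:chunk, binary_audio, sample_rate})
  end

  def start_link(opts), do: GenServer.start_link(__MODULE__, opts, name: __MODULE__)

  @impl true
  def init(_opts), do: {:ok, %{buffer: <<>>}}

  @impl true
  def handle_cast({:chunk, binary_audio, _sample_rate}, state) do
    # Accumulate raw PCM; a real implementation would resample to 16 kHz
    # and periodically run whisper.cpp (tiny model) over the buffer.
    {:noreply, %{state | buffer: state.buffer <> binary_audio}}
  end
end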
@@ -7,114 +7,137 @@ defmodule Recognition_VADWeb.Stt.TestWithChannel do
 
   def render(assigns) do
     ~H"""
-    <div id="container">
-      <div id="status">Presioná "Start Recording"…</div>
-      <button id="startButton">Start Recording</button>
-      <button id="stopButton" disabled>Stop Recording</button>
-
-      <div id="transcriptionContainer">
-        <div id="transcription" class="realtime"></div>
-      </div>
-
-      <div id="fullTextContainer">
-        <div id="fullText"></div>
-      </div>
-
-      <script type="module">
-        import { Socket } from "https://cdn.skypack.dev/phoenix";
-
-        const statusDiv = document.getElementById("status");
-        const transcriptionDiv = document.getElementById("transcription");
-        const fullTextDiv = document.getElementById("fullText");
-        const startButton = document.getElementById("startButton");
-        const stopButton = document.getElementById("stopButton");
-        let socket, channel;
-        let audioContext, mediaStream, mediaProcessor;
-
-        async function startRecording() {
-          startButton.disabled = true;
-          stopButton.disabled = false;
-          statusDiv.textContent = "Recording…";
-          transcriptionDiv.textContent = "";
-          fullTextDiv.textContent = "";
-
-          socket = new Socket("ws://localhost:4000/socket");
-          socket.connect();
-
-          channel = socket.channel("data:lobby");
-          channel.join()
-            .receive("ok", () => {
-              statusDiv.textContent = "🎙 Conectado a Phoenix STT";
-              console.log("Canal conectado");
-            })
-            .receive("error", () => {
-              statusDiv.textContent = "❌ Error al conectar";
-              console.error("Error al conectar canal");
-            });
-
-          channel.on("realtime", payload => {
-            const words = payload.text.split(" ");
-            const lastWord = words.pop();
-            transcriptionDiv.innerHTML = `${words.join(" ")} <span class="last-word">${lastWord}</span>`;
-          });
-
-          channel.on("fullSentence", payload => {
-            fullTextDiv.innerHTML += payload.text + " ";
-          });
-
-          audioContext = new AudioContext();
-          mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
-          const input = audioContext.createMediaStreamSource(mediaStream);
-
-          mediaProcessor = audioContext.createScriptProcessor(1024, 1, 1);
-          mediaProcessor.onaudioprocess = (event) => {
-            const float32Array = event.inputBuffer.getChannelData(0);
-            const int16Array = new Int16Array(float32Array.length);
-            for (let i = 0; i < float32Array.length; i++) {
-              int16Array[i] = Math.max(-1, Math.min(1, float32Array[i])) * 0x7FFF;
-            }
-
-            const base64Audio = btoa(String.fromCharCode(...new Uint8Array(int16Array.buffer)));
-            channel.push("audio_chunk", {
-              data: base64Audio,
-              sample_rate: audioContext.sampleRate
-            });
-          };
-
-          input.connect(mediaProcessor);
-          mediaProcessor.connect(audioContext.destination);
-        }
-
-        function stopRecording() {
-          stopButton.disabled = true;
-          startButton.disabled = false;
-          statusDiv.textContent = "🛑 Grabación detenida.";
-
-          // ✅ Send a special event to save the audio
-          if (channel) {
-            channel.push("save_audio", {});
-          }
-
-          if (mediaProcessor) mediaProcessor.disconnect();
-          if (audioContext) audioContext.close();
-          if (mediaStream) mediaStream.getTracks().forEach(track => track.stop());
-          if (channel) channel.leave();
-          if (socket) socket.disconnect();
-        }
-
-        document.getElementById("startButton").onclick = startRecording;
-        document.getElementById("stopButton").onclick = stopRecording;
-      </script>
-
-      <style>
-        .last-word {
-          font-weight: bold;
-          color: orange;
-        }
-      </style>
-    </div>
+    <div id="container">
+      <div id="status">Presioná "Start Recording"…</div>
+      <button id="startButton">Start Recording</button>
+      <button id="stopButton" disabled>Stop Recording</button>
+
+      <div id="transcriptionContainer">
+        <div id="transcription" class="realtime"></div>
+      </div>
+
+      <script type="module">
+        import { Socket } from "https://cdn.skypack.dev/phoenix";
+
+        const statusDiv = document.getElementById("status");
+        const transcriptionDiv = document.getElementById("transcription");
+        const startButton = document.getElementById("startButton");
+        const stopButton = document.getElementById("stopButton");
+
+        let socket, channel;
+        let audioContext, mediaStream, mediaProcessor;
+
+        async function startRecording() {
+          startButton.disabled = true;
+          stopButton.disabled = false;
+          statusDiv.textContent = "🎙 Grabando…";
+          transcriptionDiv.innerHTML = "";
+
+          socket = new Socket("ws://localhost:4000/socket");
+          socket.connect();
+
+          channel = socket.channel("data:lobby");
+
+          channel.join()
+            .receive("ok", () => {
+              statusDiv.textContent = "✅ Conectado a Phoenix STT";
+              console.log("Canal conectado");
+            })
+            .receive("error", () => {
+              statusDiv.textContent = "❌ Error al conectar canal";
+              console.error("Error al conectar canal");
+            });
+
+          // Realtime partial (words while the user is speaking)
+          let partialTranscript = "";
+
+          channel.on("realtime", payload => {
+            const words = payload.text.split(" ");
+            const lastWord = words.pop();
+            const rest = words.join(" ");
+
+            if (rest.length > 0) {
+              partialTranscript += rest + " ";
+            }
+
+            transcriptionDiv.innerHTML = `
+              ${partialTranscript}<span class="last-word">${lastWord}</span>
+            `;
+          });
+
+          // Full sentence (after the chunks are processed)
+          channel.on("transcription", payload => {
+            const sentence = payload.text.trim();
+            if (sentence.length > 0) {
+              partialTranscript = ""; // reset the partial transcript
+              const span = document.createElement("div");
+              span.className = "sentence";
+              span.textContent = sentence;
+              transcriptionDiv.appendChild(span);
+              transcriptionDiv.innerHTML += "<br />";
+            }
+          });
+
+          // Audio setup
+          audioContext = new AudioContext();
+          mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+          const input = audioContext.createMediaStreamSource(mediaStream);
+
+          mediaProcessor = audioContext.createScriptProcessor(1024, 1, 1);
+          mediaProcessor.onaudioprocess = (event) => {
+            const float32Array = event.inputBuffer.getChannelData(0);
+            const int16Array = new Int16Array(float32Array.length);
+            for (let i = 0; i < float32Array.length; i++) {
+              const s = Math.max(-1, Math.min(1, float32Array[i]));
+              int16Array[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+            }
+
+            const base64Audio = btoa(String.fromCharCode(...new Uint8Array(int16Array.buffer)));
+            channel.push("audio_chunk", {
+              data: base64Audio,
+              sample_rate: audioContext.sampleRate
+            });
+          };
+
+          input.connect(mediaProcessor);
+          mediaProcessor.connect(audioContext.destination);
+        }
+
+        function stopRecording() {
+          stopButton.disabled = true;
+          startButton.disabled = false;
+          statusDiv.textContent = "🛑 Grabación detenida.";
+
+          if (mediaProcessor) mediaProcessor.disconnect();
+          if (audioContext) audioContext.close();
+          if (mediaStream) mediaStream.getTracks().forEach(track => track.stop());
+          if (channel) channel.leave();
+          if (socket) socket.disconnect();
+        }
+
+        document.getElementById("startButton").onclick = startRecording;
+        document.getElementById("stopButton").onclick = stopRecording;
+      </script>
+
+      <style>
+        .last-word {
+          font-weight: bold;
+          color: orange;
+        }
+        #transcriptionContainer {
+          margin-top: 1rem;
+          font-family: sans-serif;
+          font-size: 1.1rem;
+        }
+        .sentence {
+          margin-bottom: 0.5rem;
+        }
+      </style>
+    </div>
     """
   end
 
 end
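The template above ships base64-encoded little-endian 16-bit PCM at the browser's native sample rate (usually 44.1 or 48 kHz), while whisper.cpp works on 16 kHz mono float samples. A rough sketch of the server-side conversion that implies, with assumed module and function names that are not part of this commit:

defmodule Recognition_VAD.Pcm do
  # Decode little-endian signed 16-bit PCM into floats in [-1.0, 1.0).
  def to_floats(binary) do
    for <<sample::little-signed-integer-size(16) <- binary>>, do: sample / 32_768
  end

  # Very naive decimation for integer ratios (e.g. 48_000 -> 16_000).
  # A real pipeline should low-pass filter and resample properly.
  def naive_resample(samples, from_rate, to_rate \\ 16_000)
      when rem(from_rate, to_rate) == 0 do
    Enum.take_every(samples, div(from_rate, to_rate))
  end
end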