corrección VAD en formato binario pcm16 - agrego transcripción al live

2025-08-01 21:22:11 +00:00
parent e43a8c01a7
commit 976e350436
11 changed files with 133 additions and 123 deletions
--- a/whisper/_build/dev/lib/whisper/.mix/compile.elixir
+++ b/whisper/_build/dev/lib/whisper/.mix/compile.elixir
--- a/whisper/_build/dev/lib/whisper/ebin/Elixir.Whisper.Application.beam
+++ b/whisper/_build/dev/lib/whisper/ebin/Elixir.Whisper.Application.beam
--- a/whisper/_build/dev/lib/whisper/ebin/Elixir.WhisperWeb.AudioChannel.beam
+++ b/whisper/_build/dev/lib/whisper/ebin/Elixir.WhisperWeb.AudioChannel.beam
--- a/whisper/_build/dev/lib/whisper/ebin/Elixir.WhisperWeb.VadLive.beam
+++ b/whisper/_build/dev/lib/whisper/ebin/Elixir.WhisperWeb.VadLive.beam
--- a/whisper/_build/dev/lib/whisper/ebin/whisper.app
+++ b/whisper/_build/dev/lib/whisper/ebin/whisper.app
@ -1,8 +1,8 @@
 {application,whisper,
-             [{modules,['Elixir.AudioBuffer','Elixir.AudioFilesList',
+             [{modules,['Elixir.AudioBuffer','Elixir.AudioSaver',
-                        'Elixir.AudioSaver','Elixir.Whisper',
+                        'Elixir.Whisper','Elixir.Whisper.Application',
-                        'Elixir.Whisper.Application','Elixir.Whisper.Counter',
+                        'Elixir.Whisper.Counter','Elixir.Whisper.LargeModel',
-                        'Elixir.Whisper.LargeModel','Elixir.Whisper.Mailer',
+                        'Elixir.Whisper.Mailer',
                        'Elixir.Whisper.RealtimeModel',
                        'Elixir.Whisper.SendToModel',
                        'Elixir.Whisper.Transcriber','Elixir.WhisperWeb',
--- a/whisper/assets/js/hooks/vad.js
+++ b/whisper/assets/js/hooks/vad.js
@ -4,7 +4,6 @@ export const VadHook = {
  async mounted() {
    const statusDiv = document.getElementById("vad-status");
    // Cargar onnxruntime y luego vad-web
    const ortScript = document.createElement("script");
    ortScript.src = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/ort.js";
@ -13,7 +12,6 @@ export const VadHook = {
    ortScript.onload = () => {
      vadScript.onload = async () => {
        // Inicializar canal Phoenix
        this.socket = new Socket("ws://localhost:4003/socket");
        this.socket.connect();
        this.channel = this.socket.channel("audio:lobby");
@ -21,40 +19,32 @@ export const VadHook = {
          console.log("✅ Canal audio:lobby unido.");
        });
-        // Preparar VAD pero no arrancar aún
+        const myvad = await vad.MicVAD.new({
        this.myvad = await vad.MicVAD.new({
          onSpeechStart: () => {
            statusDiv.textContent = "🎤 Voz detectada...";
          },
          onSpeechEnd: async (float32Audio) => {
            statusDiv.textContent = "✅ Voz finalizada. Enviando audio...";
            // Enviar el audio correctamente formateado
            await sendAudioChunk(float32Audio, this.channel);
            // Indicar stop si querés (como payload vacío JSON)
            this.channel.push("stop_audio", {});
          }
        });
-        // Esperar eventos desde LiveView
+        myvad.start();
-        this.handleEvent("init-vad", async () => {
+        statusDiv.textContent = "🚀 VAD iniciado.";
          await this.myvad.start();
          statusDiv.textContent = "🚀 VAD iniciado.";
        });
        this.handleEvent("stop-vad", async () => {
          if (this.myvad) {
            await this.myvad.stop();
            statusDiv.textContent = "🛑 VAD detenido.";
          }
        });
      };
      document.body.appendChild(vadScript);
    };
    document.body.appendChild(ortScript);
  }
 };
-// Convertir Float32Array a PCM 16-bit
+// Función de helper para enviar el chunk
 function float32ToInt16(float32Array) {
  const int16Array = new Int16Array(float32Array.length);
  for (let i = 0; i < float32Array.length; i++) {
@ -64,16 +54,24 @@ function float32ToInt16(float32Array) {
  return int16Array;
 }
 // Enviar audio binario al canal
 async function sendAudioChunk(float32Audio, channel) {
  const pcm16 = float32ToInt16(float32Audio);
  const header = JSON.stringify({ sample_rate: 16000 });
  const headerBytes = new TextEncoder().encode(header);
-  const totalLength = 2 + headerBytes.length + pcm16.byteLength;
+  const audioBytes = new Uint8Array(pcm16.buffer); // same as merged in el otro ejemplo
  const totalLength = 2 + headerBytes.length + audioBytes.length;
  const buffer = new ArrayBuffer(totalLength);
  const view = new DataView(buffer);
-  view.setUint16(0, headerBytes.length, true);
+
  // Encabezado: longitud en big endian
  view.setUint16(0, headerBytes.length, false); // <== big endian
  // Copiar header y audio al buffer
  new Uint8Array(buffer, 2, headerBytes.length).set(headerBytes);
-  new Uint8Array(buffer, 2 + headerBytes.length).set(new Uint8Array(pcm16.buffer));
+  new Uint8Array(buffer, 2 + headerBytes.length).set(audioBytes);
-  channel.pushBinary(buffer);
+
  // Enviar el buffer binario
  channel.push("audio_chunk", buffer);
  console.log("📤 Chunk binario enviado");
 }
--- a/whisper/lib/whisper/application.ex
+++ b/whisper/lib/whisper/application.ex
@ -26,8 +26,8 @@ defmodule Whisper.Application do
      {Phoenix.PubSub, name: Whisper.PubSub},
      WhisperWeb.Endpoint,
      Whisper.Counter,
-      AudioBuffer,
+      AudioBuffer
-      AudioFilesList
+      # AudioFilesList
    ]
    opts = [strategy: :one_for_one, name: Whisper.Supervisor]
--- a/whisper/lib/whisper/audio_manager/audio_file_list.ex
+++ b/whisper/lib/whisper/audio_manager/audio_file_list.ex
@ -1,66 +1,66 @@
-defmodule AudioFilesList do
+# defmodule AudioFilesList do
-  use GenServer
+#   use GenServer
-  require Logger
+#   require Logger
-  alias Phoenix.PubSub
+#   alias Phoenix.PubSub
-  def start_link(_opts) do
+#   def start_link(_opts) do
-    GenServer.start_link(__MODULE__, :idle, name: __MODULE__)
+#     GenServer.start_link(__MODULE__, :idle, name: __MODULE__)
-  end
+#   end
-  def add_file(path) do
+#   def add_file(path) do
-    Logger.debug("add file")
+#     Logger.debug("add file")
-    GenServer.cast(__MODULE__, {:new_file, path})
+#     GenServer.cast(__MODULE__, {:new_file, path})
-  end
+#   end
-  def init(:idle) do
+#   def init(:idle) do
-    Logger.info("AudioFilesList iniciado")
+#     Logger.info("AudioFilesList iniciado")
-    {:ok, %{queue: [], processing: false}}
+#     {:ok, %{queue: [], processing: false}}
-  end
+#   end
-  def handle_cast({:new_file, path}, %{queue: queue, processing: false} = state) do
+#   def handle_cast({:new_file, path}, %{queue: queue, processing: false} = state) do
-    Logger.info("📥 Archivo encolado: #{path}")
+#     Logger.info("📥 Archivo encolado: #{path}")
-    queue = queue ++ [path]
+#     queue = queue ++ [path]
-    [next | rest] = queue
+#     [next | rest] = queue
-    {:noreply, %{queue: rest, processing: false}}
+#     {:noreply, %{queue: rest, processing: false}}
-  end
+#   end
-  def handle_cast({:new_file, path}, %{queue: queue, processing: true} = state) do
+#   def handle_cast({:new_file, path}, %{queue: queue, processing: true} = state) do
-    {:noreply, %{state | queue: queue ++ [path]}}
+#     {:noreply, %{state | queue: queue ++ [path]}}
-  end
+#   end
-  def handle_info(:done, %{queue: []} = state) do
+#   def handle_info(:done, %{queue: []} = state) do
-    {:noreply, %{state | processing: false}}
+#     {:noreply, %{state | processing: false}}
-  end
+#   end
-  def handle_info(:done, %{queue: [next | rest]} = state) do
+#   def handle_info(:done, %{queue: [next | rest]} = state) do
-    {:noreply, %{state | queue: rest}}
+#     {:noreply, %{state | queue: rest}}
-  end
+#   end
-  # def handle_cast({:done_processing, path}, %{queue: queue} = state) do
+#   # def handle_cast({:done_processing, path}, %{queue: queue} = state) do
-  #   new_queue = Enum.reject(queue, fn p -> p == path end)
+#   #   new_queue = Enum.reject(queue, fn p -> p == path end)
-  #   Logger.info("🗑️ Archivo eliminado y removido de la cola: #{path}")
+#   #   Logger.info("🗑️ Archivo eliminado y removido de la cola: #{path}")
-  #   {:noreply, %{state | queue: new_queue}}
+#   #   {:noreply, %{state | queue: new_queue}}
-  # end
+#   # end
-  # defp process_file(path) do
+#   # defp process_file(path) do
-  #   Logger.info("▶️ Inicia procesamiento realtime: #{path}")
+#   #   Logger.info("▶️ Inicia procesamiento realtime: #{path}")
-  #   Task.start(fn ->
+#   #   Task.start(fn ->
-  #       case Whisper.SendToModel.realtime(path) do
+#   #       case Whisper.SendToModel.realtime(path) do
-  #         {:ok, text} when is_binary(text) ->
+#   #         {:ok, text} when is_binary(text) ->
-  #           message = %{"chunks" => [%{"text" => text}]}
+#   #           message = %{"chunks" => [%{"text" => text}]}
-  #           Phoenix.PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription, Jason.encode!(message)})
+#   #           Phoenix.PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription, Jason.encode!(message)})
-  #           Logger.info("✅ Transcripción (realtime): #{text}")
+#   #           Logger.info("✅ Transcripción (realtime): #{text}")
-  #           File.rm!(path)
+#   #           File.rm!(path)
-  #           # AudioFilesList.done_processing(path)
+#   #           # AudioFilesList.done_processing(path)
-  #         {:error, reason} ->
+#   #         {:error, reason} ->
-  #           Logger.error("❌ Error transcribiendo: #{inspect(reason)}")
+#   #           Logger.error("❌ Error transcribiendo: #{inspect(reason)}")
-  #       end
+#   #       end
-  #       send(__MODULE__, :done)
+#   #       send(__MODULE__, :done)
-  #   end)
+#   #   end)
-  # end
+#   # end
-end
+# end
--- a/whisper/lib/whisper_web/channels/audio_channel.ex
+++ b/whisper/lib/whisper_web/channels/audio_channel.ex
@ -26,22 +26,20 @@ defmodule WhisperWeb.AudioChannel do
    <<header_len::16, rest::binary>> = raw_binary
    <<header::binary-size(header_len), audio::binary>> = rest
-    IO.inspect(header, label: "HEADER BINARIO RECIBIDO")
+    %{"sample_rate" => rate} = Jason.decode!(header)
    ref = socket_id(socket)
-    case Jason.decode(header) do
+    Logger.info("Chunk recibido: #{byte_size(audio)} bytes, sample_rate: #{rate}")
-      {:ok, %{"sample_rate" => rate}} ->
+    AudioBuffer.append(ref, {rate, audio})
        Logger.info("Chunk recibido: #{byte_size(audio)} bytes, sample_rate: #{rate}")
        AudioBuffer.append(socket_id(socket), {rate, audio})
        {:noreply, socket}
-      {:error, reason} ->
+    # {:ok, path} = AudioSaver.save_chunk_as_wav(ref, audio, rate, "part")
-        Logger.error("Error decodificando header JSON: #{inspect(reason)}")
+    # AudioFilesList.add_file(path)
-        {:noreply, socket}
+
-    end
+
    {:noreply, socket}
  end
  @doc """
  Recupera todos los chunks acumulados en el buffer, los concatena y guarda un archivo WAV final (sufijo `"final"`).
  """
@ -60,8 +58,8 @@ defmodule WhisperWeb.AudioChannel do
      Task.start(fn ->
        transcription = Whisper.SendToModel.large(path)
        Logger.info("✅ Transcripción completa:\n#{transcription}")
-        # message = %{"chunks" => [%{"text" => transcription}]}
+        message = %{"chunks" => [%{"text" => transcription}]}
-        # Phoenix.PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription_m, Jason.encode!(message)})
+        Phoenix.PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription, Jason.encode!(message)})
        File.rm!(path)
      end)
    end
--- a/whisper/lib/whisper_web/live/vad_live.ex
+++ b/whisper/lib/whisper_web/live/vad_live.ex
@ -1,28 +1,48 @@
 defmodule WhisperWeb.VadLive do
    use WhisperWeb, :live_view
  alias Phoenix.PubSub
    def mount(_, _, socket) do
-        {:ok, assign(socket, started: false)}
+        PubSub.subscribe(Whisper.PubSub, "transcription")
        socket =
            socket
            |> assign(:transcription, "")
            |> assign(:started, false)
        {:ok, socket}
    end
    # Handles the "start_vad" click from the LiveView button: queues the
    # "init-vad" event for the client hook and marks the session as started.
    #
    # `push_event/3` does not send anything by itself; it returns an updated
    # socket that carries the pending event. That returned socket has to be
    # the one handed back from this callback, otherwise the event is dropped.
    def handle_event("start_vad", _params, socket) do
        socket =
            socket
            |> push_event("init-vad", %{})
            |> assign(started: true)
        {:noreply, socket}
    end
    # Receives a `{:transcription, raw_json}` message (this LiveView subscribes
    # to the "transcription" PubSub topic in mount/3). `raw_json` is a JSON
    # string; the "text" of its first entry under "chunks" is appended to the
    # :transcription assign, separated by a single space.
    #
    # When no binary text is found at that path (for example an empty "chunks"
    # list or a chunk without "text", where `get_in/2` yields nil), the assign
    # is left unchanged instead of raising inside `<>` and crashing the view.
    def handle_info({:transcription, raw_json}, socket) do
        new_text =
            raw_json
            |> Jason.decode!()
            |> get_in(["chunks", Access.at(0), "text"])
        if is_binary(new_text) do
            {:noreply, update(socket, :transcription, &(&1 <> " " <> new_text))}
        else
            {:noreply, socket}
        end
    end
    def render(assigns) do
        ~H"""
        <div id="vad-container" phx-hook="VadHook">
            <button phx-click="start_vad" class="btn btn-primary">🎙 Iniciar VAD</button>
-            <button phx-click="stop_vad" class="btn btn-danger">🛑 Detener VAD</button>
+
            <div id="vad-status" class="mt-4 text-sm text-gray-700"></div>
        </div>
        <div id="transcriptionContainer" class="w-full max-w-2xl space-y-4">
            <%= if @transcription != "" do %>
                <div class="p-4 bg-gray-100 rounded shadow-md">
                    <h2 class="text-sm font-semibold text-gray-700 mb-2">✅ Transcripción</h2>
                    <p class="text-green-600 whitespace-pre-wrap break-words text-sm leading-relaxed"><%= @transcription %></p>
                </div>
            <% end %>
        </div>
        """
    end
    # Handles the "stop_vad" click by queuing the "stop-vad" event for the
    # client hook. `push_event/3` returns the socket that carries the pending
    # event, so that returned socket is what this callback must hand back.
    #
    # NOTE: a second handle_event("start_vad", ...) clause used to sit here.
    # It was removed because it could never run: the earlier "start_vad"
    # clause in this module matches the same arguments first.
    def handle_event("stop_vad", _, socket) do
        {:noreply, push_event(socket, "stop-vad", %{})}
    end
 end
--- a/whisper/priv/static/assets/app.js
+++ b/whisper/priv/static/assets/app.js