whisper large v3, arranque run.sh

2025-08-04 19:23:40 +00:00
parent 08cf12beb1
commit 4a22a68ce3
1244 changed files with 235 additions and 207 deletions
--- a/whisper/lib/whisper_web/channels/audio_channel.ex
+++ b/whisper/lib/whisper_web/channels/audio_channel.ex
@@ -31,49 +31,69 @@ defmodule WhisperWeb.AudioChannel do

    Logger.info("Chunk recibido: #{byte_size(audio)} bytes, sample_rate: #{rate}")
    AudioBuffer.append(ref, {rate, audio})
+    chunks = AudioBuffer.get_and_clear(ref)

-    # {:ok, path} = AudioSaver.save_chunk_as_wav(ref, audio, rate, "part")
-    # AudioFilesList.add_file(path)
+    start_total = System.monotonic_time(:millisecond)
+
+    if chunks != [] do
+
+      Task.start(fn ->
+        [{rate, _} | _] = chunks
+        full_audio = Enum.map(chunks, fn {_, bin} -> bin end) |> IO.iodata_to_binary()
+
+        {wav_time, {:ok, path}} =
+          :timer.tc(fn ->
+            AudioSaver.save_chunk_as_wav(ref, full_audio, rate, "part")
+          end)
+        model_start = System.monotonic_time(:millisecond)
+        Logger.info("WAV guardado en #{div(wav_time, 1000)} ms")


+        transcription =
+          if path do
+            case Nx.Serving.batched_run(Whisper.LargeModel.Serving, {:file, path}) do
+              %{chunks: chunks} ->
+                chunks
+                |> Enum.map(& &1.text)
+                |> Enum.join(" ")
+
+              _ ->
+                "Transcripción no disponible"
+            end
+          else
+            "Archivo no disponible"
+          end
+
+
+        model_end = System.monotonic_time(:millisecond)
+        Logger.info("El modelo procesó en #{model_end - model_start} ms")
+
+        Logger.info("✅ Transcripción:\n#{transcription}")
+
+        message = %{"chunks" => [%{"text" => transcription}]}
+        PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription, %{
+          "received_at" => model_start,
+          "text" => transcription
+        }})
+        File.rm!(path)
+
+        end_total = System.monotonic_time(:millisecond)
+        Logger.info("⏱ Total procesamiento stop_audio: #{end_total - start_total} ms")
+      end)
+    end
    {:noreply, socket}
  end

-
-  @doc """
-  Recupera todos los chunks acumulados en el buffer, los concatena y guarda un archivo WAV final (sufijo `"final"`).
-  """
  def handle_in("stop_audio", _payload, socket) do
-
    Logger.info("🛑 Grabación detenida por cliente")

    ref = socket_id(socket)
-    chunks = AudioBuffer.get_and_clear(ref)

-    if chunks != [] do
-      [{rate, _} | _] = chunks
-      full_audio = Enum.map(chunks, fn {_, bin} -> bin end) |> IO.iodata_to_binary()
-      {:ok, path} = AudioSaver.save_chunk_as_wav(ref, full_audio, rate, "final")
-      
-      Task.start(fn ->
-        transcription = Whisper.SendToModel.large(path)
-        Logger.info("✅ Transcripción completa:\n#{transcription}")
-        message = %{"chunks" => [%{"text" => transcription}]}
-        Phoenix.PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription, Jason.encode!(message)})
-        File.rm!(path)
-      end)
-    end

    {:noreply, socket}
  end

+
  defp socket_id(socket), do: socket.transport_pid |> :erlang.pid_to_list() |> List.to_string()
  
-  def save_raw(ref, bin) do
-    File.mkdir_p!("recordings/")
-    filename = "#{ref}_#{Whisper.Counter.next(ref)}.raw"
-    path = Path.join("recordings", filename)
-    File.write!(path, bin)
-    {:ok, path}
-  end
 end