Transcripción realtime y cambio a la transcripción large. Conexión al modelo large v3.

This commit is contained in:
2025-07-02 16:23:19 -03:00
parent 77f87c3655
commit 46df5fc5eb
12 changed files with 211 additions and 24 deletions

View File

@ -13,6 +13,7 @@ defmodule Recognition_VAD.Application do
{Phoenix.PubSub, name: Recognition_VAD.PubSub},
Recognition_VAD.AudioProcessor,
Recognition_VAD.WhisperStreamer,
Recognition_VAD.LargeTranscriber,
# Start the Finch HTTP client for sending emails
{Finch, name: Recognition_VAD.Finch},

View File

@ -11,22 +11,29 @@ defmodule Recognition_VAD.AudioProcessor do
end
# Buffers an incoming PCM chunk (unbounded — the buffer is cleared by :reset)
# and records the most recent sample rate.
def handle_cast({:chunk, binary_audio, sample_rate}, state) do
# Prepend for O(1) insertion; the former Enum.take(100) cap was removed,
# and the dead shadowed binding left over from that change is dropped here.
new_buffer = [binary_audio | state.buffer]
Logger.info("🟡 Recibido chunk de #{byte_size(binary_audio)} bytes a #{sample_rate} Hz")
{:noreply, %{state | buffer: new_buffer, sample_rate: sample_rate}}
end
# Writes the buffered PCM chunks to a timestamped WAV file, hands the file
# to the large-model transcriber, and notifies LiveView subscribers.
def handle_cast(:save_wav, state) do
timestamp = DateTime.utc_now() |> DateTime.to_unix()
filename = "recording_#{timestamp}.wav"
Recognition_VAD.WavWriter.write_pcm_chunks_to_wav(state.buffer, state.sample_rate, filename)
# Fix: interpolation was garbled ("#(unknown)") — log the actual filename.
Logger.info("💾 Guardado archivo: #{filename}")
Recognition_VAD.LargeTranscriber.improve_transcription(filename)
# Broadcast so the LiveView can react to the newly saved recording.
Phoenix.PubSub.broadcast(Recognition_VAD.PubSub, "audio_output", {:audio_saved, %{path: filename}})
{:noreply, state}
end
# Clears the audio buffer and sample rate so a fresh recording can begin.
def handle_cast(:reset, state) do
Logger.info("🔄 Reset del buffer de audio para nueva grabación")
cleared_state = %{state | buffer: [], sample_rate: 0}
{:noreply, cleared_state}
end
end

View File

@ -0,0 +1,67 @@
defmodule Recognition_VAD.LargeTranscriber do
@moduledoc """
GenServer that re-transcribes a saved recording with the large Whisper
model by invoking a whisper.cpp shell script inside WSL, then broadcasts
the cleaned-up text on the "large" PubSub topic.
"""

use GenServer
require Logger

# Whisper model file name handed to the transcription script.
@default_model "ggml-large-v3-turbo.bin"
# Script (path inside WSL) that runs whisper.cpp over a WAV file.
@script_path "/home/aime-pc2/i_m/whisper.cpp/large_transcribe.sh"

def start_link(_opts) do
GenServer.start_link(__MODULE__, %{}, name: __MODULE__)
end

@impl true
def init(state) do
{:ok, state}
end

@doc """
Asynchronously requests an improved (large-model) transcription of the
recording at `audio_path`.
"""
def improve_transcription(audio_path) do
GenServer.cast(__MODULE__, {:improve, audio_path})
end

@impl true
def handle_cast({:improve, path}, state) do
Logger.info("🚀 LargeTranscriber recibió la ruta: #{path}")
# NOTE(review): host project path is hard-coded — consider moving it to
# application config so the module works outside this machine.
large_path = "/mnt/c/Users/rolan/i_m/voice_recognition/recognition_VAD/#{path}"
Phoenix.PubSub.broadcast(Recognition_VAD.PubSub, "large", {:large_path, :info, "#{path}"})
transcribe(large_path, @default_model)
{:noreply, state}
end

@doc """
Runs the WSL transcription script over `path` with `model`.

Returns `{:ok, text}` on exit status 0 (also broadcasting the cleaned
text on the "large" topic) or `{:error, output}` otherwise.
"""
def transcribe(path, model) do
args = [@script_path, path, model]

case System.cmd("wsl", args, stderr_to_stdout: true) do
{output, 0} ->
text = extract_transcription(output)
Phoenix.PubSub.broadcast(Recognition_VAD.PubSub, "large", {:transcription_improved, :info, "#{text}"})
{:ok, text}

{error_output, _} ->
Logger.error("❌ Error al transcribir con whisper: #{error_output}")
{:error, error_output}
end
end

# Keeps only lines that contain at least one letter/digit and do not start
# with a known whisper.cpp diagnostic prefix, joined into one string.
# String.starts_with?/2 accepts a list of prefixes; the former separate
# "whisper_print_timings:" check was redundant (subsumed by "whisper_").
defp extract_transcription(output) do
output
|> String.split("\n")
|> Enum.filter(fn line ->
line =~ ~r/[\p{L}\p{N}]/u and
not String.starts_with?(line, ["whisper_", "system_info", "main: "])
end)
|> Enum.join(" ")
|> String.trim()
end
end

View File

@ -11,7 +11,8 @@ defmodule Recognition_VAD.Whisper do
case System.cmd("wsl", args, stderr_to_stdout: true) do
{output, 0} ->
text = extract_transcription(output)
Logger.info("📝 Transcripción: #{text}")
Logger.info("📝 Transcripción real time: #{text}")
{:ok, text}
{error_output, _} ->

View File

@ -2,7 +2,7 @@ defmodule Recognition_VAD.WhisperStreamer do
use GenServer
require Logger
@transcribe_interval 2000 # cada 2 segundos
@transcribe_interval 1000 # cada 1 segundo
@max_chunks 100 # máximo a mantener en memoria
def start_link(_opts) do
@ -47,9 +47,15 @@ defmodule Recognition_VAD.WhisperStreamer do
end)
schedule_transcription()
{:noreply, %{state | chunks: []}}
# 👉 Conservamos un 25% del audio anterior para contexto
overlap_chunks =
Enum.take(Enum.reverse(chunks), trunc(length(chunks) * 0.25))
{:noreply, %{state | chunks: overlap_chunks}}
end
# Arms the periodic timer: after @transcribe_interval ms this process
# receives a :transcribe_timer message (handled elsewhere in the server).
defp schedule_transcription do
Process.send_after(self(), :transcribe_timer, @transcribe_interval)
end