Intento conectarme al modelo y pasar los chunks y recibir las transcripciones (no funciona)

2025-06-17 16:46:04 -03:00
parent 07526dcccf
commit ba9ecfcff4
5 changed files with 133 additions and 2 deletions
--- a/stt_recorder/lib/stt_recorder/application.ex
+++ b/stt_recorder/lib/stt_recorder/application.ex
@ -11,6 +11,7 @@ defmodule SttRecorder.Application do
      SttRecorderWeb.Telemetry,
      {DNSCluster, query: Application.get_env(:stt_recorder, :dns_cluster_query) || :ignore},
      {Phoenix.PubSub, name: SttRecorder.PubSub},
+      SttServer.Transcriber,
      # Start the Finch HTTP client for sending emails
      {Finch, name: SttRecorder.Finch},
      # Start a worker by calling: SttRecorder.Worker.start_link(arg)
--- a/stt_recorder/lib/stt_recorder/transcriber.ex
+++ b/stt_recorder/lib/stt_recorder/transcriber.ex
@ -0,0 +1,70 @@
+defmodule SttServer.Transcriber do
+  @moduledoc """
+  GenServer that owns a Python speech-to-text worker started through an Erlang port.
+
+  Requests are written to the worker's stdin as newline-terminated JSON objects.
+  Replies are read from its stdout one line at a time and decoded as JSON.
+
+  Accepted casts:
+
+    * `{:audio_chunk, binary_audio, sample_rate}` - sends the audio, Base64-encoded,
+      in a `"transcribe"` event.
+    * `{:set_param, param, value}` - sends a `"set_param"` event.
+
+  The interpreter and script paths default to the values below and can be
+  overridden at runtime:
+
+      config :stt_recorder, SttServer.Transcriber,
+        python: "/path/to/python",
+        script: "/path/to/transcriber.py"
+  """
+
+  use GenServer
+  require Logger
+
+  @default_python "c:/Users/rolan/i_m/voice_recognition/minimal_server/realtime-env311/Scripts/python.exe"
+  @default_script "c:/Users/rolan/i_m/voice_recognition/stt_recorder/transcriber.py"
+
+  # Maximum number of bytes the port delivers per message in line mode. Longer
+  # lines arrive as `:noeol` fragments and are reassembled in the state buffer.
+  @max_line_bytes 65_536
+
+  @doc """
+  Starts the transcriber and registers it under the module name.
+  """
+  def start_link(_) do
+    GenServer.start_link(__MODULE__, %{}, name: __MODULE__)
+  end
+
+  @impl true
+  def init(_) do
+    config = Application.get_env(:stt_recorder, __MODULE__, [])
+    python = Keyword.get(config, :python, @default_python)
+    script_path = Keyword.get(config, :script, @default_script)
+
+    Logger.info("👉 Python path: #{python}")
+    Logger.info("👉 Script path: #{script_path}")
+
+    # `{:line, n}` makes the port deliver one message per output line. Without it
+    # the data arrives in arbitrary chunks that may hold half a JSON document or
+    # several of them, which cannot be decoded reliably.
+    port =
+      Port.open(
+        {:spawn_executable, python},
+        [:binary, :exit_status, :hide, {:line, @max_line_bytes}, args: [script_path]]
+      )
+
+    {:ok, %{port: port, buffer: []}}
+  end
+
+  @impl true
+  def handle_cast({:audio_chunk, binary_audio, sample_rate}, state) do
+    send_json(state.port, %{
+      event: "transcribe",
+      audio_base64: Base.encode64(binary_audio),
+      sample_rate: sample_rate
+    })
+
+    {:noreply, state}
+  end
+
+  def handle_cast({:set_param, param, value}, state) do
+    send_json(state.port, %{event: "set_param", param: param, value: value})
+    {:noreply, state}
+  end
+
+  @impl true
+  def handle_info({_port, {:data, {:eol, chunk}}}, state) do
+    # A complete line: prepend any fragments buffered from earlier `:noeol` messages.
+    line = IO.iodata_to_binary([state.buffer, chunk])
+    handle_line(line)
+
+    {:noreply, %{state | buffer: []}}
+  end
+
+  def handle_info({_port, {:data, {:noeol, chunk}}}, state) do
+    # The line is longer than @max_line_bytes; keep the fragment until `:eol` arrives.
+    {:noreply, %{state | buffer: [state.buffer, chunk]}}
+  end
+
+  def handle_info({_port, {:exit_status, status}}, state) do
+    Logger.error("⚠️ Proceso Python finalizado con status #{status}")
+    {:stop, :python_exit, state}
+  end
+
+  # Encodes `message` as JSON and writes it to the worker as a single line.
+  defp send_json(port, message) do
+    Port.command(port, [Jason.encode!(message), "\n"])
+  end
+
+  # Decodes one line of worker output and logs it according to its "type" field.
+  defp handle_line(line) do
+    Logger.debug("💬 Output from Python: #{inspect(line)}")
+
+    case Jason.decode(line) do
+      {:ok, %{"type" => "realtime", "text" => text}} ->
+        # TODO: forward the text to LiveView/PubSub
+        Logger.info("📢 Transcripción: #{text}")
+
+      {:ok, %{"type" => "log", "msg" => msg}} ->
+        Logger.info("📝 Python: #{inspect(msg)}")
+
+      {:ok, %{"type" => "error", "error" => error}} ->
+        Logger.error("⚠️ Error en Python: #{inspect(error)}")
+
+      _ ->
+        Logger.warning("⚠️ Mensaje inesperado de Python: #{inspect(line)}")
+    end
+  end
+end
--- a/stt_recorder/lib/stt_recorder_web/channels/data_chanel.ex
+++ b/stt_recorder/lib/stt_recorder_web/channels/data_chanel.ex
@ -1,11 +1,17 @@
 defmodule SttServerWeb.DataChannel do
  use Phoenix.Channel

-  def join("data:lobby", _payload, socket) do
-    IO.puts("🟢 Cliente conectado al canal de datos")
+  # Accepts joins on the "data:lobby" topic, ignoring the join params.
+  # Subscribes the calling process to the "audio_output" topic on
+  # SttRecorder.PubSub before returning {:ok, socket}.
+  def join("data:lobby", _params, socket) do
+    Phoenix.PubSub.subscribe(SttRecorder.PubSub, "audio_output")
     {:ok, socket}
   end

+
+  # Handles `{:broadcast_audio, msg}` messages sent to this channel process and
+  # pushes the decoded payload to the client as a "transcription" event.
+  #
+  # `msg` must be a JSON document that decodes to a map, because `push/3` only
+  # accepts map payloads. Anything else is logged and ignored, so a single bad
+  # message does not crash the channel and disconnect the client.
+  def handle_info({:broadcast_audio, msg}, socket) do
+    require Logger
+
+    case Jason.decode(msg) do
+      {:ok, payload} when is_map(payload) ->
+        push(socket, "transcription", payload)
+
+      other ->
+        Logger.warning("Ignoring malformed transcription broadcast: #{inspect(other)}")
+    end
+
+    {:noreply, socket}
+  end
+
  # Recibe audio codificado en base64 (para transporte seguro)
  def handle_in("audio_chunk", %{"data" => base64_chunk, "sample_rate" => sample_rate}, socket) do
    case Base.decode64(base64_chunk) do
--- a/stt_recorder/mix.exs
+++ b/stt_recorder/mix.exs
@ -55,6 +55,7 @@ defmodule SttRecorder.MixProject do
      {:jason, "~> 1.2"},
      {:dns_cluster, "~> 0.1.1"},
      {:bandit, "~> 1.5"}
+
    ]
  end

--- a/stt_recorder/transcriber.py
+++ b/stt_recorder/transcriber.py
@ -0,0 +1,53 @@
+import sys, json, base64
+import numpy as np
+import traceback
+
+def main():
+    from RealtimeSTT import AudioToTextRecorder
+    from scipy.signal import resample
+    import websockets
+    import threading
+    import logging
+    import wave
+    import json
+    import time
+
+    # Inicializá el modelo (usa tu clase personalizada)
+    model = AudioToTextRecorder(
+        model="base",
+        compute_type="int8",
+        language="es"
+    )
+
+    print(json.dumps({"type": "log", "msg": "✅ Worker iniciado con modelo Whisper"}), flush=True)
+
+    for line in sys.stdin:
+        try:
+            request = json.loads(line.strip())
+            if request["event"] == "transcribe":
+                sample_rate = request.get("sample_rate", 16000)
+                audio_data = base64.b64decode(request["audio_base64"])
+
+                audio_float32 = (
+                    np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
+                )
+
+                segments, _ = model.transcribe(audio_float32, sample_rate=sample_rate)
+                text = " ".join([s.text for s in segments if s.text.strip()])
+                print(json.dumps({"type": "realtime", "text": text}), flush=True)
+            elif request["event"] == "set_param":
+                setattr(model, request["param"], request["value"])
+                print(json.dumps({"type": "log", "msg": f"Set {request['param']} to {request['value']}"}), flush=True)
+
+            else:
+                print(json.dumps({"type": "log", "msg": f"Evento desconocido: {request.get('event')}"}), flush=True)
+
+        except Exception as e:
+            print(json.dumps({
+                "type": "error",
+                "error": str(e),
+                "trace": traceback.format_exc()
+            }), flush=True)
+
+# Start the worker loop only when this file is executed as a script.
+if __name__ == "__main__":
+    main()