Intento conectarme al modelo y pasar los chunks y recibir las transcripciones (no funciona)

2025-06-17 16:46:04 -03:00
parent 07526dcccf
commit ba9ecfcff4
5 changed files with 133 additions and 2 deletions
--- a/stt_recorder/lib/stt_recorder/application.ex
+++ b/stt_recorder/lib/stt_recorder/application.ex
@ -11,6 +11,7 @@ defmodule SttRecorder.Application do
      SttRecorderWeb.Telemetry,
      {DNSCluster, query: Application.get_env(:stt_recorder, :dns_cluster_query) || :ignore},
      {Phoenix.PubSub, name: SttRecorder.PubSub},
      SttServer.Transcriber,
      # Start the Finch HTTP client for sending emails
      {Finch, name: SttRecorder.Finch},
      # Start a worker by calling: SttRecorder.Worker.start_link(arg)
--- a/stt_recorder/lib/stt_recorder/transcriber.ex
+++ b/stt_recorder/lib/stt_recorder/transcriber.ex
@ -0,0 +1,70 @@
 defmodule SttServer.Transcriber do
  @moduledoc """
  GenServer that owns an external Python process (opened as an Erlang port) and
  exchanges newline-delimited JSON messages with it.

  Outgoing messages (one JSON object per line, written to the port):

    * `{"event": "transcribe", "audio_base64": ..., "sample_rate": ...}`
    * `{"event": "set_param", "param": ..., "value": ...}`

  Incoming output is split into lines; each line is decoded and logged
  according to its `"type"` (`"realtime"`, `"log"` or `"error"`).

  The interpreter and script paths default to the values that were previously
  hard-coded and can be overridden with:

      config :stt_recorder, SttServer.Transcriber,
        python: "/path/to/python",
        script_path: "/path/to/transcriber.py"
  """
  use GenServer
  require Logger

  @default_python "c:/Users/rolan/i_m/voice_recognition/minimal_server/realtime-env311/Scripts/python.exe"
  @default_script_path "c:/Users/rolan/i_m/voice_recognition/stt_recorder/transcriber.py"

  @doc """
  Starts the transcriber and registers it under the module name.

  The argument is ignored.
  """
  def start_link(_) do
    GenServer.start_link(__MODULE__, %{}, name: __MODULE__)
  end

  @impl true
  def init(_) do
    config = Application.get_env(:stt_recorder, __MODULE__, [])
    python = Keyword.get(config, :python, @default_python)
    script_path = Keyword.get(config, :script_path, @default_script_path)
    Logger.info("👉 Python path: #{python}")
    Logger.info("👉 Script path: #{script_path}")

    port =
      Port.open(
        {:spawn_executable, python},
        [:binary, :exit_status, :hide, args: [script_path]]
      )

    # `buffer` holds the trailing, not yet newline-terminated part of the
    # port output between two `{:data, _}` messages.
    {:ok, %{port: port, buffer: ""}}
  end

  @impl true
  def handle_cast({:audio_chunk, binary_audio, sample_rate}, state) do
    send_json(state.port, %{
      event: "transcribe",
      audio_base64: Base.encode64(binary_audio),
      sample_rate: sample_rate
    })

    {:noreply, state}
  end

  def handle_cast({:set_param, param, value}, state) do
    send_json(state.port, %{event: "set_param", param: param, value: value})
    {:noreply, state}
  end

  @impl true
  def handle_info({port, {:data, data}}, %{port: port} = state) do
    # A port in stream mode delivers output in arbitrary chunks: one message
    # may carry half a line or several lines. Only complete lines are decoded;
    # the remainder is kept for the next message.
    {lines, rest} = split_lines(state.buffer <> data)
    Enum.each(lines, fn line -> line |> String.trim() |> handle_line() end)
    {:noreply, %{state | buffer: rest}}
  end

  def handle_info({port, {:exit_status, status}}, %{port: port} = state) do
    Logger.error("⚠️ Proceso Python finalizado con status #{status}")
    {:stop, :python_exit, state}
  end

  def handle_info(message, state) do
    # Without this clause any unrelated message would crash the server with a
    # FunctionClauseError.
    Logger.warning("⚠️ Mensaje inesperado en Transcriber: #{inspect(message)}")
    {:noreply, state}
  end

  # Encodes `message` as JSON and writes it to the port as a single line.
  # Sent as iodata to avoid copying the (potentially large) base64 payload.
  defp send_json(port, message) do
    Port.command(port, [Jason.encode_to_iodata!(message), ?\n])
  end

  # Splits `buffer` on newlines. Returns `{complete_lines, rest}` where `rest`
  # is the text after the last newline ("" when the buffer ends with one).
  defp split_lines(buffer) do
    {rest, lines} = buffer |> String.split("\n") |> List.pop_at(-1)
    {lines, rest}
  end

  # Decodes one trimmed output line and logs it according to its "type".
  defp handle_line(""), do: :ok

  defp handle_line(line) do
    Logger.debug("💬 Output from Python: #{inspect(line)}")

    case Jason.decode(line) do
      {:ok, %{"type" => "realtime", "text" => text}} ->
        # TODO: send the text to LiveView/PubSub
        Logger.info("📢 Transcripción: #{text}")

      {:ok, %{"type" => "log", "msg" => msg}} ->
        Logger.info("📝 Python: #{inspect(msg)}")

      {:ok, %{"type" => "error", "error" => error}} ->
        Logger.error("⚠️ Error en Python: #{inspect(error)}")

      _ ->
        Logger.warning("⚠️ Mensaje inesperado de Python: #{inspect(line)}")
    end
  end
 end
--- a/stt_recorder/lib/stt_recorder_web/channels/data_chanel.ex
+++ b/stt_recorder/lib/stt_recorder_web/channels/data_chanel.ex
@ -1,11 +1,17 @@
 defmodule SttServerWeb.DataChannel do
  use Phoenix.Channel
-  def join("data:lobby", _payload, socket) do
+  def join("data:lobby", _params, socket) do
-    IO.puts("🟢 Cliente conectado al canal de datos")
+    Phoenix.PubSub.subscribe(SttRecorder.PubSub, "audio_output")
    {:ok, socket}
  end
  # Forwards a `{:broadcast_audio, msg}` message received by this channel
  # process to the connected client as a "transcription" event. `msg` is
  # decoded with `Jason.decode!/1`, which raises if it is not valid JSON.
  # NOTE(review): presumably these messages arrive through the "audio_output"
  # PubSub subscription made in join/3 — confirm which process broadcasts them.
  def handle_info({:broadcast_audio, msg}, socket) do
    push(socket, "transcription", Jason.decode!(msg))
    {:noreply, socket}
  end
  # Recibe audio codificado en base64 (para transporte seguro)
  def handle_in("audio_chunk", %{"data" => base64_chunk, "sample_rate" => sample_rate}, socket) do
    case Base.decode64(base64_chunk) do
--- a/stt_recorder/mix.exs
+++ b/stt_recorder/mix.exs
@ -55,6 +55,7 @@ defmodule SttRecorder.MixProject do
      {:jason, "~> 1.2"},
      {:dns_cluster, "~> 0.1.1"},
      {:bandit, "~> 1.5"}
    ]
  end
--- a/stt_recorder/transcriber.py
+++ b/stt_recorder/transcriber.py
@ -0,0 +1,53 @@
 import sys, json, base64
 import numpy as np
 import traceback
 def main():
    """Run the transcription worker loop over stdin/stdout.

    Reads one JSON request per line from stdin and writes one JSON response
    per line to stdout, flushing after every message. Supported requests:

    * ``{"event": "transcribe", "audio_base64": ..., "sample_rate": ...}``:
      decodes the base64 payload, interprets it as int16 samples scaled to
      float32, transcribes it and replies with
      ``{"type": "realtime", "text": ...}``. ``sample_rate`` defaults to 16000.
    * ``{"event": "set_param", "param": ..., "value": ...}``: sets the named
      attribute on the recorder and replies with a ``log`` message.

    Any other event produces a ``log`` reply. Any exception raised while
    handling a line produces ``{"type": "error", "error": ..., "trace": ...}``
    and the loop continues with the next line. Blank lines are ignored.
    """
    # Only the import that is actually used is kept; the previous unused
    # imports made startup fail whenever one of those packages was missing.
    from RealtimeSTT import AudioToTextRecorder

    def emit(payload):
        # One JSON object per line; flush so the parent process sees it at once.
        print(json.dumps(payload), flush=True)

    # Initialize the model.
    # NOTE(review): anything the library itself prints to stdout would be mixed
    # into the JSON stream read by the parent process — confirm it stays quiet.
    model = AudioToTextRecorder(
        model="base",
        compute_type="int8",
        language="es"
    )
    emit({"type": "log", "msg": "✅ Worker iniciado con modelo Whisper"})

    for line in sys.stdin:
        line = line.strip()
        if not line:
            # A stray empty line is not a request; do not answer with an error.
            continue
        try:
            request = json.loads(line)
            event = request["event"]
            if event == "transcribe":
                sample_rate = request.get("sample_rate", 16000)
                audio_data = base64.b64decode(request["audio_base64"])
                # int16 samples -> float32, scaled by 1/32768.
                audio_float32 = (
                    np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
                )
                # NOTE(review): this call shape (audio array in, (segments, info)
                # out) looks like faster-whisper's WhisperModel.transcribe; verify
                # that AudioToTextRecorder.transcribe accepts these arguments.
                segments, _ = model.transcribe(audio_float32, sample_rate=sample_rate)
                text = " ".join([s.text for s in segments if s.text.strip()])
                emit({"type": "realtime", "text": text})
            elif event == "set_param":
                setattr(model, request["param"], request["value"])
                emit({"type": "log", "msg": f"Set {request['param']} to {request['value']}"})
            else:
                emit({"type": "log", "msg": f"Evento desconocido: {request.get('event')}"})
        except Exception as e:
            # Report the failure to the parent process and keep serving requests.
            emit({
                "type": "error",
                "error": str(e),
                "trace": traceback.format_exc()
            })
 # Start the worker loop only when this file is executed as a script.
 if __name__ == "__main__":
    main()