From ba9ecfcff4e90d1e047e4591be9f3237bd9551bc Mon Sep 17 00:00:00 2001
From: aime-rolandi
Date: Tue, 17 Jun 2025 16:46:04 -0300
Subject: [PATCH] Intento conectarme al modelo y pasar los chunks y recibir las transcripciones (no funciona)

---
 stt_recorder/lib/stt_recorder/application.ex  |   1 +
 stt_recorder/lib/stt_recorder/transcriber.ex  | 130 ++++++++++++++++++
 .../stt_recorder_web/channels/data_chanel.ex  |  12 +-
 stt_recorder/transcriber.py                   |  78 +++++++++++
 4 files changed, 219 insertions(+), 2 deletions(-)
 create mode 100644 stt_recorder/lib/stt_recorder/transcriber.ex
 create mode 100644 stt_recorder/transcriber.py

diff --git a/stt_recorder/lib/stt_recorder/application.ex b/stt_recorder/lib/stt_recorder/application.ex
index 6ea50ed4..b3343e86 100644
--- a/stt_recorder/lib/stt_recorder/application.ex
+++ b/stt_recorder/lib/stt_recorder/application.ex
@@ -11,6 +11,7 @@ defmodule SttRecorder.Application do
       SttRecorderWeb.Telemetry,
       {DNSCluster, query: Application.get_env(:stt_recorder, :dns_cluster_query) || :ignore},
       {Phoenix.PubSub, name: SttRecorder.PubSub},
+      SttServer.Transcriber,
       # Start the Finch HTTP client for sending emails
       {Finch, name: SttRecorder.Finch},
       # Start a worker by calling: SttRecorder.Worker.start_link(arg)
diff --git a/stt_recorder/lib/stt_recorder/transcriber.ex b/stt_recorder/lib/stt_recorder/transcriber.ex
new file mode 100644
index 00000000..412072a7
--- /dev/null
+++ b/stt_recorder/lib/stt_recorder/transcriber.ex
@@ -0,0 +1,130 @@
+defmodule SttServer.Transcriber do
+  @moduledoc """
+  Owns the Python speech-to-text worker.
+
+  The worker script is started as an Erlang port. Requests are written to its
+  stdin and replies are read from its stdout, one JSON object per line in both
+  directions. Every "realtime" reply is logged and broadcast on the
+  "audio_output" PubSub topic as `{:broadcast_audio, json_line}`.
+
+  The interpreter and script locations can be overridden with the
+  `:python_path` and `:transcriber_script` keys of the `:stt_recorder`
+  application environment.
+  """
+
+  use GenServer
+  require Logger
+
+  # Fallback locations used when the application environment sets none.
+  @default_python "c:/Users/rolan/i_m/voice_recognition/minimal_server/realtime-env311/Scripts/python.exe"
+  @default_script "c:/Users/rolan/i_m/voice_recognition/stt_recorder/transcriber.py"
+
+  @pubsub SttRecorder.PubSub
+  @topic "audio_output"
+
+  ## Client API
+
+  @doc "Starts the transcriber, registered under the module name."
+  def start_link(_) do
+    GenServer.start_link(__MODULE__, %{}, name: __MODULE__)
+  end
+
+  @doc "Queues a chunk of raw audio (a binary) for transcription."
+  def transcribe(binary_audio, sample_rate) when is_binary(binary_audio) do
+    GenServer.cast(__MODULE__, {:audio_chunk, binary_audio, sample_rate})
+  end
+
+  @doc "Asks the worker to set attribute `param` of its model to `value`."
+  def set_param(param, value) do
+    GenServer.cast(__MODULE__, {:set_param, param, value})
+  end
+
+  ## Server callbacks
+
+  @impl true
+  def init(_) do
+    python = Application.get_env(:stt_recorder, :python_path, @default_python)
+    script_path = Application.get_env(:stt_recorder, :transcriber_script, @default_script)
+
+    Logger.info("👉 Python path: #{python}")
+    Logger.info("👉 Script path: #{script_path}")
+
+    port =
+      Port.open(
+        {:spawn_executable, python},
+        [:binary, :exit_status, :hide, args: [script_path]]
+      )
+
+    # `buffer` keeps the trailing, not yet newline-terminated part of stdout.
+    {:ok, %{port: port, buffer: ""}}
+  end
+
+  @impl true
+  def handle_cast({:audio_chunk, binary_audio, sample_rate}, state) do
+    send_request(state.port, %{
+      event: "transcribe",
+      audio_base64: Base.encode64(binary_audio),
+      sample_rate: sample_rate
+    })
+
+    {:noreply, state}
+  end
+
+  def handle_cast({:set_param, param, value}, state) do
+    send_request(state.port, %{event: "set_param", param: param, value: value})
+    {:noreply, state}
+  end
+
+  @impl true
+  def handle_info({port, {:data, data}}, %{port: port} = state) do
+    Logger.debug("💬 Output from Python: #{inspect(data)}")
+
+    # A port delivers stdout in arbitrary chunks: one message can hold several
+    # lines or only part of one. Split on newlines and keep the unfinished tail.
+    {lines, [rest]} =
+      (state.buffer <> data)
+      |> String.split("\n")
+      |> Enum.split(-1)
+
+    Enum.each(lines, &handle_line/1)
+
+    {:noreply, %{state | buffer: rest}}
+  end
+
+  def handle_info({port, {:exit_status, status}}, %{port: port} = state) do
+    Logger.error("⚠️ Proceso Python finalizado con status #{status}")
+    {:stop, :python_exit, state}
+  end
+
+  # Writes one newline-terminated JSON request to the worker's stdin.
+  defp send_request(port, message) do
+    Port.command(port, [Jason.encode!(message), ?\n])
+  end
+
+  # Decodes one line of worker output and dispatches on its "type" field.
+  defp handle_line(line) do
+    # Trimming also drops the "\r" left by Windows line endings.
+    case String.trim(line) do
+      "" -> :ok
+      json -> dispatch(json, Jason.decode(json))
+    end
+  end
+
+  defp dispatch(json, {:ok, %{"type" => "realtime", "text" => text}}) do
+    Logger.info("📢 Transcripción: #{text}")
+    # Subscribers decode the payload themselves, so forward the raw JSON line.
+    Phoenix.PubSub.broadcast(@pubsub, @topic, {:broadcast_audio, json})
+  end
+
+  defp dispatch(_json, {:ok, %{"type" => "log", "msg" => msg}}) do
+    Logger.info("📝 Python: #{inspect(msg)}")
+  end
+
+  defp dispatch(_json, {:ok, %{"type" => "error", "error" => error}}) do
+    Logger.error("⚠️ Error en Python: #{inspect(error)}")
+  end
+
+  defp dispatch(json, _other) do
+    Logger.warning("⚠️ Mensaje inesperado de Python: #{inspect(json)}")
+  end
+end
diff --git a/stt_recorder/lib/stt_recorder_web/channels/data_chanel.ex b/stt_recorder/lib/stt_recorder_web/channels/data_chanel.ex
index 6d5344f0..50f85e8f 100644
--- a/stt_recorder/lib/stt_recorder_web/channels/data_chanel.ex
+++ b/stt_recorder/lib/stt_recorder_web/channels/data_chanel.ex
@@ -1,11 +1,19 @@
 defmodule SttServerWeb.DataChannel do
   use Phoenix.Channel
 
-  def join("data:lobby", _payload, socket) do
-    IO.puts("🟢 Cliente conectado al canal de datos")
+  def join("data:lobby", _params, socket) do
+    # Receive the transcriptions SttServer.Transcriber broadcasts on this topic.
+    Phoenix.PubSub.subscribe(SttRecorder.PubSub, "audio_output")
     {:ok, socket}
   end
 
+  # `msg` is the raw JSON line emitted by the Python worker; it is decoded here
+  # and pushed to the client as a "transcription" event.
+  def handle_info({:broadcast_audio, msg}, socket) do
+    push(socket, "transcription", Jason.decode!(msg))
+    {:noreply, socket}
+  end
+
   # Recibe audio codificado en base64 (para transporte seguro)
   def handle_in("audio_chunk", %{"data" => base64_chunk, "sample_rate" => sample_rate}, socket) do
     case Base.decode64(base64_chunk) do
diff --git a/stt_recorder/transcriber.py b/stt_recorder/transcriber.py
new file mode 100644
index 00000000..6616a550
--- /dev/null
+++ b/stt_recorder/transcriber.py
@@ -0,0 +1,78 @@
+"""Speech-to-text worker driven over stdin/stdout.
+
+Reads one JSON request per line from stdin and writes one JSON reply per line
+to stdout. Supported requests are "transcribe" (base64-encoded int16 PCM plus
+a sample rate) and "set_param". Replies carry a "type" of "realtime", "log"
+or "error".
+"""
+import sys, json, base64
+import numpy as np
+import traceback
+
+
+def emit(payload):
+    """Write one JSON reply line to stdout and flush it right away."""
+    print(json.dumps(payload), flush=True)
+
+
+def main():
+    """Load the model, then serve requests until stdin is closed."""
+    from RealtimeSTT import AudioToTextRecorder
+
+    # Initialise the model (uses your custom class).
+    model = AudioToTextRecorder(
+        model="base",
+        compute_type="int8",
+        language="es"
+    )
+
+    emit({"type": "log", "msg": "✅ Worker iniciado con modelo Whisper"})
+
+    for line in sys.stdin:
+        line = line.strip()
+        if not line:
+            # A blank line carries no request; skip it.
+            continue
+
+        try:
+            request = json.loads(line)
+            event = request.get("event")
+
+            if event == "transcribe":
+                sample_rate = request.get("sample_rate", 16000)
+                audio_data = base64.b64decode(request["audio_base64"])
+
+                # int16 PCM scaled to float32 in [-1.0, 1.0).
+                audio_float32 = (
+                    np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
+                )
+
+                # NOTE(review): confirm the recorder class really exposes
+                # `transcribe(audio, sample_rate=...)` returning `(segments, info)`;
+                # the commit subject says this path does not work yet.
+                segments, _ = model.transcribe(audio_float32, sample_rate=sample_rate)
+                text = " ".join([s.text for s in segments if s.text.strip()])
+                emit({"type": "realtime", "text": text})
+
+            elif event == "set_param":
+                param, value = request["param"], request["value"]
+                # Requests may originate from clients: only touch public
+                # attributes the model already has.
+                if not isinstance(param, str) or param.startswith("_") or not hasattr(model, param):
+                    raise ValueError(f"Parámetro no permitido: {param}")
+                setattr(model, param, value)
+                emit({"type": "log", "msg": f"Set {param} to {value}"})
+
+            else:
+                emit({"type": "log", "msg": f"Evento desconocido: {event}"})
+
+        except Exception as e:
+            emit({
+                "type": "error",
+                "error": str(e),
+                "trace": traceback.format_exc()
+            })
+
+
+if __name__ == "__main__":
+    main()