Intento conectarme al modelo y pasar los chunks y recibir las transcripciones (no funciona)
This commit is contained in:
		| @ -11,6 +11,7 @@ defmodule SttRecorder.Application do | ||||
|       SttRecorderWeb.Telemetry, | ||||
|       {DNSCluster, query: Application.get_env(:stt_recorder, :dns_cluster_query) || :ignore}, | ||||
|       {Phoenix.PubSub, name: SttRecorder.PubSub}, | ||||
|       SttServer.Transcriber, | ||||
|       # Start the Finch HTTP client for sending emails | ||||
|       {Finch, name: SttRecorder.Finch}, | ||||
|       # Start a worker by calling: SttRecorder.Worker.start_link(arg) | ||||
|  | ||||
							
								
								
									
										70
									
								
								stt_recorder/lib/stt_recorder/transcriber.ex
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								stt_recorder/lib/stt_recorder/transcriber.ex
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,70 @@ | ||||
defmodule SttServer.Transcriber do
  @moduledoc """
  Owns the Python speech-to-text worker and talks to it over a port.

  Requests are written to the worker's stdin as one JSON object per line;
  replies are read from its stdout, also one JSON object per line. The port is
  opened in `:line` mode so that each reply is decoded as a whole line instead
  of whatever chunk the OS pipe happened to deliver.

  Every `"realtime"` reply is broadcast, as the raw JSON line, on the
  `"audio_output"` topic of `SttRecorder.PubSub` as `{:broadcast_audio, json}`.

  ## Configuration

  The interpreter and script locations can be overridden (the defaults are the
  paths this module was originally written with):

      config :stt_recorder, SttServer.Transcriber,
        python: "/path/to/python",
        script: "/path/to/transcriber.py"
  """

  use GenServer
  require Logger

  @default_python "c:/Users/rolan/i_m/voice_recognition/minimal_server/realtime-env311/Scripts/python.exe"
  @default_script "c:/Users/rolan/i_m/voice_recognition/stt_recorder/transcriber.py"

  # Longest stdout line delivered in a single port message; longer lines arrive
  # as `:noeol` fragments and are reassembled in the state buffer.
  @max_line_bytes 65_536

  @pubsub SttRecorder.PubSub
  @topic "audio_output"

  ## Client API

  @doc """
  Starts the transcriber, registered under the module name.
  """
  def start_link(_opts) do
    GenServer.start_link(__MODULE__, %{}, name: __MODULE__)
  end

  @doc """
  Queues a chunk of raw audio bytes for transcription (fire-and-forget).
  """
  @spec transcribe(binary(), pos_integer()) :: :ok
  def transcribe(binary_audio, sample_rate) when is_binary(binary_audio) do
    GenServer.cast(__MODULE__, {:audio_chunk, binary_audio, sample_rate})
  end

  @doc """
  Asks the worker to set `param` to `value` on its model (fire-and-forget).
  """
  @spec set_param(String.t() | atom(), term()) :: :ok
  def set_param(param, value) do
    GenServer.cast(__MODULE__, {:set_param, param, value})
  end

  ## Server callbacks

  @impl true
  def init(_) do
    config = Application.get_env(:stt_recorder, __MODULE__, [])
    python = Keyword.get(config, :python, @default_python)
    script_path = Keyword.get(config, :script, @default_script)

    Logger.info("👉 Python path: #{python}")
    Logger.info("👉 Script path: #{script_path}")

    port =
      Port.open(
        {:spawn_executable, python},
        [:binary, :exit_status, :hide, line: @max_line_bytes, args: [script_path]]
      )

    {:ok, %{port: port, buffer: []}}
  end

  @impl true
  def handle_cast({:audio_chunk, binary_audio, sample_rate}, state) do
    send_request(state.port, %{
      event: "transcribe",
      audio_base64: Base.encode64(binary_audio),
      sample_rate: sample_rate
    })

    {:noreply, state}
  end

  def handle_cast({:set_param, param, value}, state) do
    send_request(state.port, %{event: "set_param", param: param, value: value})
    {:noreply, state}
  end

  @impl true
  # A fragment of a line longer than @max_line_bytes: keep it until the end
  # of the line shows up.
  def handle_info({port, {:data, {:noeol, chunk}}}, %{port: port} = state) do
    {:noreply, %{state | buffer: [state.buffer, chunk]}}
  end

  # End of a line: join it with any buffered fragments and process it.
  def handle_info({port, {:data, {:eol, chunk}}}, %{port: port} = state) do
    [state.buffer, chunk]
    |> IO.iodata_to_binary()
    # Drop a stray carriage return in case the worker emits "\r\n" endings.
    |> String.trim_trailing("\r")
    |> handle_line()

    {:noreply, %{state | buffer: []}}
  end

  def handle_info({port, {:exit_status, status}}, %{port: port} = state) do
    Logger.error("⚠️ Proceso Python finalizado con status #{status}")
    {:stop, :python_exit, state}
  end

  # Without this clause any unrelated message would crash the server with a
  # FunctionClauseError.
  def handle_info(message, state) do
    Logger.debug("Ignoring unexpected message: #{inspect(message)}")
    {:noreply, state}
  end

  ## Helpers

  # Writes one JSON-encoded request, newline-terminated, to the worker's stdin.
  defp send_request(port, message) do
    Port.command(port, [Jason.encode!(message), ?\n])
  end

  # Decodes one complete stdout line from the worker and dispatches on its type.
  defp handle_line(line) do
    Logger.debug("💬 Output from Python: #{inspect(line)}")

    case Jason.decode(line) do
      {:ok, %{"type" => "realtime", "text" => text}} ->
        Logger.info("📢 Transcripción: #{text}")
        # Subscribers receive the raw JSON line and decode it themselves.
        Phoenix.PubSub.broadcast(@pubsub, @topic, {:broadcast_audio, line})

      {:ok, %{"type" => "log", "msg" => msg}} ->
        Logger.info("📝 Python: #{inspect(msg)}")

      {:ok, %{"type" => "error", "error" => error}} ->
        Logger.error("⚠️ Error en Python: #{inspect(error)}")

      _ ->
        Logger.warning("⚠️ Mensaje inesperado de Python: #{inspect(line)}")
    end
  end
end
| @ -1,11 +1,17 @@ | ||||
| defmodule SttServerWeb.DataChannel do | ||||
|   use Phoenix.Channel | ||||
|  | ||||
|   def join("data:lobby", _payload, socket) do | ||||
|     IO.puts("🟢 Cliente conectado al canal de datos") | ||||
|   def join("data:lobby", _params, socket) do | ||||
|     Phoenix.PubSub.subscribe(SttRecorder.PubSub, "audio_output") | ||||
|     {:ok, socket} | ||||
|   end | ||||
|  | ||||
|  | ||||
|   def handle_info({:broadcast_audio, msg}, socket) do | ||||
|     push(socket, "transcription", Jason.decode!(msg)) | ||||
|     {:noreply, socket} | ||||
|   end | ||||
|  | ||||
|   # Recibe audio codificado en base64 (para transporte seguro) | ||||
|   def handle_in("audio_chunk", %{"data" => base64_chunk, "sample_rate" => sample_rate}, socket) do | ||||
|     case Base.decode64(base64_chunk) do | ||||
|  | ||||
| @ -55,6 +55,7 @@ defmodule SttRecorder.MixProject do | ||||
|       {:jason, "~> 1.2"}, | ||||
|       {:dns_cluster, "~> 0.1.1"}, | ||||
|       {:bandit, "~> 1.5"} | ||||
|  | ||||
|     ] | ||||
|   end | ||||
|  | ||||
|  | ||||
							
								
								
									
										53
									
								
								stt_recorder/transcriber.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								stt_recorder/transcriber.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,53 @@ | ||||
| import sys, json, base64 | ||||
| import numpy as np | ||||
| import traceback | ||||
|  | ||||
def _emit(message):
    """Write one JSON message to stdout as a single, immediately flushed line."""
    print(json.dumps(message), flush=True)


def _handle_request(model, request):
    """Process one decoded request and return the reply message (a dict).

    Supported events:
      * "transcribe": decodes "audio_base64" as 16-bit PCM, scales it to
        float32 in [-1, 1) and returns {"type": "realtime", "text": ...}.
      * "set_param": sets an existing public attribute on the model and
        returns a log message.
    Any other event yields a log message naming the unknown event.

    Raises:
        KeyError: a required field ("event", "audio_base64", "param",
            "value") is missing.
        ValueError: "set_param" names a private or non-existent attribute.
    """
    event = request["event"]

    if event == "transcribe":
        sample_rate = request.get("sample_rate", 16000)
        audio_data = base64.b64decode(request["audio_base64"])

        # int16 PCM -> float32 samples.
        audio_float32 = (
            np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
        )

        # NOTE(review): assumes the recorder exposes
        # transcribe(audio, sample_rate=...) returning (segments, info) —
        # confirm against the recorder class actually in use.
        segments, _ = model.transcribe(audio_float32, sample_rate=sample_rate)
        text = " ".join(s.text for s in segments if s.text.strip())
        return {"type": "realtime", "text": text}

    if event == "set_param":
        param = request["param"]
        value = request["value"]
        # Only tune attributes the model already exposes; never touch
        # private/dunder names supplied by the peer.
        if (
            not isinstance(param, str)
            or param.startswith("_")
            or not hasattr(model, param)
        ):
            raise ValueError(f"Unsupported parameter: {param!r}")
        setattr(model, param, value)
        return {"type": "log", "msg": f"Set {param} to {value}"}

    return {"type": "log", "msg": f"Evento desconocido: {event}"}


def main():
    """Run the worker loop: read JSON requests from stdin, reply on stdout.

    Each non-blank input line is one JSON request; each reply is one JSON
    line. A failure while handling a request is reported as an "error"
    message (with traceback) and the loop continues with the next line.
    """
    # Imported here so the module can be loaded without RealtimeSTT installed.
    from RealtimeSTT import AudioToTextRecorder

    # Initialise the model (uses your custom class).
    model = AudioToTextRecorder(
        model="base",
        compute_type="int8",
        language="es"
    )

    _emit({"type": "log", "msg": "✅ Worker iniciado con modelo Whisper"})

    for line in sys.stdin:
        line = line.strip()
        if not line:
            # Blank lines carry no request; skip them instead of reporting a
            # JSON decode error.
            continue
        try:
            _emit(_handle_request(model, json.loads(line)))
        except Exception as e:
            _emit({
                "type": "error",
                "error": str(e),
                "trace": traceback.format_exc()
            })
|  | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
		Reference in New Issue
	
	Block a user