Intento conectarme al modelo y pasar los chunks y recibir las transcripciones (no funciona)
This commit is contained in:
@ -11,6 +11,7 @@ defmodule SttRecorder.Application do
|
||||
SttRecorderWeb.Telemetry,
|
||||
{DNSCluster, query: Application.get_env(:stt_recorder, :dns_cluster_query) || :ignore},
|
||||
{Phoenix.PubSub, name: SttRecorder.PubSub},
|
||||
SttServer.Transcriber,
|
||||
# Start the Finch HTTP client for sending emails
|
||||
{Finch, name: SttRecorder.Finch},
|
||||
# Start a worker by calling: SttRecorder.Worker.start_link(arg)
|
||||
|
70
stt_recorder/lib/stt_recorder/transcriber.ex
Normal file
70
stt_recorder/lib/stt_recorder/transcriber.ex
Normal file
@ -0,0 +1,70 @@
|
||||
defmodule SttServer.Transcriber do
  @moduledoc """
  GenServer that owns a Python transcription worker running behind an Erlang port.

  Requests are written to the worker's stdin as one JSON object per line, and
  the worker's stdout is read back as newline-delimited JSON.

  The port is opened in stream mode, so a single `:data` message may carry a
  fragment of a line or several lines at once. Incoming bytes are therefore
  accumulated in `state.buffer` and only complete lines are decoded.

  ## Configuration

  The interpreter and script locations are resolved in this order:

    1. the `:python` / `:script_path` keys of the keyword list given to `start_link/1`;
    2. the same keys under `config :stt_recorder, SttServer.Transcriber`;
    3. the built-in defaults (the paths originally hard-coded in this module).

  ## Messages accepted via `GenServer.cast/2`

    * `{:audio_chunk, binary_audio, sample_rate}` - sends the audio, Base64-encoded,
      in a `"transcribe"` request.
    * `{:set_param, param, value}` - sends a `"set_param"` request.
  """

  use GenServer

  require Logger

  @default_python "c:/Users/rolan/i_m/voice_recognition/minimal_server/realtime-env311/Scripts/python.exe"
  @default_script_path "c:/Users/rolan/i_m/voice_recognition/stt_recorder/transcriber.py"

  @doc """
  Starts the transcriber, registered under the module name.

  `opts` may be a keyword list with `:python` and/or `:script_path`; any other
  argument is accepted and ignored, so existing child specs keep working.
  """
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @impl true
  def init(arg) do
    opts = if Keyword.keyword?(arg), do: arg, else: []

    python = setting(opts, :python, @default_python)
    script_path = setting(opts, :script_path, @default_script_path)

    Logger.info("👉 Python path: #{python}")
    Logger.info("👉 Script path: #{script_path}")

    port =
      Port.open(
        {:spawn_executable, python},
        [:binary, :exit_status, :hide, args: [script_path]]
      )

    {:ok, %{port: port, buffer: ""}}
  end

  @impl true
  def handle_cast({:audio_chunk, binary_audio, sample_rate}, state) do
    send_json(state.port, %{
      event: "transcribe",
      audio_base64: Base.encode64(binary_audio),
      sample_rate: sample_rate
    })

    {:noreply, state}
  end

  def handle_cast({:set_param, param, value}, state) do
    send_json(state.port, %{event: "set_param", param: param, value: value})
    {:noreply, state}
  end

  @impl true
  def handle_info({port, {:data, data}}, %{port: port} = state) do
    Logger.debug("💬 Output from Python: #{inspect(data)}")

    # Everything before the last newline is a list of complete lines; whatever
    # follows it is an unfinished line kept for the next :data message.
    {rest, lines} =
      (state.buffer <> data)
      |> :binary.split("\n", [:global])
      |> List.pop_at(-1)

    Enum.each(lines, &handle_line/1)

    {:noreply, %{state | buffer: rest}}
  end

  def handle_info({port, {:exit_status, status}}, %{port: port} = state) do
    Logger.error("⚠️ Proceso Python finalizado con status #{status}")
    {:stop, :python_exit, state}
  end

  def handle_info(other, state) do
    # Ignore unrelated messages instead of crashing on a missing clause.
    Logger.debug("Ignoring unexpected message: #{inspect(other)}")
    {:noreply, state}
  end

  # Looks up `key` in the start options, then in the application environment,
  # and finally falls back to `default`.
  defp setting(opts, key, default) do
    app_env = Application.get_env(:stt_recorder, __MODULE__, [])
    Keyword.get(opts, key) || Keyword.get(app_env, key) || default
  end

  # Encodes `message` as JSON and writes it to the port as a single line.
  defp send_json(port, message) do
    Port.command(port, Jason.encode!(message) <> "\n")
  end

  # Handles one complete line of worker output. Surrounding whitespace
  # (including a trailing "\r") is dropped and empty lines are skipped.
  defp handle_line(line) do
    case String.trim(line) do
      "" -> :ok
      json -> dispatch(json)
    end
  end

  # Decodes a JSON line and logs it according to its "type" field.
  defp dispatch(json) do
    case Jason.decode(json) do
      {:ok, %{"type" => "realtime", "text" => text}} ->
        # TODO: forward the text to LiveView/PubSub
        Logger.info("📢 Transcripción: #{text}")

      {:ok, %{"type" => "log", "msg" => msg}} ->
        Logger.info("📝 Python: #{inspect(msg)}")

      {:ok, %{"type" => "error", "error" => error}} ->
        Logger.error("⚠️ Error en Python: #{inspect(error)}")

      _ ->
        Logger.warning("⚠️ Mensaje inesperado de Python: #{inspect(json)}")
    end
  end
end
|
@ -1,11 +1,17 @@
|
||||
defmodule SttServerWeb.DataChannel do
|
||||
use Phoenix.Channel
|
||||
|
||||
def join("data:lobby", _payload, socket) do
|
||||
IO.puts("🟢 Cliente conectado al canal de datos")
|
||||
def join("data:lobby", _params, socket) do
|
||||
Phoenix.PubSub.subscribe(SttRecorder.PubSub, "audio_output")
|
||||
{:ok, socket}
|
||||
end
|
||||
|
||||
|
||||
def handle_info({:broadcast_audio, msg}, socket) do
|
||||
push(socket, "transcription", Jason.decode!(msg))
|
||||
{:noreply, socket}
|
||||
end
|
||||
|
||||
# Recibe audio codificado en base64 (para transporte seguro)
|
||||
def handle_in("audio_chunk", %{"data" => base64_chunk, "sample_rate" => sample_rate}, socket) do
|
||||
case Base.decode64(base64_chunk) do
|
||||
|
@ -55,6 +55,7 @@ defmodule SttRecorder.MixProject do
|
||||
{:jason, "~> 1.2"},
|
||||
{:dns_cluster, "~> 0.1.1"},
|
||||
{:bandit, "~> 1.5"}
|
||||
|
||||
]
|
||||
end
|
||||
|
||||
|
53
stt_recorder/transcriber.py
Normal file
53
stt_recorder/transcriber.py
Normal file
@ -0,0 +1,53 @@
|
||||
import sys, json, base64
|
||||
import numpy as np
|
||||
import traceback
|
||||
|
||||
def _emit(payload):
    """Write `payload` to stdout as one JSON line and flush immediately.

    `json.dumps` escapes non-ASCII characters by default, so the emitted line
    is plain ASCII regardless of the console encoding.
    """
    print(json.dumps(payload), flush=True)


def _handle_request(model, request):
    """Process one decoded request and return the reply message as a dict.

    Args:
        model: the recorder/transcriber object created in `main`.
        request: the decoded JSON object read from stdin.

    Returns:
        A dict with a "type" key ("realtime" or "log") ready to be emitted.

    Raises:
        Any exception raised while decoding the audio or calling the model;
        `main` turns it into an "error" message.
    """
    event = request.get("event")

    if event == "transcribe":
        sample_rate = request.get("sample_rate", 16000)
        audio_data = base64.b64decode(request["audio_base64"])

        # The payload is read as signed 16-bit samples and scaled by 1/32768
        # into float32.
        audio_float32 = (
            np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
        )

        # NOTE(review): this call shape (audio array + sample_rate, returning
        # `(segments, info)`) looks like faster-whisper's API; the stock
        # RealtimeSTT AudioToTextRecorder appears to expose feed_audio()/text()
        # instead. Confirm the custom class really provides this signature.
        segments, _ = model.transcribe(audio_float32, sample_rate=sample_rate)
        text = " ".join(s.text for s in segments if s.text.strip())
        return {"type": "realtime", "text": text}

    if event == "set_param":
        # NOTE(review): this sets an arbitrary attribute named by the request.
        # If requests can originate from untrusted clients, restrict `param`
        # to an explicit allow-list.
        setattr(model, request["param"], request["value"])
        return {"type": "log", "msg": f"Set {request['param']} to {request['value']}"}

    # Covers both unknown and missing event names.
    return {"type": "log", "msg": f"Evento desconocido: {event}"}


def main():
    """Run the worker loop: read JSON requests from stdin, write JSON replies to stdout.

    Each non-blank input line is parsed as a JSON object and dispatched on its
    "event" field. Every reply is a single JSON line. A failure while handling
    a request is reported as an "error" message (with traceback) and the loop
    continues with the next line.
    """
    # Imported here so that merely importing this module does not require the
    # RealtimeSTT package.
    from RealtimeSTT import AudioToTextRecorder

    # Initialise the model (uses your custom class).
    # NOTE(review): anything the model library itself prints to stdout would
    # be mixed into the JSON stream - verify it stays quiet.
    model = AudioToTextRecorder(
        model="base",
        compute_type="int8",
        language="es",
    )

    _emit({"type": "log", "msg": "✅ Worker iniciado con modelo Whisper"})

    for line in sys.stdin:
        line = line.strip()
        if not line:
            # Blank lines carry no request; skip them instead of reporting a
            # JSON decode error.
            continue

        try:
            _emit(_handle_request(model, json.loads(line)))
        except Exception as e:
            _emit({
                "type": "error",
                "error": str(e),
                "trace": traceback.format_exc(),
            })
|
||||
|
||||
# Start the worker loop only when this file is executed as a script, so the
# module can be imported without side effects.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user