Compare commits
2 Commits
22e406b9dd
...
ba9ecfcff4
Author | SHA1 | Date | |
---|---|---|---|
ba9ecfcff4 | |||
07526dcccf |
@ -11,6 +11,7 @@ defmodule SttRecorder.Application do
|
|||||||
SttRecorderWeb.Telemetry,
|
SttRecorderWeb.Telemetry,
|
||||||
{DNSCluster, query: Application.get_env(:stt_recorder, :dns_cluster_query) || :ignore},
|
{DNSCluster, query: Application.get_env(:stt_recorder, :dns_cluster_query) || :ignore},
|
||||||
{Phoenix.PubSub, name: SttRecorder.PubSub},
|
{Phoenix.PubSub, name: SttRecorder.PubSub},
|
||||||
|
SttServer.Transcriber,
|
||||||
# Start the Finch HTTP client for sending emails
|
# Start the Finch HTTP client for sending emails
|
||||||
{Finch, name: SttRecorder.Finch},
|
{Finch, name: SttRecorder.Finch},
|
||||||
# Start a worker by calling: SttRecorder.Worker.start_link(arg)
|
# Start a worker by calling: SttRecorder.Worker.start_link(arg)
|
||||||
|
70
stt_recorder/lib/stt_recorder/transcriber.ex
Normal file
70
stt_recorder/lib/stt_recorder/transcriber.ex
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
defmodule SttServer.Transcriber do
  @moduledoc """
  GenServer that owns a Python speech-to-text worker driven through a port.

  Requests are written to the worker's stdin as one JSON object per line. The
  worker's stdout is read back, split into lines, and each line is decoded as
  JSON and logged according to its `"type"` field.

  The interpreter and script locations default to the original development
  paths and can be overridden in the application environment:

      config :stt_recorder, SttServer.Transcriber,
        python: "/path/to/python",
        script: "/path/to/transcriber.py"

  Accepted casts:

    * `{:audio_chunk, binary_audio, sample_rate}` - sends a `"transcribe"` event
    * `{:set_param, param, value}` - sends a `"set_param"` event
    * `{:call_method, method}` - sends a `"call_method"` event
  """

  use GenServer
  require Logger

  @default_python "c:/Users/rolan/i_m/voice_recognition/minimal_server/realtime-env311/Scripts/python.exe"
  @default_script "c:/Users/rolan/i_m/voice_recognition/stt_recorder/transcriber.py"

  @doc """
  Starts the transcriber and registers it under the module name.
  """
  def start_link(_) do
    GenServer.start_link(__MODULE__, %{}, name: __MODULE__)
  end

  @impl true
  def init(_) do
    config = Application.get_env(:stt_recorder, __MODULE__, [])
    python = Keyword.get(config, :python, @default_python)
    script_path = Keyword.get(config, :script, @default_script)

    Logger.info("👉 Python path: #{python}")
    Logger.info("👉 Script path: #{script_path}")

    port =
      Port.open(
        {:spawn_executable, python},
        [:binary, :exit_status, :hide, args: [script_path]]
      )

    # `buffer` holds the trailing, not yet newline-terminated part of the
    # worker's output between two :data messages.
    {:ok, %{port: port, buffer: ""}}
  end

  @impl true
  def handle_cast({:audio_chunk, binary_audio, sample_rate}, state) do
    send_event(state.port, %{
      event: "transcribe",
      audio_base64: Base.encode64(binary_audio),
      sample_rate: sample_rate
    })

    {:noreply, state}
  end

  def handle_cast({:set_param, param, value}, state) do
    send_event(state.port, %{event: "set_param", param: param, value: value})
    {:noreply, state}
  end

  # The control channel casts this message. Without a clause the server would
  # crash with a FunctionClauseError, so the request is forwarded and the
  # worker decides what to do with the event.
  def handle_cast({:call_method, method}, state) do
    send_event(state.port, %{event: "call_method", method: method})
    {:noreply, state}
  end

  @impl true
  def handle_info({port, {:data, data}}, %{port: port} = state) do
    Logger.debug("💬 Output from Python: #{inspect(data)}")

    # Port data is delivered in arbitrary chunks: a message may carry several
    # lines or only part of one. Complete lines are handled now and the
    # remainder is kept for the next message.
    {lines, rest} = split_lines(state.buffer <> data)
    Enum.each(lines, &handle_line/1)

    {:noreply, %{state | buffer: rest}}
  end

  def handle_info({port, {:exit_status, status}}, %{port: port} = state) do
    Logger.error("⚠️ Proceso Python finalizado con status #{status}")
    {:stop, :python_exit, state}
  end

  # Defining handle_info/2 replaces the default provided by `use GenServer`,
  # so anything else is logged here instead of crashing the server.
  def handle_info(message, state) do
    Logger.warning("Unexpected message in #{inspect(__MODULE__)}: #{inspect(message)}")
    {:noreply, state}
  end

  # Encodes `message` as JSON and writes it to the port followed by a newline.
  defp send_event(port, message) do
    Port.command(port, [Jason.encode!(message), ?\n])
  end

  # Splits `data` on "\n" and returns `{complete_lines, unterminated_rest}`.
  defp split_lines(data) do
    {rest, lines} = data |> :binary.split("\n", [:global]) |> List.pop_at(-1)
    {lines, rest}
  end

  # Decodes one line of worker output and logs it by message type.
  defp handle_line(line) do
    case String.trim(line) do
      "" ->
        :ok

      json ->
        case Jason.decode(json) do
          {:ok, %{"type" => "realtime", "text" => text}} ->
            # TODO: send the text to LiveView/PubSub
            Logger.info("📢 Transcripción: #{text}")

          {:ok, %{"type" => "log", "msg" => msg}} ->
            Logger.info("📝 Python: #{inspect(msg)}")

          {:ok, %{"type" => "error", "error" => error}} ->
            Logger.error("⚠️ Error en Python: #{inspect(error)}")

          _ ->
            Logger.warning("⚠️ Mensaje inesperado de Python: #{inspect(json)}")
        end
    end
  end
end
|
17
stt_recorder/lib/stt_recorder_web/channels/control_chanel.ex
Normal file
17
stt_recorder/lib/stt_recorder_web/channels/control_chanel.ex
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
defmodule SttRecorderWeb.Channels.ControlChanel do
  @moduledoc """
  Channel for control messages addressed to `SttServer.Transcriber`.

  Only the `"control:lobby"` topic can be joined. Supported events:

    * `"set_parameter"` with `"parameter"` and `"value"` keys
    * `"call_method"` with a `"method"` key

  Both events are forwarded with `GenServer.cast/2`, so the `:ok` reply only
  means the request was sent, not that the transcriber applied it.
  """

  use Phoenix.Channel

  @impl true
  def join("control:lobby", _payload, socket) do
    {:ok, socket}
  end

  # The socket routes every "control:*" topic here; refuse the ones this
  # channel does not serve instead of raising a FunctionClauseError.
  def join(_topic, _payload, _socket) do
    {:error, %{reason: "unknown topic"}}
  end

  @impl true
  def handle_in("set_parameter", %{"parameter" => param, "value" => value}, socket) do
    GenServer.cast(SttServer.Transcriber, {:set_param, param, value})
    {:reply, {:ok, %{message: "Parameter updated"}}, socket}
  end

  def handle_in("call_method", %{"method" => method}, socket) do
    GenServer.cast(SttServer.Transcriber, {:call_method, method})
    {:reply, {:ok, %{message: "Method called"}}, socket}
  end

  # Unknown events and payloads missing the required keys get an error reply
  # rather than crashing the channel process.
  def handle_in(_event, _payload, socket) do
    {:reply, {:error, %{reason: "Unknown event or malformed payload"}}, socket}
  end
end
|
32
stt_recorder/lib/stt_recorder_web/channels/data_chanel.ex
Normal file
32
stt_recorder/lib/stt_recorder_web/channels/data_chanel.ex
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
defmodule SttServerWeb.DataChannel do
  @moduledoc """
  Channel that receives audio chunks from the client and pushes transcription
  messages back.

  On joining `"data:lobby"` the channel process subscribes to the
  `"audio_output"` topic of `SttRecorder.PubSub`. Every
  `{:broadcast_audio, json}` message received there is decoded and pushed to
  the client as a `"transcription"` event.

  Incoming `"audio_chunk"` events carry base64-encoded audio in `"data"` and a
  `"sample_rate"`; the decoded bytes are cast to `SttServer.Transcriber`.
  """

  use Phoenix.Channel
  require Logger

  @impl true
  def join("data:lobby", _params, socket) do
    Phoenix.PubSub.subscribe(SttRecorder.PubSub, "audio_output")
    {:ok, socket}
  end

  # The socket routes every "data:*" topic here; refuse the ones this channel
  # does not serve instead of raising a FunctionClauseError.
  def join(_topic, _params, _socket) do
    {:error, %{reason: "unknown topic"}}
  end

  @impl true
  def handle_info({:broadcast_audio, msg}, socket) do
    # A malformed broadcast is logged and dropped; it must not take the
    # client's channel down.
    case Jason.decode(msg) do
      {:ok, payload} when is_map(payload) ->
        push(socket, "transcription", payload)

      other ->
        Logger.warning("Discarding undecodable broadcast: #{inspect(other)}")
    end

    {:noreply, socket}
  end

  # Receives base64-encoded audio (for safe transport).
  @impl true
  def handle_in("audio_chunk", %{"data" => base64_chunk, "sample_rate" => sample_rate}, socket) do
    case Base.decode64(base64_chunk) do
      {:ok, binary_audio} ->
        # Hand the raw bytes to the transcriber.
        GenServer.cast(SttServer.Transcriber, {:audio_chunk, binary_audio, sample_rate})
        {:noreply, socket}

      :error ->
        Logger.warning("⚠️ Error al decodificar base64")
        {:reply, {:error, %{reason: "Invalid base64 audio"}}, socket}
    end
  end

  # Any other event, or an "audio_chunk" missing its keys, is ignored.
  def handle_in(_unknown, _payload, socket) do
    {:noreply, socket}
  end
end
|
14
stt_recorder/lib/stt_recorder_web/channels/user_socket.ex
Normal file
14
stt_recorder/lib/stt_recorder_web/channels/user_socket.ex
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
defmodule SttRecorderWeb.Channels.UserSocket do
  @moduledoc """
  Socket that routes `"control:*"` topics to the control channel and
  `"data:*"` topics to the data channel.

  Every connection is accepted and sockets are left without an identifier.
  """

  use Phoenix.Socket

  ## Channels
  channel "control:*", SttRecorderWeb.Channels.ControlChanel
  channel "data:*", SttServerWeb.DataChannel

  # Accepts the connection unconditionally; params and connect info are unused.
  @impl true
  def connect(_params, socket, _connect_info) do
    {:ok, socket}
  end

  # Returns nil, so sockets are anonymous.
  @impl true
  def id(_socket) do
    nil
  end
end
|
@ -15,6 +15,9 @@ defmodule SttRecorderWeb.Endpoint do
|
|||||||
websocket: [connect_info: [session: @session_options]],
|
websocket: [connect_info: [session: @session_options]],
|
||||||
longpoll: [connect_info: [session: @session_options]]
|
longpoll: [connect_info: [session: @session_options]]
|
||||||
|
|
||||||
|
socket "/socket", SttRecorderWeb.Channels.UserSocket,
|
||||||
|
websocket: true,
|
||||||
|
longpoll: false
|
||||||
# Serve at "/" the static files from "priv/static" directory.
|
# Serve at "/" the static files from "priv/static" directory.
|
||||||
#
|
#
|
||||||
# You should set gzip to true if you are running phx.digest
|
# You should set gzip to true if you are running phx.digest
|
||||||
|
115
stt_recorder/lib/stt_recorder_web/live/stt/test_with_channel.ex
Normal file
115
stt_recorder/lib/stt_recorder_web/live/stt/test_with_channel.ex
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
defmodule SttRecorderWeb.Stt.TestWithChannel do
  @moduledoc """
  Test page that records microphone audio in the browser and streams it to the
  `"data:lobby"` channel.

  The LiveView holds no state. All behaviour lives in the inline module
  script, which opens a Phoenix socket, sends base64-encoded 16-bit PCM chunks
  as `"audio_chunk"` events and renders the transcription events it receives.
  """

  use SttRecorderWeb, :live_view

  @impl true
  def mount(_params, _session, socket) do
    {:ok, socket}
  end

  @impl true
  def render(assigns) do
    ~H"""
    <div id="container">
      <div id="status">Presioná "Start Recording"…</div>
      <button id="startButton">Start Recording</button>
      <button id="stopButton" disabled>Stop Recording</button>

      <div id="transcriptionContainer">
        <div id="transcription" class="realtime"></div>
      </div>

      <div id="fullTextContainer">
        <div id="fullText"></div>
      </div>

      <script type="module">
        import { Socket } from "https://cdn.skypack.dev/phoenix";

        const statusDiv = document.getElementById("status");
        const transcriptionDiv = document.getElementById("transcription");
        const fullTextDiv = document.getElementById("fullText");
        const startButton = document.getElementById("startButton");
        const stopButton = document.getElementById("stopButton");

        let socket, channel;
        let audioContext, mediaStream, mediaProcessor;

        // Shows the partial transcript with its last word highlighted. Text is
        // inserted as text nodes, never as markup.
        function showRealtime(text) {
          const words = (text == null ? "" : String(text)).split(" ");
          const lastWord = words.pop();
          const highlighted = document.createElement("span");
          highlighted.className = "last-word";
          highlighted.textContent = lastWord;
          transcriptionDiv.replaceChildren(words.join(" ") + " ", highlighted);
        }

        // Appends a finished sentence and clears the partial transcript.
        function appendSentence(text) {
          fullTextDiv.textContent += (text == null ? "" : String(text)) + " ";
          transcriptionDiv.textContent = "";
        }

        // Converts float samples in [-1, 1] to 16-bit PCM and base64-encodes
        // the resulting bytes.
        function toBase64Pcm16(float32Array) {
          const int16Array = new Int16Array(float32Array.length);
          for (let i = 0; i < float32Array.length; i++) {
            int16Array[i] = Math.max(-1, Math.min(1, float32Array[i])) * 0x7FFF;
          }
          return btoa(String.fromCharCode(...new Uint8Array(int16Array.buffer)));
        }

        async function startRecording() {
          startButton.disabled = true;
          stopButton.disabled = false;
          statusDiv.textContent = "Recording…";
          transcriptionDiv.textContent = "";
          fullTextDiv.textContent = "";

          // A relative endpoint lets the client derive host and ws/wss scheme
          // from the page, so this also works off localhost and over HTTPS.
          socket = new Socket("/socket");
          socket.connect();

          channel = socket.channel("data:lobby");
          channel.join()
            .receive("ok", () => {
              statusDiv.textContent = "🎙 Conectado a Phoenix STT";
              console.log("Canal conectado");
            })
            .receive("error", () => {
              statusDiv.textContent = "❌ Error al conectar";
              console.error("Error al conectar canal");
            });

          channel.on("realtime", payload => showRealtime(payload.text));
          channel.on("fullSentence", payload => appendSentence(payload.text));

          // The data channel pushes a single "transcription" event; route it
          // by its "type" field.
          // NOTE(review): payload shape assumed to be {type, text} - confirm.
          channel.on("transcription", payload => {
            if (payload.type === "realtime") showRealtime(payload.text);
            else if (payload.type === "fullSentence") appendSentence(payload.text);
          });

          try {
            mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
          } catch (error) {
            // Permission denied or no device: undo the connection and restore
            // the buttons instead of staying stuck in the recording state.
            console.error("getUserMedia failed", error);
            stopRecording();
            statusDiv.textContent = "❌ Error al acceder al micrófono";
            return;
          }

          audioContext = new AudioContext();
          const input = audioContext.createMediaStreamSource(mediaStream);

          mediaProcessor = audioContext.createScriptProcessor(1024, 1, 1);
          mediaProcessor.onaudioprocess = (event) => {
            channel.push("audio_chunk", {
              data: toBase64Pcm16(event.inputBuffer.getChannelData(0)),
              sample_rate: audioContext.sampleRate
            });
          };

          input.connect(mediaProcessor);
          mediaProcessor.connect(audioContext.destination);
        }

        function stopRecording() {
          stopButton.disabled = true;
          startButton.disabled = false;
          statusDiv.textContent = "🛑 Grabación detenida.";

          if (mediaProcessor) mediaProcessor.disconnect();
          if (audioContext) audioContext.close();
          if (mediaStream) mediaStream.getTracks().forEach(track => track.stop());
          if (channel) channel.leave();
          if (socket) socket.disconnect();

          // Drop the references so a later start never reuses closed objects.
          mediaProcessor = audioContext = mediaStream = channel = socket = undefined;
        }

        startButton.onclick = startRecording;
        stopButton.onclick = stopRecording;
      </script>

      <style>
        .last-word {
          font-weight: bold;
          color: orange;
        }
      </style>
    </div>
    """
  end
end
|
@ -18,7 +18,7 @@ defmodule SttRecorderWeb.Router do
|
|||||||
pipe_through :browser
|
pipe_through :browser
|
||||||
|
|
||||||
get "/", PageController, :home
|
get "/", PageController, :home
|
||||||
live "/sttrecorder", Stt.SttLive
|
live "/testchannel", Stt.TestWithChannel
|
||||||
live "/test", Stt.TestRecorder
|
live "/test", Stt.TestRecorder
|
||||||
|
|
||||||
end
|
end
|
||||||
|
@ -55,6 +55,7 @@ defmodule SttRecorder.MixProject do
|
|||||||
{:jason, "~> 1.2"},
|
{:jason, "~> 1.2"},
|
||||||
{:dns_cluster, "~> 0.1.1"},
|
{:dns_cluster, "~> 0.1.1"},
|
||||||
{:bandit, "~> 1.5"}
|
{:bandit, "~> 1.5"}
|
||||||
|
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
53
stt_recorder/transcriber.py
Normal file
53
stt_recorder/transcriber.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import sys, json, base64
|
||||||
|
import numpy as np
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
def _emit(payload):
    """Write `payload` to stdout as one JSON line and flush immediately."""
    print(json.dumps(payload), flush=True)


def _decode_pcm16(audio_base64):
    """Decode base64 16-bit PCM into float32 samples divided by 32768."""
    raw = base64.b64decode(audio_base64)
    return np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0


def _set_model_param(model, name, value):
    """Set a public, non-callable attribute on `model`.

    The name comes from outside this process, so private names and names that
    would replace a method are rejected.

    Raises:
        ValueError: if `name` is not a string, starts with "_", or refers to
            a callable attribute of `model`.
    """
    if not isinstance(name, str) or name.startswith("_"):
        raise ValueError(f"Parámetro no permitido: {name!r}")
    if callable(getattr(model, name, None)):
        raise ValueError(f"Parámetro no permitido: {name!r}")
    setattr(model, name, value)


def main():
    """Serve JSON requests read from stdin, one per line, answering on stdout.

    Handled events:
      * "transcribe": decodes "audio_base64", transcribes it and emits
        {"type": "realtime", "text": ...}.
      * "set_param": sets "param" to "value" on the model and emits a log line.
      * anything else: emits a log line naming the unknown event.

    Any exception raised while handling a line is reported as
    {"type": "error", "error": ..., "trace": ...} and the loop continues.
    """
    # Imported here rather than at module level, as in the original.
    from RealtimeSTT import AudioToTextRecorder

    # Initialise the model (uses your custom class).
    model = AudioToTextRecorder(
        model="base",
        compute_type="int8",
        language="es"
    )

    _emit({"type": "log", "msg": "✅ Worker iniciado con modelo Whisper"})

    for line in sys.stdin:
        line = line.strip()
        if not line:
            # A blank line is not a request; skip it instead of reporting a
            # JSON decoding error.
            continue

        try:
            request = json.loads(line)
            event = request["event"]

            if event == "transcribe":
                sample_rate = request.get("sample_rate", 16000)
                audio_float32 = _decode_pcm16(request["audio_base64"])

                # NOTE(review): assumes model.transcribe accepts the samples and
                # a sample_rate keyword and returns (segments, info) - confirm
                # against the recorder class in use.
                segments, _ = model.transcribe(audio_float32, sample_rate=sample_rate)
                text = " ".join([s.text for s in segments if s.text.strip()])
                _emit({"type": "realtime", "text": text})

            elif event == "set_param":
                _set_model_param(model, request["param"], request["value"])
                _emit({"type": "log", "msg": f"Set {request['param']} to {request['value']}"})

            else:
                _emit({"type": "log", "msg": f"Evento desconocido: {request.get('event')}"})

        except Exception as e:
            _emit({
                "type": "error",
                "error": str(e),
                "trace": traceback.format_exc()
            })


if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user