Realtime transcription, and switch to the large-model transcription. Connection to the large v3 model.

2025-07-02 16:23:19 -03:00
parent 77f87c3655
commit 46df5fc5eb
12 changed files with 211 additions and 24 deletions

View File

@@ -1,11 +1,13 @@
defmodule Recognition_VADWeb.DataChannel do
use Phoenix.Channel
require Logger
def join("data:lobby", _params, socket) do
Phoenix.PubSub.subscribe(Recognition_VAD.PubSub, "audio_output")
{:ok, socket}
end
# Partial
def handle_info({:realtime, msg}, socket) do
push(socket, "realtime", msg)
@@ -14,15 +16,27 @@ defmodule Recognition_VADWeb.DataChannel do
# Complete
def handle_info({:broadcast_audio, msg}, socket) do
push(socket, "transcription", msg)
push(socket, "realtime", msg)
{:noreply, socket}
end
def handle_info({:audio_saved, %{path: _path}}, socket) do
{:noreply, socket}
end
def handle_in("start_recording", _params, socket) do
GenServer.cast(Recognition_VAD.AudioProcessor, :reset)
{:noreply, socket}
end
# Receives base64-encoded audio (for safe transport)
def handle_in("audio_chunk", %{"data" => base64_chunk, "sample_rate" => sample_rate}, socket) do
Logger.debug("📥 Received audio_chunk with sample_rate=#{sample_rate}")
case Base.decode64(base64_chunk) do
{:ok, binary_audio} ->
Recognition_VAD.WhisperStreamer.push_chunk(binary_audio, sample_rate)
# GenServer.cast(Recognition_VAD.AudioProcessor, :save_wav)
GenServer.cast(Recognition_VAD.AudioProcessor, {:chunk, binary_audio, sample_rate}) # ✅ enable this line
{:noreply, socket}
:error ->
@@ -32,11 +46,14 @@ defmodule Recognition_VADWeb.DataChannel do
end
def handle_in("save_audio", _params, socket) do
GenServer.cast(Recognition_VAD.AudioProcessor, :save_wav)
{:noreply, socket}
end
def handle_in(_unknown, _payload, socket) do
{:noreply, socket}
end
end
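
For reference, a minimal channel-test sketch for the audio_chunk handler above (hypothetical: it assumes the standard Recognition_VADWeb.ChannelCase that mix phx.new generates, and that WhisperStreamer and AudioProcessor are running in the test environment):

defmodule Recognition_VADWeb.DataChannelTest do
  use Recognition_VADWeb.ChannelCase

  test "audio_chunk accepts base64-encoded audio" do
    {:ok, _reply, socket} =
      socket(Recognition_VADWeb.UserSocket, "user", %{})
      |> subscribe_and_join(Recognition_VADWeb.DataChannel, "data:lobby")

    # One kilobyte of silence, encoded the same way the browser pushes it
    chunk = Base.encode64(<<0::size(8192)>>)
    push(socket, "audio_chunk", %{"data" => chunk, "sample_rate" => 16_000})
  end
end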

View File

@@ -17,7 +17,8 @@ defmodule Recognition_VADWeb.Endpoint do
socket "/socket", Recognition_VADWeb.UserSocket,
websocket: true,
longpoll: false
longpoll: false,
pubsub_server: Recognition_VAD.PubSub
# Serve at "/" the static files from "priv/static" directory.
#
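
For context, both handle_info clauses in DataChannel above are fed by plain PubSub broadcasts on the "audio_output" topic, which is why the endpoint now names its pubsub_server. A minimal sketch of the producing side (topic, message tags, and the %{text: ...} shape come from the handlers and the client JS in this commit; the concrete text values are illustrative):

# Partial result: DataChannel pushes it to the browser as a "realtime" event
Phoenix.PubSub.broadcast(
  Recognition_VAD.PubSub,
  "audio_output",
  {:realtime, %{text: "partial transcription…"}}
)

# Finished sentence: pushed as both "transcription" and "realtime"
Phoenix.PubSub.broadcast(
  Recognition_VAD.PubSub,
  "audio_output",
  {:broadcast_audio, %{text: "full sentence."}}
)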

View File

@@ -1,19 +1,81 @@
defmodule Recognition_VADWeb.Stt.TestWithChannel do
use Recognition_VADWeb, :live_view
require Logger
def mount(_params, _session, socket) do
Phoenix.PubSub.subscribe(Recognition_VAD.PubSub, "large")
socket =
socket
|> assign(improved_transcription: "")
|> assign(audio_path: nil)
|> assign(realtime_transcription: "")
|> assign(improving?: false)
|> assign(view_stop: false)
|> assign(view_start: true)
|> assign(stop_recording: false)
|> assign(:audio_path, nil)
{:ok, socket}
end
def handle_event("start", %{"value" => ""}, socket) do
socket = assign(socket, view_start: false, view_stop: true)
{:noreply, socket}
end
def handle_event("stop_recording", %{"value" => ""}, socket) do
IO.inspect("stop_recording event in LiveView ----------------------")
socket = assign(socket, stop_recording: true)
{:noreply, socket}
end
def handle_info({:large_path, _level, large_path}, socket) do
IO.inspect(large_path, label: "large_path in live view ----------------------\n")
{:noreply, assign(socket, audio_path: large_path)}
end
def handle_info({:transcription_improved, _level, text}, socket) do
IO.inspect(text, label: "Log message received in LiveView ----------------------\n")
File.rm!(socket.assigns.audio_path)
{:noreply, assign(socket, improved_transcription: text, improving?: true)}
end
def render(assigns) do
~H"""
<div id="container">
<div id="status">Presioná "Start Recording"…</div>
<button id="startButton">Start Recording</button>
<button id="stopButton" disabled>Stop Recording</button>
<%= if @view_start == true do %>
<button id="startButton" phx-click="start">Start Recording</button>
<% else %>
<button id="startButton" disabled>Start Recording</button>
<% end %>
<%= if @view_stop == true do %>
<button id="stopButton" phx-click="stop_recording">Stop Recording</button>
<% else %>
<button id="stopButton" disabled>Stop Recording</button>
<% end %>
<%= case [@stop_recording, @improving?] do %>
<% [true, false] -> %>
<div id="status" class="px-3 py-1 text-xs font-medium leading-none font-bold text-blue-900 rounded-full animate-pulse">Mejorando transcripción...</div>
<% [true, true] -> %>
<div id="status">Transcripción Final.</div>
<% _ -> %>
<div id="status">Presioná "Start Recording"…</div>
<% end %>
<div id="transcriptionContainer">
<div id="transcription" class="realtime"></div>
<%= if @improving? == false do %>
<div>
<div id="transcription" phx-update="ignore" class="realtime px-3 py-1 text-xs font-medium leading-none font-bold text-blue-900 rounded-full animate-pulse"></div>
</div>
<% else %>
<div><%= @improved_transcription %></div>
<% end %>
</div>
<script type="module">
@@ -28,8 +90,8 @@ defmodule Recognition_VADWeb.Stt.TestWithChannel do
let audioContext, mediaStream, mediaProcessor;
async function startRecording() {
startButton.disabled = true;
stopButton.disabled = false;
//startButton.disabled = true;
// stopButton.disabled = false;
statusDiv.textContent = "🎙 Grabando…";
transcriptionDiv.innerHTML = "";
@@ -42,6 +104,7 @@ defmodule Recognition_VADWeb.Stt.TestWithChannel do
.receive("ok", () => {
statusDiv.textContent = "✅ Conectado a Phoenix STT";
console.log("Canal conectado");
channel.push("start_recording", {});
})
.receive("error", () => {
statusDiv.textContent = "❌ Error al conectar canal";
@@ -65,7 +128,6 @@ defmodule Recognition_VADWeb.Stt.TestWithChannel do
`;
});
// Complete sentence (after processing chunks)
channel.on("transcription", payload => {
const sentence = payload.text.trim();
@@ -107,15 +169,28 @@ defmodule Recognition_VADWeb.Stt.TestWithChannel do
}
function stopRecording() {
stopButton.disabled = true;
startButton.disabled = false;
statusDiv.textContent = "🛑 Grabación detenida.";
if (mediaProcessor) mediaProcessor.disconnect();
if (audioContext) audioContext.close();
if (mediaStream) mediaStream.getTracks().forEach(track => track.stop());
if (channel) channel.leave();
if (socket) socket.disconnect();
if (channel) {
channel.push("save_audio", {}).receive("ok", (resp) => {
console.log("Recibí audio_path del canal:", resp.audio_path);
const hookElement = document.getElementById("lv-container");
if (hookElement && resp.audio_path) {
hookElement.dispatchEvent(new CustomEvent("audio_path", { detail: { audio_path: resp.audio_path } }));
}
});
// Wait 5 seconds before closing the channel and socket
setTimeout(() => {
console.log("Closing channel and socket after a 5-second wait for late messages...");
channel.leave();
if (socket) socket.disconnect();
}, 5000);
}
}
document.getElementById("startButton").onclick = startRecording;
@@ -139,5 +214,4 @@ defmodule Recognition_VADWeb.Stt.TestWithChannel do
</div>
"""
end
end
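
The LiveView gets its data the same way, over the "large" topic it subscribes to in mount/3. A sketch of the two broadcasts its handle_info clauses expect (tuple tags and arity come from the clauses above; the :info level and the concrete values are hypothetical):

# Tells the LiveView where the recorded WAV was written (path is hypothetical)
Phoenix.PubSub.broadcast(
  Recognition_VAD.PubSub,
  "large",
  {:large_path, :info, "/tmp/recording.wav"}
)

# Delivers the improved large-v3 transcription; the LiveView then deletes the WAV
Phoenix.PubSub.broadcast(
  Recognition_VAD.PubSub,
  "large",
  {:transcription_improved, :info, "Final text from the large v3 model."}
)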

View File

@@ -19,6 +19,7 @@ defmodule Recognition_VADWeb.Router do
get "/", PageController, :home
live "/sttest", Stt.TestWithChannel
end
# Other scopes may use custom stacks.
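
And a quick way to verify the new /sttest route mounts (a sketch assuming the stock Recognition_VADWeb.ConnCase and Phoenix.LiveViewTest):

defmodule Recognition_VADWeb.Stt.TestWithChannelTest do
  use Recognition_VADWeb.ConnCase
  import Phoenix.LiveViewTest

  test "GET /sttest mounts the recording LiveView", %{conn: conn} do
    {:ok, _view, html} = live(conn, "/sttest")
    assert html =~ "Start Recording"
  end
end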