whisper large v3, arranque run.sh

This commit is contained in:
2025-08-04 19:23:40 +00:00
parent 08cf12beb1
commit 4a22a68ce3
1244 changed files with 235 additions and 207 deletions

View File

@ -31,49 +31,69 @@ defmodule WhisperWeb.AudioChannel do
Logger.info("Chunk recibido: #{byte_size(audio)} bytes, sample_rate: #{rate}")
AudioBuffer.append(ref, {rate, audio})
chunks = AudioBuffer.get_and_clear(ref)
# {:ok, path} = AudioSaver.save_chunk_as_wav(ref, audio, rate, "part")
# AudioFilesList.add_file(path)
start_total = System.monotonic_time(:millisecond)
if chunks != [] do
Task.start(fn ->
[{rate, _} | _] = chunks
full_audio = Enum.map(chunks, fn {_, bin} -> bin end) |> IO.iodata_to_binary()
{wav_time, {:ok, path}} =
:timer.tc(fn ->
AudioSaver.save_chunk_as_wav(ref, full_audio, rate, "part")
end)
model_start = System.monotonic_time(:millisecond)
Logger.info("WAV guardado en #{div(wav_time, 1000)} ms")
transcription =
if path do
case Nx.Serving.batched_run(Whisper.LargeModel.Serving, {:file, path}) do
%{chunks: chunks} ->
chunks
|> Enum.map(& &1.text)
|> Enum.join(" ")
_ ->
"Transcripción no disponible"
end
else
"Archivo no disponible"
end
model_end = System.monotonic_time(:millisecond)
Logger.info("El modelo procesó en #{model_end - model_start} ms")
Logger.info("✅ Transcripción:\n#{transcription}")
message = %{"chunks" => [%{"text" => transcription}]}
PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription, %{
"received_at" => model_start,
"text" => transcription
}})
File.rm!(path)
end_total = System.monotonic_time(:millisecond)
Logger.info("⏱ Total procesamiento stop_audio: #{end_total - start_total} ms")
end)
end
{:noreply, socket}
end
@doc """
Handles the client's `"stop_audio"` event.

Drains every chunk still buffered for this socket, concatenates the audio and
writes it to a WAV file with the `"final"` suffix. The transcription runs in a
background task; its text is broadcast on the `"transcription"` topic as
`{:transcription, %{"received_at" => ms, "text" => text}}`, which is the map
shape `WhisperWeb.VadLive.handle_info/2` pattern-matches. `received_at` is a
`System.monotonic_time(:millisecond)` reading taken when the WAV was written.
The temporary WAV file is removed once the task finishes, even when the
transcription raises.

When the buffer is empty nothing is saved or broadcast.

Always returns `{:noreply, socket}`.
"""
def handle_in("stop_audio", _payload, socket) do
  Logger.info("🛑 Grabación detenida por cliente")
  ref = socket_id(socket)

  case AudioBuffer.get_and_clear(ref) do
    [] ->
      :ok

    [{rate, _} | _] = chunks ->
      full_audio = chunks |> Enum.map(fn {_, bin} -> bin end) |> IO.iodata_to_binary()
      {:ok, path} = AudioSaver.save_chunk_as_wav(ref, full_audio, rate, "final")

      # Same clock the per-chunk path uses, so subscribers can order both kinds
      # of message by "received_at".
      received_at = System.monotonic_time(:millisecond)

      Task.start(fn ->
        try do
          transcription = Whisper.SendToModel.large(path)
          Logger.info("✅ Transcripción completa:\n#{transcription}")

          # Broadcast a plain map rather than a JSON string: the LiveView
          # subscriber only matches the map shape, so a JSON payload would
          # raise a FunctionClauseError there.
          Phoenix.PubSub.broadcast(
            Whisper.PubSub,
            "transcription",
            {:transcription, %{"received_at" => received_at, "text" => transcription}}
          )
        after
          # Do not leave the temporary WAV behind when the model call fails.
          File.rm!(path)
        end
      end)
  end

  {:noreply, socket}
end
# Builds a per-connection identifier string from the socket's transport pid,
# i.e. the textual form produced by :erlang.pid_to_list/1.
defp socket_id(socket) do
  pid_chars = :erlang.pid_to_list(socket.transport_pid)
  List.to_string(pid_chars)
end
@doc """
Writes `bin` unchanged to a new `.raw` file under the `recordings` directory.

The directory is created if it is missing. The file name is built from `ref`
and the value returned by `Whisper.Counter.next/1` for that `ref`.

Returns `{:ok, path}`; raises if the directory or the file cannot be written.
"""
def save_raw(ref, bin) do
  File.mkdir_p!("recordings/")

  path = Path.join("recordings", "#{ref}_#{Whisper.Counter.next(ref)}.raw")
  File.write!(path, bin)

  {:ok, path}
end
end

View File

@ -1,32 +1,6 @@
<header class="px-4 sm:px-6 lg:px-8">
<div class="flex items-center justify-between border-b border-zinc-100 py-3 text-sm">
<div class="flex items-center gap-4">
<a href="/">
<img src={~p"/images/logo.svg"} width="36" />
</a>
<p class="bg-brand/5 text-brand rounded-full px-2 font-medium leading-6">
v{Application.spec(:phoenix, :vsn)}
</p>
</div>
<div class="flex items-center gap-4 font-semibold leading-6 text-zinc-900">
<a href="https://twitter.com/elixirphoenix" class="hover:text-zinc-700">
@elixirphoenix
</a>
<a href="https://github.com/phoenixframework/phoenix" class="hover:text-zinc-700">
GitHub
</a>
<a
href="https://hexdocs.pm/phoenix/overview.html"
class="rounded-lg bg-zinc-100 px-2 py-1 hover:bg-zinc-200/80"
>
Get Started <span aria-hidden="true">&rarr;</span>
</a>
</div>
</div>
</header>
<%!-- Page body: renders the flash messages followed by the content of the current view. --%>
<main class="px-4 py-20 sm:px-6 lg:px-8">
<div class="mx-auto max-w-2xl">
<.flash_group flash={@flash} />
{@inner_content}
</div>
</main>

View File

@ -9,6 +9,7 @@ defmodule WhisperWeb.VadLive do
socket
|> assign(:transcription, "")
|> assign(:started, false)
|> assign(:transcriptions, [])
{:ok, socket}
end
@ -18,31 +19,50 @@ defmodule WhisperWeb.VadLive do
{:noreply, assign(socket, started: true)}
end
def handle_info({:transcription, raw_json}, socket) do
new_text =
raw_json
|> Jason.decode!()
|> get_in(["chunks", Access.at(0), "text"])
{:noreply, update(socket, :transcription, &(&1 <> " " <> new_text))}
# Handles the "stop_vad" UI event: asks the client hook to stop voice-activity
# detection and marks the session as not started.
def handle_event("stop_vad", _params, socket) do
  # push_event/3 does not mutate the socket; it returns an updated one. The
  # event is only delivered when that returned socket is the one handed back
  # to LiveView, so it must be threaded through instead of discarded.
  socket =
    socket
    |> push_event("stop-vad", %{})
    |> assign(started: false)

  {:noreply, socket}
end
# Receives one transcription broadcast, stores it alongside the ones already
# held in the socket, keeps the list ordered by "received_at" and refreshes the
# displayed text (all texts joined with a single space).
def handle_info({:transcription, %{"received_at" => ts, "text" => new_text}}, socket) do
  entry = %{received_at: ts, text: new_text}

  # Results may arrive out of order, so re-sort on every insert.
  ordered = Enum.sort_by([entry | socket.assigns.transcriptions], & &1.received_at)
  joined = Enum.map_join(ordered, " ", & &1.text)

  {:noreply, assign(socket, transcriptions: ordered, transcription: joined)}
end
# Renders the VAD view: a start button (shown only while VAD is not started),
# a status placeholder inside the hook container, and the transcription panel
# once there is text to show.
def render(assigns) do
  ~H"""
  <div id="vad-container" phx-hook="VadHook">
    <%= if !@started do %>
      <button phx-click="start_vad" class="btn btn-primary">🎙 Iniciar VAD</button>
    <% end %>
    <div id="vad-status" class="mt-4 text-sm text-gray-700"></div>
  </div>
  <div id="transcriptionContainer" class="w-full max-w-2xl space-y-4">
    <%= if @transcription != "" do %>
      <div class="p-4 bg-gray-100 rounded shadow-md">
        <h2 class="text-sm font-semibold text-gray-700 mb-2">✅ Transcripción</h2>
        <%!-- Kept on one line: whitespace-pre-wrap would render surrounding newlines/indentation. --%>
        <p class="text-green-600 whitespace-pre-wrap break-words text-sm leading-relaxed"><%= @transcription %></p>
      </div>
    <% end %>
  </div>
  """
end
end