Transcripcion en vivo + transcripcion mejorada
This commit is contained in:
@ -3,8 +3,84 @@
|
|||||||
@import "tailwindcss/utilities";
|
@import "tailwindcss/utilities";
|
||||||
|
|
||||||
/* This file is for your main application CSS */
|
/* This file is for your main application CSS */
|
||||||
.realtime {
|
body {
|
||||||
white-space: pre-wrap;
|
background-color: #f4f4f9;
|
||||||
font-family: monospace;
|
color: #333;
|
||||||
margin-top: 1em;
|
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
||||||
}
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
height: 100vh;
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
#container {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
align-items: center;
|
||||||
|
width: 100%;
|
||||||
|
max-width: 700px;
|
||||||
|
padding: 20px;
|
||||||
|
box-sizing: border-box;
|
||||||
|
gap: 20px; /* Add more vertical space between items */
|
||||||
|
height: 90%; /* Fixed height to prevent layout shift */
|
||||||
|
}
|
||||||
|
#status {
|
||||||
|
color: #0056b3;
|
||||||
|
font-size: 20px;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
#transcriptionContainer {
|
||||||
|
height: auto; /* Fixed height for approximately 3 lines of text */
|
||||||
|
overflow-y: auto;
|
||||||
|
width: 100%;
|
||||||
|
padding: 10px;
|
||||||
|
box-sizing: border-box;
|
||||||
|
background-color: #f9f9f9;
|
||||||
|
border: 1px solid #ddd;
|
||||||
|
border-radius: 5px;
|
||||||
|
}
|
||||||
|
#transcription {
|
||||||
|
font-size: 18px;
|
||||||
|
line-height: 1.6;
|
||||||
|
color: #333;
|
||||||
|
word-wrap: break-word;
|
||||||
|
}
|
||||||
|
#fullTextContainer {
|
||||||
|
height: 150px; /* Fixed height to prevent layout shift */
|
||||||
|
overflow-y: auto;
|
||||||
|
width: 100%;
|
||||||
|
padding: 10px;
|
||||||
|
box-sizing: border-box;
|
||||||
|
background-color: #f9f9f9;
|
||||||
|
border: 1px solid #ddd;
|
||||||
|
border-radius: 5px;
|
||||||
|
}
|
||||||
|
#fullText {
|
||||||
|
color: #4CAF50;
|
||||||
|
font-size: 18px;
|
||||||
|
font-weight: 600;
|
||||||
|
word-wrap: break-word;
|
||||||
|
}
|
||||||
|
.last-word {
|
||||||
|
color: #007bff;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
button {
|
||||||
|
padding: 12px 24px;
|
||||||
|
font-size: 16px;
|
||||||
|
cursor: pointer;
|
||||||
|
border: none;
|
||||||
|
border-radius: 5px;
|
||||||
|
margin: 5px;
|
||||||
|
transition: background-color 0.3s ease;
|
||||||
|
color: #fff;
|
||||||
|
background-color: #0056b3;
|
||||||
|
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
|
||||||
|
}
|
||||||
|
button:hover {
|
||||||
|
background-color: #007bff;
|
||||||
|
}
|
||||||
|
button:disabled {
|
||||||
|
background-color: #cccccc;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
|
@ -9,6 +9,8 @@ defmodule WhisperLive.AudioBuffer do
|
|||||||
|
|
||||||
def get_all(ref), do: GenServer.call(via(ref), :get_all)
|
def get_all(ref), do: GenServer.call(via(ref), :get_all)
|
||||||
|
|
||||||
|
def clear(ref), do: GenServer.call(via(ref), :clear)
|
||||||
|
|
||||||
def stop(ref), do: GenServer.stop(via(ref))
|
def stop(ref), do: GenServer.stop(via(ref))
|
||||||
|
|
||||||
defp via(ref), do: {:via, Registry, {WhisperLive.AudioRegistry, ref}}
|
defp via(ref), do: {:via, Registry, {WhisperLive.AudioRegistry, ref}}
|
||||||
@ -20,4 +22,6 @@ defmodule WhisperLive.AudioBuffer do
|
|||||||
def handle_cast({:append, chunk}, state), do: {:noreply, [chunk | state]}
|
def handle_cast({:append, chunk}, state), do: {:noreply, [chunk | state]}
|
||||||
|
|
||||||
def handle_call(:get_all, _from, state), do: {:reply, Enum.reverse(state), state}
|
def handle_call(:get_all, _from, state), do: {:reply, Enum.reverse(state), state}
|
||||||
|
|
||||||
|
def handle_call(:clear, _from, _state), do: {:reply, :ok, []}
|
||||||
end
|
end
|
||||||
|
@ -33,7 +33,7 @@ defmodule WhisperLive.Transcriber do
|
|||||||
|
|
||||||
case send_to_whisper(tmpfile) do
|
case send_to_whisper(tmpfile) do
|
||||||
{:ok, response} ->
|
{:ok, response} ->
|
||||||
PubSub.broadcast(WhisperLive.PubSub, "transcription:#{ref}", {:transcription, response})
|
PubSub.broadcast(WhisperLive.PubSub, "transcription", {:transcription, response})
|
||||||
|
|
||||||
{:error, reason} ->
|
{:error, reason} ->
|
||||||
Logger.warning("Realtime transcription error: #{inspect(reason)}")
|
Logger.warning("Realtime transcription error: #{inspect(reason)}")
|
||||||
@ -90,7 +90,7 @@ defmodule WhisperLive.Transcriber do
|
|||||||
end
|
end
|
||||||
|
|
||||||
defp send_to_whisper(filepath) do
|
defp send_to_whisper(filepath) do
|
||||||
url = "http://localhost:4000/infer"
|
url = "http://localhost:4000/tiny"
|
||||||
{:ok, file_bin} = File.read(filepath)
|
{:ok, file_bin} = File.read(filepath)
|
||||||
filename = Path.basename(filepath)
|
filename = Path.basename(filepath)
|
||||||
|
|
||||||
@ -108,9 +108,17 @@ defmodule WhisperLive.Transcriber do
|
|||||||
|
|
||||||
:httpc.request(:post, {url, headers, 'multipart/form-data; boundary=----ElixirBoundary', body}, [], [])
|
:httpc.request(:post, {url, headers, 'multipart/form-data; boundary=----ElixirBoundary', body}, [], [])
|
||||||
|> case do
|
|> case do
|
||||||
{:ok, {{_, 200, _}, _headers, body}} -> {:ok, to_string(body)}
|
{:ok, {{_, 200, _}, _headers, body}} ->
|
||||||
{:ok, {{_, status, _}, _, body}} -> {:error, {:http_error, status, to_string(body)}}
|
# Logger.info("en transcriber --------------------------\n -> > #{IO.iodata_to_binary(body)}")
|
||||||
error -> {:error, error}
|
# Phoenix.PubSub.broadcast(WhisperLive.PubSub, "transcription", {:transcription, "#{IO.iodata_to_binary(body)}"})
|
||||||
|
|
||||||
|
{:ok, "#{IO.iodata_to_binary(body)}"}
|
||||||
|
|
||||||
|
{:ok, {{_, status, _}, _, body}} ->
|
||||||
|
{:error, {:http_error, status,"#{IO.iodata_to_binary(body)}"}}
|
||||||
|
|
||||||
|
error ->
|
||||||
|
{:error, error}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -11,31 +11,13 @@ defmodule WhisperLiveWeb.AudioChannel do
|
|||||||
{:ok, socket}
|
{:ok, socket}
|
||||||
end
|
end
|
||||||
|
|
||||||
def handle_in("audio_chunk", %{"data" => base64_audio, "sample_rate" => sample_rate}, socket) do
|
def handle_in("audio_chunk", %{"data" => data, "sample_rate" => rate}, socket) do
|
||||||
# 1. Decodificas el audio base64
|
{:ok, binary} = Base.decode64(data)
|
||||||
{:ok, bin} = Base.decode64(base64_audio)
|
AudioBuffer.append(socket_id(socket), {rate, binary})
|
||||||
|
Logger.info("📦 Chunk recibido: #{byte_size(binary)} bytes, sample_rate: #{rate}")
|
||||||
# 2. Guardas o procesas el chunk de audio
|
|
||||||
# Podrías escribirlo en un archivo temporal para enviar a Whisper
|
|
||||||
tmpfile = tmp_path("chunk_#{socket.assigns.ref}")
|
|
||||||
:ok = File.write!(tmpfile, encode_wav(bin, sample_rate))
|
|
||||||
|
|
||||||
# 3. Llamas a la transcripción del chunk (podría ser sync o async)
|
|
||||||
case send_to_whisper(tmpfile) do
|
|
||||||
{:ok, transcription} ->
|
|
||||||
# 4. Envías el texto parcial por PubSub o Push a LiveView/cliente
|
|
||||||
Phoenix.PubSub.broadcast(YourApp.PubSub, "transcription:#{socket.assigns.ref}", {:transcription, transcription})
|
|
||||||
|
|
||||||
{:error, reason} ->
|
|
||||||
Logger.error("Error en transcripción parcial: #{inspect(reason)}")
|
|
||||||
end
|
|
||||||
|
|
||||||
File.rm(tmpfile)
|
|
||||||
|
|
||||||
{:noreply, socket}
|
{:noreply, socket}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
def handle_in("stop_audio", _payload, socket) do
|
def handle_in("stop_audio", _payload, socket) do
|
||||||
Logger.info("🛑 Grabación detenida por cliente")
|
Logger.info("🛑 Grabación detenida por cliente")
|
||||||
|
|
||||||
@ -47,16 +29,8 @@ defmodule WhisperLiveWeb.AudioChannel do
|
|||||||
filename = "recordings/recording_#{System.system_time(:millisecond)}.wav"
|
filename = "recordings/recording_#{System.system_time(:millisecond)}.wav"
|
||||||
File.mkdir_p!("recordings")
|
File.mkdir_p!("recordings")
|
||||||
File.write!(filename, encode_wav(merged, rate))
|
File.write!(filename, encode_wav(merged, rate))
|
||||||
Logger.info("💾 Audio guardado en #{filename}")
|
whisper_large(filename)
|
||||||
|
File.rm!(filename)
|
||||||
# 🔁 Transcribir automáticamente
|
|
||||||
case send_to_whisper(filename) do
|
|
||||||
{:ok, response} ->
|
|
||||||
Logger.info("📝 Transcripción recibida: #{response}")
|
|
||||||
{:error, reason} ->
|
|
||||||
Logger.error("❌ Error al transcribir: #{inspect(reason)}")
|
|
||||||
end
|
|
||||||
|
|
||||||
_ ->
|
_ ->
|
||||||
Logger.warning("⚠️ No se recibieron chunks de audio")
|
Logger.warning("⚠️ No se recibieron chunks de audio")
|
||||||
end
|
end
|
||||||
@ -93,9 +67,8 @@ defmodule WhisperLiveWeb.AudioChannel do
|
|||||||
>> <> data
|
>> <> data
|
||||||
end
|
end
|
||||||
|
|
||||||
defp send_to_whisper(filepath) do
|
defp whisper_large(filepath) do
|
||||||
url = "http://localhost:4000/infer"
|
url = "http://localhost:4000/large"
|
||||||
|
|
||||||
{:ok, file_bin} = File.read(filepath)
|
{:ok, file_bin} = File.read(filepath)
|
||||||
filename = Path.basename(filepath)
|
filename = Path.basename(filepath)
|
||||||
|
|
||||||
@ -103,8 +76,7 @@ defmodule WhisperLiveWeb.AudioChannel do
|
|||||||
{'Content-Type', 'multipart/form-data; boundary=----ElixirBoundary'}
|
{'Content-Type', 'multipart/form-data; boundary=----ElixirBoundary'}
|
||||||
]
|
]
|
||||||
|
|
||||||
body =
|
body = [
|
||||||
[
|
|
||||||
"------ElixirBoundary\r\n",
|
"------ElixirBoundary\r\n",
|
||||||
"Content-Disposition: form-data; name=\"file\"; filename=\"#{filename}\"\r\n",
|
"Content-Disposition: form-data; name=\"file\"; filename=\"#{filename}\"\r\n",
|
||||||
"Content-Type: audio/wav\r\n\r\n",
|
"Content-Type: audio/wav\r\n\r\n",
|
||||||
@ -115,20 +87,16 @@ defmodule WhisperLiveWeb.AudioChannel do
|
|||||||
:httpc.request(:post, {url, headers, 'multipart/form-data; boundary=----ElixirBoundary', body}, [], [])
|
:httpc.request(:post, {url, headers, 'multipart/form-data; boundary=----ElixirBoundary', body}, [], [])
|
||||||
|> case do
|
|> case do
|
||||||
{:ok, {{_, 200, _}, _headers, body}} ->
|
{:ok, {{_, 200, _}, _headers, body}} ->
|
||||||
{:ok, to_string(body)}
|
# Logger.info("transcripcion mejorada --------------------------\n -> > #{IO.iodata_to_binary(body)}")
|
||||||
|
Phoenix.PubSub.broadcast(WhisperLive.PubSub, "transcription", {:transcription_m, "#{IO.iodata_to_binary(body)}"})
|
||||||
|
|
||||||
|
{:ok, "#{IO.iodata_to_binary(body)}"}
|
||||||
|
|
||||||
{:ok, {{_, status, _}, _, body}} ->
|
{:ok, {{_, status, _}, _, body}} ->
|
||||||
{:error, {:http_error, status, to_string(body)}}
|
{:error, {:http_error, status, IO.iodata_to_binary(body)}}
|
||||||
|
|
||||||
error ->
|
error ->
|
||||||
{:error, error}
|
{:error, error}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
defp tmp_path(prefix) do
|
|
||||||
unique = :erlang.unique_integer([:positive]) |> Integer.to_string()
|
|
||||||
filename = prefix <> "_" <> unique <> ".wav"
|
|
||||||
Path.join(System.tmp_dir!(), filename)
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
@ -1,32 +1,5 @@
|
|||||||
<header class="px-4 sm:px-6 lg:px-8">
|
<main>
|
||||||
<div class="flex items-center justify-between border-b border-zinc-100 py-3 text-sm">
|
<div>
|
||||||
<div class="flex items-center gap-4">
|
|
||||||
<a href="/">
|
|
||||||
<img src={~p"/images/logo.svg"} width="36" />
|
|
||||||
</a>
|
|
||||||
<p class="bg-brand/5 text-brand rounded-full px-2 font-medium leading-6">
|
|
||||||
v{Application.spec(:phoenix, :vsn)}
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
<div class="flex items-center gap-4 font-semibold leading-6 text-zinc-900">
|
|
||||||
<a href="https://twitter.com/elixirphoenix" class="hover:text-zinc-700">
|
|
||||||
@elixirphoenix
|
|
||||||
</a>
|
|
||||||
<a href="https://github.com/phoenixframework/phoenix" class="hover:text-zinc-700">
|
|
||||||
GitHub
|
|
||||||
</a>
|
|
||||||
<a
|
|
||||||
href="https://hexdocs.pm/phoenix/overview.html"
|
|
||||||
class="rounded-lg bg-zinc-100 px-2 py-1 hover:bg-zinc-200/80"
|
|
||||||
>
|
|
||||||
Get Started <span aria-hidden="true">→</span>
|
|
||||||
</a>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</header>
|
|
||||||
<main class="px-4 py-20 sm:px-6 lg:px-8">
|
|
||||||
<div class="mx-auto max-w-2xl">
|
|
||||||
<.flash_group flash={@flash} />
|
|
||||||
{@inner_content}
|
{@inner_content}
|
||||||
</div>
|
</div>
|
||||||
</main>
|
</main>
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
<script defer phx-track-static type="text/javascript" src={~p"/assets/app.js"}>
|
<script defer phx-track-static type="text/javascript" src={~p"/assets/app.js"}>
|
||||||
</script>
|
</script>
|
||||||
</head>
|
</head>
|
||||||
<body class="bg-white">
|
<body>
|
||||||
{@inner_content}
|
{@inner_content}
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
@ -3,24 +3,49 @@ defmodule WhisperLiveWeb.Live.Recorder do
|
|||||||
alias Phoenix.PubSub
|
alias Phoenix.PubSub
|
||||||
|
|
||||||
def mount(_, _, socket) do
|
def mount(_, _, socket) do
|
||||||
if connected?(socket), do: PubSub.subscribe(WhisperLive.PubSub, "transcription:#{socket_id(socket)}")
|
PubSub.subscribe(WhisperLive.PubSub, "transcription")
|
||||||
{:ok, assign(socket, transcription: "")}
|
|
||||||
|
socket =
|
||||||
|
socket
|
||||||
|
|> assign(:transcription, "")
|
||||||
|
|> assign(:transcription_m, "")
|
||||||
|
|
||||||
|
{:ok, socket}
|
||||||
end
|
end
|
||||||
|
|
||||||
def handle_info({:transcription, raw_json}, socket) do
|
def handle_info({:transcription, raw_json}, socket) do
|
||||||
|
IO.inspect(raw_json, label: "en vivo ---------------->\n")
|
||||||
|
|
||||||
new_text =
|
new_text =
|
||||||
raw_json
|
raw_json
|
||||||
|> Jason.decode!()
|
|> Jason.decode!()
|
||||||
|> get_in(["chunks", Access.at(0), "text"])
|
|> get_in(["chunks", Access.at(0), "text"])
|
||||||
|
|
||||||
{:noreply, update(socket, :transcription, &(&1 <> " " <> new_text))}
|
old_text = socket.assigns.transcription
|
||||||
|
|
||||||
|
# Sacar lo ya incluido al inicio
|
||||||
|
added_part = String.replace_prefix(new_text, old_text, "")
|
||||||
|
|
||||||
|
{:noreply, update(socket, :transcription, &(&1 <> added_part))}
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
def handle_info({:transcription_m, raw_json}, socket) do
|
||||||
|
IO.inspect(raw_json, label: "meojada ---------------->\n")
|
||||||
|
|
||||||
|
new_text =
|
||||||
|
raw_json
|
||||||
|
|> Jason.decode!()
|
||||||
|
|> get_in(["chunks", Access.at(0), "text"])
|
||||||
|
{:noreply, update(socket, :transcription_m, &(&1 <> " " <> new_text))}
|
||||||
end
|
end
|
||||||
|
|
||||||
def handle_event("start_recording", _params, socket) do
|
def handle_event("start_recording", _params, socket) do
|
||||||
push_event(socket, "start-recording", %{})
|
push_event(socket, "start-recording", %{})
|
||||||
{:noreply, socket}
|
{:noreply, assign(socket, transcription: "", transcription_m: "")}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
def handle_event("stop_recording", _params, socket) do
|
def handle_event("stop_recording", _params, socket) do
|
||||||
push_event(socket, "stop-recording", %{})
|
push_event(socket, "stop-recording", %{})
|
||||||
{:noreply, socket}
|
{:noreply, socket}
|
||||||
@ -31,14 +56,30 @@ defmodule WhisperLiveWeb.Live.Recorder do
|
|||||||
def render(assigns) do
|
def render(assigns) do
|
||||||
~H"""
|
~H"""
|
||||||
<div id="recorder" data-hook="recorder">
|
<div id="recorder" data-hook="recorder">
|
||||||
<button id="startButton" phx-click="start_recording">Start Recording</button>
|
<div class="flex space-x-2">
|
||||||
<button id="stopButton" phx-click="stop_recording">Stop Recording</button>
|
<button id="startButton" phx-click="start_recording" class="px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600">
|
||||||
|
Start Recording
|
||||||
<div id="transcriptionContainer">
|
</button>
|
||||||
<div id="transcription" class="realtime"><%= @transcription %></div>
|
<button id="stopButton" phx-click="stop_recording" class="px-4 py-2 bg-red-500 text-white rounded hover:bg-red-600">
|
||||||
|
Stop Recording
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<div id="status" class="realtime"></div>
|
|
||||||
|
|
||||||
|
<div id="status" class="text-sm text-gray-600"></div>
|
||||||
|
|
||||||
|
<div id="transcriptionContainer" class="space-y-2">
|
||||||
|
<div class="p-2 bg-gray-100 rounded shadow">
|
||||||
|
<h2 class="text-sm font-semibold text-gray-700 mb-1">🟠 Transcripción en vivo</h2>
|
||||||
|
<p id="transcription" class="text-orange-600 whitespace-pre-wrap"><%= @transcription %></p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<%= if @transcription_m != "" do %>
|
||||||
|
<div class="p-2 bg-gray-100 rounded shadow">
|
||||||
|
<h2 class="text-sm font-semibold text-gray-700 mb-1">✅ Transcripción mejorada</h2>
|
||||||
|
<p class="text-green-600 whitespace-pre-wrap"><%= @transcription_m %></p>
|
||||||
|
</div>
|
||||||
|
<% end %>
|
||||||
|
</div>
|
||||||
<script type="module">
|
<script type="module">
|
||||||
import { Socket } from "https://cdn.skypack.dev/phoenix"
|
import { Socket } from "https://cdn.skypack.dev/phoenix"
|
||||||
|
|
||||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
whisper_live/recordings/recording_1752678344186.wav
Normal file
BIN
whisper_live/recordings/recording_1752678344186.wav
Normal file
Binary file not shown.
Reference in New Issue
Block a user