diff --git a/whisper_live/assets/css/app.css b/whisper_live/assets/css/app.css index e0d24ce1..2723319c 100644 --- a/whisper_live/assets/css/app.css +++ b/whisper_live/assets/css/app.css @@ -3,8 +3,84 @@ @import "tailwindcss/utilities"; /* This file is for your main application CSS */ -.realtime { - white-space: pre-wrap; - font-family: monospace; - margin-top: 1em; -} + body { + background-color: #f4f4f9; + color: #333; + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + display: flex; + align-items: center; + justify-content: center; + height: 100vh; + margin: 0; + } + #container { + display: flex; + flex-direction: column; + align-items: center; + width: 100%; + max-width: 700px; + padding: 20px; + box-sizing: border-box; + gap: 20px; /* Add more vertical space between items */ + height: 90%; /* Fixed height to prevent layout shift */ + } + #status { + color: #0056b3; + font-size: 20px; + text-align: center; + } + #transcriptionContainer { + height: auto; /* Fixed height for approximately 3 lines of text */ + overflow-y: auto; + width: 100%; + padding: 10px; + box-sizing: border-box; + background-color: #f9f9f9; + border: 1px solid #ddd; + border-radius: 5px; + } + #transcription { + font-size: 18px; + line-height: 1.6; + color: #333; + word-wrap: break-word; + } + #fullTextContainer { + height: 150px; /* Fixed height to prevent layout shift */ + overflow-y: auto; + width: 100%; + padding: 10px; + box-sizing: border-box; + background-color: #f9f9f9; + border: 1px solid #ddd; + border-radius: 5px; + } + #fullText { + color: #4CAF50; + font-size: 18px; + font-weight: 600; + word-wrap: break-word; + } + .last-word { + color: #007bff; + font-weight: 600; + } + button { + padding: 12px 24px; + font-size: 16px; + cursor: pointer; + border: none; + border-radius: 5px; + margin: 5px; + transition: background-color 0.3s ease; + color: #fff; + background-color: #0056b3; + box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); + } + button:hover { + background-color: #007bff; + } + button:disabled { + background-color: #cccccc; + cursor: not-allowed; + } diff --git a/whisper_live/lib/whisper_live/audio_buffer.ex b/whisper_live/lib/whisper_live/audio_buffer.ex index 82f759d4..e81e9dbe 100644 --- a/whisper_live/lib/whisper_live/audio_buffer.ex +++ b/whisper_live/lib/whisper_live/audio_buffer.ex @@ -9,6 +9,8 @@ defmodule WhisperLive.AudioBuffer do def get_all(ref), do: GenServer.call(via(ref), :get_all) + def clear(ref), do: GenServer.call(via(ref), :clear) + def stop(ref), do: GenServer.stop(via(ref)) defp via(ref), do: {:via, Registry, {WhisperLive.AudioRegistry, ref}} @@ -20,4 +22,6 @@ defmodule WhisperLive.AudioBuffer do def handle_cast({:append, chunk}, state), do: {:noreply, [chunk | state]} def handle_call(:get_all, _from, state), do: {:reply, Enum.reverse(state), state} + + def handle_call(:clear, _from, _state), do: {:reply, :ok, []} end diff --git a/whisper_live/lib/whisper_live/transcriber.ex b/whisper_live/lib/whisper_live/transcriber.ex index a268e6a6..744e5f7e 100644 --- a/whisper_live/lib/whisper_live/transcriber.ex +++ b/whisper_live/lib/whisper_live/transcriber.ex @@ -33,7 +33,7 @@ defmodule WhisperLive.Transcriber do case send_to_whisper(tmpfile) do {:ok, response} -> - PubSub.broadcast(WhisperLive.PubSub, "transcription:#{ref}", {:transcription, response}) + PubSub.broadcast(WhisperLive.PubSub, "transcription", {:transcription, response}) {:error, reason} -> Logger.warning("Realtime transcription error: #{inspect(reason)}") @@ -90,7 +90,7 @@ defmodule WhisperLive.Transcriber do end defp send_to_whisper(filepath) do - url = "http://localhost:4000/infer" + url = "http://localhost:4000/tiny" {:ok, file_bin} = File.read(filepath) filename = Path.basename(filepath) @@ -108,9 +108,17 @@ defmodule WhisperLive.Transcriber do :httpc.request(:post, {url, headers, 'multipart/form-data; boundary=----ElixirBoundary', body}, [], []) |> case do - {:ok, {{_, 200, _}, _headers, body}} -> {:ok, to_string(body)} - {:ok, {{_, status, _}, _, body}} -> {:error, {:http_error, status, to_string(body)}} - error -> {:error, error} + {:ok, {{_, 200, _}, _headers, body}} -> + # Logger.info("en transcriber --------------------------\n -> > #{IO.iodata_to_binary(body)}") + # Phoenix.PubSub.broadcast(WhisperLive.PubSub, "transcription", {:transcription, "#{IO.iodata_to_binary(body)}"}) + + {:ok, "#{IO.iodata_to_binary(body)}"} + + {:ok, {{_, status, _}, _, body}} -> + {:error, {:http_error, status,"#{IO.iodata_to_binary(body)}"}} + + error -> + {:error, error} end end end diff --git a/whisper_live/lib/whisper_live_web/channels/audio_channel.ex b/whisper_live/lib/whisper_live_web/channels/audio_channel.ex index 18fa0e0c..ee3c3ee3 100644 --- a/whisper_live/lib/whisper_live_web/channels/audio_channel.ex +++ b/whisper_live/lib/whisper_live_web/channels/audio_channel.ex @@ -11,31 +11,13 @@ defmodule WhisperLiveWeb.AudioChannel do {:ok, socket} end - def handle_in("audio_chunk", %{"data" => base64_audio, "sample_rate" => sample_rate}, socket) do - # 1. Decodificas el audio base64 - {:ok, bin} = Base.decode64(base64_audio) - - # 2. Guardas o procesas el chunk de audio - # Podrías escribirlo en un archivo temporal para enviar a Whisper - tmpfile = tmp_path("chunk_#{socket.assigns.ref}") - :ok = File.write!(tmpfile, encode_wav(bin, sample_rate)) - - # 3. Llamas a la transcripción del chunk (podría ser sync o async) - case send_to_whisper(tmpfile) do - {:ok, transcription} -> - # 4. Envías el texto parcial por PubSub o Push a LiveView/cliente - Phoenix.PubSub.broadcast(YourApp.PubSub, "transcription:#{socket.assigns.ref}", {:transcription, transcription}) - - {:error, reason} -> - Logger.error("Error en transcripción parcial: #{inspect(reason)}") - end - - File.rm(tmpfile) - + def handle_in("audio_chunk", %{"data" => data, "sample_rate" => rate}, socket) do + {:ok, binary} = Base.decode64(data) + AudioBuffer.append(socket_id(socket), {rate, binary}) + Logger.info("📦 Chunk recibido: #{byte_size(binary)} bytes, sample_rate: #{rate}") {:noreply, socket} end - def handle_in("stop_audio", _payload, socket) do Logger.info("🛑 Grabación detenida por cliente") @@ -47,16 +29,8 @@ defmodule WhisperLiveWeb.AudioChannel do filename = "recordings/recording_#{System.system_time(:millisecond)}.wav" File.mkdir_p!("recordings") File.write!(filename, encode_wav(merged, rate)) - Logger.info("💾 Audio guardado en #{filename}") - - # 🔁 Transcribir automáticamente - case send_to_whisper(filename) do - {:ok, response} -> - Logger.info("📝 Transcripción recibida: #{response}") - {:error, reason} -> - Logger.error("❌ Error al transcribir: #{inspect(reason)}") - end - + whisper_large(filename) + File.rm!(filename) _ -> Logger.warning("⚠️ No se recibieron chunks de audio") end @@ -93,18 +67,16 @@ defmodule WhisperLiveWeb.AudioChannel do >> <> data end - defp send_to_whisper(filepath) do - url = "http://localhost:4000/infer" - + defp whisper_large(filepath) do + url = "http://localhost:4000/large" {:ok, file_bin} = File.read(filepath) filename = Path.basename(filepath) headers = [ - {'Content-Type', 'multipart/form-data; boundary=----ElixirBoundary'} + {'Content-Type', 'multipart/form-data; boundary=----ElixirBoundary'} ] - body = - [ + body = [ "------ElixirBoundary\r\n", "Content-Disposition: form-data; name=\"file\"; filename=\"#{filename}\"\r\n", "Content-Type: audio/wav\r\n\r\n", @@ -114,21 +86,17 @@ defmodule WhisperLiveWeb.AudioChannel do :httpc.request(:post, {url, headers, 'multipart/form-data; boundary=----ElixirBoundary', body}, [], []) |> case do - {:ok, {{_, 200, _}, _headers, body}} -> - {:ok, to_string(body)} + {:ok, {{_, 200, _}, _headers, body}} -> + # Logger.info("transcripcion mejorada --------------------------\n -> > #{IO.iodata_to_binary(body)}") + Phoenix.PubSub.broadcast(WhisperLive.PubSub, "transcription", {:transcription_m, "#{IO.iodata_to_binary(body)}"}) - {:ok, {{_, status, _}, _, body}} -> - {:error, {:http_error, status, to_string(body)}} + {:ok, "#{IO.iodata_to_binary(body)}"} - error -> - {:error, error} + {:ok, {{_, status, _}, _, body}} -> + {:error, {:http_error, status, IO.iodata_to_binary(body)}} + + error -> + {:error, error} end end - - defp tmp_path(prefix) do - unique = :erlang.unique_integer([:positive]) |> Integer.to_string() - filename = prefix <> "_" <> unique <> ".wav" - Path.join(System.tmp_dir!(), filename) - end - end diff --git a/whisper_live/lib/whisper_live_web/components/layouts/app.html.heex b/whisper_live/lib/whisper_live_web/components/layouts/app.html.heex index 3b3b6074..c99f3415 100644 --- a/whisper_live/lib/whisper_live_web/components/layouts/app.html.heex +++ b/whisper_live/lib/whisper_live_web/components/layouts/app.html.heex @@ -1,32 +1,5 @@ -
-
-
- - - -

- v{Application.spec(:phoenix, :vsn)} -

-
- -
-
-
-
- <.flash_group flash={@flash} /> +
+
{@inner_content}
diff --git a/whisper_live/lib/whisper_live_web/components/layouts/root.html.heex b/whisper_live/lib/whisper_live_web/components/layouts/root.html.heex index 1a98bae9..534c60fd 100644 --- a/whisper_live/lib/whisper_live_web/components/layouts/root.html.heex +++ b/whisper_live/lib/whisper_live_web/components/layouts/root.html.heex @@ -11,7 +11,7 @@ - + {@inner_content} diff --git a/whisper_live/lib/whisper_live_web/live/recorder.ex b/whisper_live/lib/whisper_live_web/live/recorder.ex index 86c84129..f7307f98 100644 --- a/whisper_live/lib/whisper_live_web/live/recorder.ex +++ b/whisper_live/lib/whisper_live_web/live/recorder.ex @@ -1,160 +1,201 @@ defmodule WhisperLiveWeb.Live.Recorder do - use WhisperLiveWeb, :live_view - alias Phoenix.PubSub + use WhisperLiveWeb, :live_view + alias Phoenix.PubSub - def mount(_, _, socket) do - if connected?(socket), do: PubSub.subscribe(WhisperLive.PubSub, "transcription:#{socket_id(socket)}") - {:ok, assign(socket, transcription: "")} - end + def mount(_, _, socket) do + PubSub.subscribe(WhisperLive.PubSub, "transcription") - def handle_info({:transcription, raw_json}, socket) do - new_text = - raw_json - |> Jason.decode!() - |> get_in(["chunks", Access.at(0), "text"]) + socket = + socket + |> assign(:transcription, "") + |> assign(:transcription_m, "") - {:noreply, update(socket, :transcription, &(&1 <> " " <> new_text))} - end + {:ok, socket} + end - def handle_event("start_recording", _params, socket) do - push_event(socket, "start-recording", %{}) - {:noreply, socket} - end + def handle_info({:transcription, raw_json}, socket) do + IO.inspect(raw_json, label: "en vivo ---------------->\n") - def handle_event("stop_recording", _params, socket) do - push_event(socket, "stop-recording", %{}) - {:noreply, socket} - end + new_text = + raw_json + |> Jason.decode!() + |> get_in(["chunks", Access.at(0), "text"]) - defp socket_id(socket), do: socket.transport_pid |> :erlang.pid_to_list() |> List.to_string() + old_text = socket.assigns.transcription - def render(assigns) do - ~H""" -
- - + # Sacar lo ya incluido al inicio + added_part = String.replace_prefix(new_text, old_text, "") -
-
<%= @transcription %>
-
-
+ {:noreply, update(socket, :transcription, &(&1 <> added_part))} + end - -
- """ - end + startButton.onclick = startRecording + stopButton.onclick = stopRecording + +
+ """ + end end diff --git a/whisper_live/recordings/recording_1752601669350.wav b/whisper_live/recordings/recording_1752601669350.wav deleted file mode 100644 index 0fa4041e..00000000 Binary files a/whisper_live/recordings/recording_1752601669350.wav and /dev/null differ diff --git a/whisper_live/recordings/recording_1752602147301.wav b/whisper_live/recordings/recording_1752602147301.wav deleted file mode 100644 index 581f28c0..00000000 Binary files a/whisper_live/recordings/recording_1752602147301.wav and /dev/null differ diff --git a/whisper_live/recordings/recording_1752605184367.wav b/whisper_live/recordings/recording_1752605184367.wav deleted file mode 100644 index 4f0c900f..00000000 Binary files a/whisper_live/recordings/recording_1752605184367.wav and /dev/null differ diff --git a/whisper_live/recordings/recording_1752605420377.wav b/whisper_live/recordings/recording_1752605420377.wav deleted file mode 100644 index 9c083894..00000000 Binary files a/whisper_live/recordings/recording_1752605420377.wav and /dev/null differ diff --git a/whisper_live/recordings/recording_1752678344186.wav b/whisper_live/recordings/recording_1752678344186.wav new file mode 100644 index 00000000..7b3dd449 Binary files /dev/null and b/whisper_live/recordings/recording_1752678344186.wav differ