correccion VAD en formato binario pcm16 - agrego transcripcion al live
This commit is contained in:
		
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							| @ -1,8 +1,8 @@ | ||||
| {application,whisper, | ||||
|              [{modules,['Elixir.AudioBuffer','Elixir.AudioFilesList', | ||||
|                         'Elixir.AudioSaver','Elixir.Whisper', | ||||
|                         'Elixir.Whisper.Application','Elixir.Whisper.Counter', | ||||
|                         'Elixir.Whisper.LargeModel','Elixir.Whisper.Mailer', | ||||
|              [{modules,['Elixir.AudioBuffer','Elixir.AudioSaver', | ||||
|                         'Elixir.Whisper','Elixir.Whisper.Application', | ||||
|                         'Elixir.Whisper.Counter','Elixir.Whisper.LargeModel', | ||||
|                         'Elixir.Whisper.Mailer', | ||||
|                         'Elixir.Whisper.RealtimeModel', | ||||
|                         'Elixir.Whisper.SendToModel', | ||||
|                         'Elixir.Whisper.Transcriber','Elixir.WhisperWeb', | ||||
|  | ||||
| @ -4,7 +4,6 @@ export const VadHook = { | ||||
|   async mounted() { | ||||
|     const statusDiv = document.getElementById("vad-status"); | ||||
|  | ||||
|     // Cargar onnxruntime y luego vad-web | ||||
|     const ortScript = document.createElement("script"); | ||||
|     ortScript.src = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/ort.js"; | ||||
|  | ||||
| @ -13,7 +12,6 @@ export const VadHook = { | ||||
|  | ||||
|     ortScript.onload = () => { | ||||
|       vadScript.onload = async () => { | ||||
|         // Inicializar canal Phoenix | ||||
|         this.socket = new Socket("ws://localhost:4003/socket"); | ||||
|         this.socket.connect(); | ||||
|         this.channel = this.socket.channel("audio:lobby"); | ||||
| @ -21,40 +19,32 @@ export const VadHook = { | ||||
|           console.log("✅ Canal audio:lobby unido."); | ||||
|         }); | ||||
|  | ||||
|         // Preparar VAD pero no arrancar aún | ||||
|         this.myvad = await vad.MicVAD.new({ | ||||
|         const myvad = await vad.MicVAD.new({ | ||||
|           onSpeechStart: () => { | ||||
|             statusDiv.textContent = "🎤 Voz detectada..."; | ||||
|           }, | ||||
|           onSpeechEnd: async (float32Audio) => { | ||||
|             statusDiv.textContent = "✅ Voz finalizada. Enviando audio..."; | ||||
|  | ||||
|             // Enviar el audio correctamente formateado | ||||
|             await sendAudioChunk(float32Audio, this.channel); | ||||
|  | ||||
|             // Indicar stop si querés (como payload vacío JSON) | ||||
|             this.channel.push("stop_audio", {}); | ||||
|           } | ||||
|         }); | ||||
|  | ||||
|         // Esperar eventos desde LiveView | ||||
|         this.handleEvent("init-vad", async () => { | ||||
|           await this.myvad.start(); | ||||
|           statusDiv.textContent = "🚀 VAD iniciado."; | ||||
|         }); | ||||
|  | ||||
|         this.handleEvent("stop-vad", async () => { | ||||
|           if (this.myvad) { | ||||
|             await this.myvad.stop(); | ||||
|             statusDiv.textContent = "🛑 VAD detenido."; | ||||
|           } | ||||
|         }); | ||||
|         myvad.start(); | ||||
|         statusDiv.textContent = "🚀 VAD iniciado."; | ||||
|       }; | ||||
|  | ||||
|       document.body.appendChild(vadScript); | ||||
|     }; | ||||
|  | ||||
|     document.body.appendChild(ortScript); | ||||
|      | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // Convertir Float32Array a PCM 16-bit | ||||
| // Función de helper para enviar el chunk | ||||
| function float32ToInt16(float32Array) { | ||||
|   const int16Array = new Int16Array(float32Array.length); | ||||
|   for (let i = 0; i < float32Array.length; i++) { | ||||
| @ -64,16 +54,24 @@ function float32ToInt16(float32Array) { | ||||
|   return int16Array; | ||||
| } | ||||
|  | ||||
| // Enviar audio binario al canal | ||||
| async function sendAudioChunk(float32Audio, channel) { | ||||
|   const pcm16 = float32ToInt16(float32Audio); | ||||
|   const header = JSON.stringify({ sample_rate: 16000 }); | ||||
|   const headerBytes = new TextEncoder().encode(header); | ||||
|   const totalLength = 2 + headerBytes.length + pcm16.byteLength; | ||||
|   const audioBytes = new Uint8Array(pcm16.buffer); // same as merged in el otro ejemplo | ||||
|   const totalLength = 2 + headerBytes.length + audioBytes.length; | ||||
|   const buffer = new ArrayBuffer(totalLength); | ||||
|   const view = new DataView(buffer); | ||||
|   view.setUint16(0, headerBytes.length, true); | ||||
|  | ||||
|   // Encabezado: longitud en big endian | ||||
|   view.setUint16(0, headerBytes.length, false); // <== big endian | ||||
|  | ||||
|   // Copiar header y audio al buffer | ||||
|   new Uint8Array(buffer, 2, headerBytes.length).set(headerBytes); | ||||
|   new Uint8Array(buffer, 2 + headerBytes.length).set(new Uint8Array(pcm16.buffer)); | ||||
|   channel.pushBinary(buffer); | ||||
|   new Uint8Array(buffer, 2 + headerBytes.length).set(audioBytes); | ||||
|  | ||||
|   // Enviar el buffer binario | ||||
|   channel.push("audio_chunk", buffer); | ||||
|   console.log("📤 Chunk binario enviado"); | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -26,8 +26,8 @@ defmodule Whisper.Application do | ||||
|       {Phoenix.PubSub, name: Whisper.PubSub}, | ||||
|       WhisperWeb.Endpoint, | ||||
|       Whisper.Counter, | ||||
|       AudioBuffer, | ||||
|       AudioFilesList | ||||
|       AudioBuffer | ||||
|       # AudioFilesList | ||||
|     ] | ||||
|  | ||||
|     opts = [strategy: :one_for_one, name: Whisper.Supervisor] | ||||
|  | ||||
| @ -1,66 +1,66 @@ | ||||
| defmodule AudioFilesList do | ||||
|   use GenServer | ||||
|   require Logger | ||||
|   alias Phoenix.PubSub | ||||
| # defmodule AudioFilesList do | ||||
| #   use GenServer | ||||
| #   require Logger | ||||
| #   alias Phoenix.PubSub | ||||
|  | ||||
|   def start_link(_opts) do | ||||
|     GenServer.start_link(__MODULE__, :idle, name: __MODULE__) | ||||
|   end | ||||
| #   def start_link(_opts) do | ||||
| #     GenServer.start_link(__MODULE__, :idle, name: __MODULE__) | ||||
| #   end | ||||
|  | ||||
|   def add_file(path) do | ||||
|     Logger.debug("add file") | ||||
|     GenServer.cast(__MODULE__, {:new_file, path}) | ||||
|   end | ||||
| #   def add_file(path) do | ||||
| #     Logger.debug("add file") | ||||
| #     GenServer.cast(__MODULE__, {:new_file, path}) | ||||
| #   end | ||||
|  | ||||
|   def init(:idle) do | ||||
|     Logger.info("AudioFilesList iniciado") | ||||
|     {:ok, %{queue: [], processing: false}} | ||||
|   end | ||||
| #   def init(:idle) do | ||||
| #     Logger.info("AudioFilesList iniciado") | ||||
| #     {:ok, %{queue: [], processing: false}} | ||||
| #   end | ||||
|  | ||||
|  | ||||
|   def handle_cast({:new_file, path}, %{queue: queue, processing: false} = state) do | ||||
|     Logger.info("📥 Archivo encolado: #{path}") | ||||
|     queue = queue ++ [path] | ||||
|     [next | rest] = queue | ||||
|     {:noreply, %{queue: rest, processing: false}} | ||||
|   end | ||||
| #   def handle_cast({:new_file, path}, %{queue: queue, processing: false} = state) do | ||||
| #     Logger.info("📥 Archivo encolado: #{path}") | ||||
| #     queue = queue ++ [path] | ||||
| #     [next | rest] = queue | ||||
| #     {:noreply, %{queue: rest, processing: false}} | ||||
| #   end | ||||
|  | ||||
|   def handle_cast({:new_file, path}, %{queue: queue, processing: true} = state) do | ||||
|     {:noreply, %{state | queue: queue ++ [path]}} | ||||
|   end | ||||
| #   def handle_cast({:new_file, path}, %{queue: queue, processing: true} = state) do | ||||
| #     {:noreply, %{state | queue: queue ++ [path]}} | ||||
| #   end | ||||
|  | ||||
|   def handle_info(:done, %{queue: []} = state) do | ||||
|     {:noreply, %{state | processing: false}} | ||||
|   end | ||||
| #   def handle_info(:done, %{queue: []} = state) do | ||||
| #     {:noreply, %{state | processing: false}} | ||||
| #   end | ||||
|  | ||||
|   def handle_info(:done, %{queue: [next | rest]} = state) do | ||||
|     {:noreply, %{state | queue: rest}} | ||||
|   end | ||||
| #   def handle_info(:done, %{queue: [next | rest]} = state) do | ||||
| #     {:noreply, %{state | queue: rest}} | ||||
| #   end | ||||
|  | ||||
|   # def handle_cast({:done_processing, path}, %{queue: queue} = state) do | ||||
|   #   new_queue = Enum.reject(queue, fn p -> p == path end) | ||||
|   #   Logger.info("🗑️ Archivo eliminado y removido de la cola: #{path}") | ||||
|   #   {:noreply, %{state | queue: new_queue}} | ||||
|   # end | ||||
| #   # def handle_cast({:done_processing, path}, %{queue: queue} = state) do | ||||
| #   #   new_queue = Enum.reject(queue, fn p -> p == path end) | ||||
| #   #   Logger.info("🗑️ Archivo eliminado y removido de la cola: #{path}") | ||||
| #   #   {:noreply, %{state | queue: new_queue}} | ||||
| #   # end | ||||
|  | ||||
|   # defp process_file(path) do | ||||
|   #   Logger.info("▶️ Inicia procesamiento realtime: #{path}") | ||||
| #   # defp process_file(path) do | ||||
| #   #   Logger.info("▶️ Inicia procesamiento realtime: #{path}") | ||||
|  | ||||
|   #   Task.start(fn -> | ||||
|   #       case Whisper.SendToModel.realtime(path) do | ||||
|   #         {:ok, text} when is_binary(text) -> | ||||
|   #           message = %{"chunks" => [%{"text" => text}]} | ||||
|   #           Phoenix.PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription, Jason.encode!(message)}) | ||||
|   #           Logger.info("✅ Transcripción (realtime): #{text}") | ||||
|   #           File.rm!(path) | ||||
|   #           # AudioFilesList.done_processing(path) | ||||
|   #         {:error, reason} -> | ||||
|   #           Logger.error("❌ Error transcribiendo: #{inspect(reason)}") | ||||
|   #       end | ||||
| #   #   Task.start(fn -> | ||||
| #   #       case Whisper.SendToModel.realtime(path) do | ||||
| #   #         {:ok, text} when is_binary(text) -> | ||||
| #   #           message = %{"chunks" => [%{"text" => text}]} | ||||
| #   #           Phoenix.PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription, Jason.encode!(message)}) | ||||
| #   #           Logger.info("✅ Transcripción (realtime): #{text}") | ||||
| #   #           File.rm!(path) | ||||
| #   #           # AudioFilesList.done_processing(path) | ||||
| #   #         {:error, reason} -> | ||||
| #   #           Logger.error("❌ Error transcribiendo: #{inspect(reason)}") | ||||
| #   #       end | ||||
|         | ||||
|   #       send(__MODULE__, :done) | ||||
| #   #       send(__MODULE__, :done) | ||||
|  | ||||
|   #   end) | ||||
|   # end | ||||
| #   #   end) | ||||
| #   # end | ||||
|  | ||||
| end | ||||
| # end | ||||
|  | ||||
| @ -26,22 +26,20 @@ defmodule WhisperWeb.AudioChannel do | ||||
|     <<header_len::16, rest::binary>> = raw_binary | ||||
|     <<header::binary-size(header_len), audio::binary>> = rest | ||||
|  | ||||
|     IO.inspect(header, label: "HEADER BINARIO RECIBIDO") | ||||
|     %{"sample_rate" => rate} = Jason.decode!(header) | ||||
|     ref = socket_id(socket) | ||||
|  | ||||
|     case Jason.decode(header) do | ||||
|       {:ok, %{"sample_rate" => rate}} -> | ||||
|         Logger.info("Chunk recibido: #{byte_size(audio)} bytes, sample_rate: #{rate}") | ||||
|         AudioBuffer.append(socket_id(socket), {rate, audio}) | ||||
|         {:noreply, socket} | ||||
|     Logger.info("Chunk recibido: #{byte_size(audio)} bytes, sample_rate: #{rate}") | ||||
|     AudioBuffer.append(ref, {rate, audio}) | ||||
|  | ||||
|       {:error, reason} -> | ||||
|         Logger.error("Error decodificando header JSON: #{inspect(reason)}") | ||||
|         {:noreply, socket} | ||||
|     end | ||||
|     # {:ok, path} = AudioSaver.save_chunk_as_wav(ref, audio, rate, "part") | ||||
|     # AudioFilesList.add_file(path) | ||||
|  | ||||
|  | ||||
|     {:noreply, socket} | ||||
|   end | ||||
|  | ||||
|  | ||||
|  | ||||
|   @doc """ | ||||
|   Recupera todos los chunks acumulados en el buffer, los concatena y guarda un archivo WAV final (sufijo `"final"`). | ||||
|   """ | ||||
| @ -60,8 +58,8 @@ defmodule WhisperWeb.AudioChannel do | ||||
|       Task.start(fn -> | ||||
|         transcription = Whisper.SendToModel.large(path) | ||||
|         Logger.info("✅ Transcripción completa:\n#{transcription}") | ||||
|         # message = %{"chunks" => [%{"text" => transcription}]} | ||||
|         # Phoenix.PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription_m, Jason.encode!(message)}) | ||||
|         message = %{"chunks" => [%{"text" => transcription}]} | ||||
|         Phoenix.PubSub.broadcast(Whisper.PubSub, "transcription", {:transcription, Jason.encode!(message)}) | ||||
|         File.rm!(path) | ||||
|       end) | ||||
|     end | ||||
|  | ||||
| @ -1,28 +1,48 @@ | ||||
| defmodule WhisperWeb.VadLive do | ||||
|     use WhisperWeb, :live_view | ||||
|   alias Phoenix.PubSub | ||||
|  | ||||
|     def mount(_, _, socket) do | ||||
|         {:ok, assign(socket, started: false)} | ||||
|         PubSub.subscribe(Whisper.PubSub, "transcription") | ||||
|  | ||||
|         socket = | ||||
|             socket | ||||
|             |> assign(:transcription, "") | ||||
|             |> assign(:started, false) | ||||
|  | ||||
|         {:ok, socket} | ||||
|     end | ||||
|  | ||||
|     def handle_event("start_vad", _params, socket) do | ||||
|         push_event(socket, "init-vad", %{}) | ||||
|         {:noreply, assign(socket, started: true)} | ||||
|     end | ||||
|  | ||||
|     def handle_info({:transcription, raw_json}, socket) do | ||||
|         new_text = | ||||
|             raw_json | ||||
|             |> Jason.decode!() | ||||
|             |> get_in(["chunks", Access.at(0), "text"]) | ||||
|         {:noreply, update(socket, :transcription, &(&1 <> " " <> new_text))} | ||||
|     end | ||||
|  | ||||
|     def render(assigns) do | ||||
|         ~H""" | ||||
|         <div id="vad-container" phx-hook="VadHook"> | ||||
|             <button phx-click="start_vad" class="btn btn-primary">🎙 Iniciar VAD</button> | ||||
|             <button phx-click="stop_vad" class="btn btn-danger">🛑 Detener VAD</button> | ||||
|  | ||||
|             <div id="vad-status" class="mt-4 text-sm text-gray-700"></div> | ||||
|         </div> | ||||
|      | ||||
|         <div id="transcriptionContainer" class="w-full max-w-2xl space-y-4"> | ||||
|             <%= if @transcription != "" do %> | ||||
|                 <div class="p-4 bg-gray-100 rounded shadow-md"> | ||||
|                     <h2 class="text-sm font-semibold text-gray-700 mb-2">✅ Transcripción</h2> | ||||
|                     <p class="text-green-600 whitespace-pre-wrap break-words text-sm leading-relaxed"><%= @transcription %></p> | ||||
|                 </div> | ||||
|             <% end %> | ||||
|         </div> | ||||
|         """ | ||||
|     end | ||||
|  | ||||
|     def handle_event("start_vad", _, socket) do | ||||
|         push_event(socket, "init-vad", %{}) | ||||
|         {:noreply, socket} | ||||
|     end | ||||
|  | ||||
|     def handle_event("stop_vad", _, socket) do | ||||
|         push_event(socket, "stop-vad", %{}) | ||||
|         {:noreply, socket} | ||||
|     end | ||||
|  | ||||
| end | ||||
|  | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Reference in New Issue
	
	Block a user