Transcripción en vivo + transcripción mejorada

This commit is contained in:
2025-07-16 15:50:13 +00:00
parent 8386b685d6
commit 89168522b6
12 changed files with 293 additions and 223 deletions

View File

@ -1,160 +1,201 @@
defmodule WhisperLiveWeb.Live.Recorder do
use WhisperLiveWeb, :live_view
alias Phoenix.PubSub
use WhisperLiveWeb, :live_view
alias Phoenix.PubSub
# Mounts the LiveView. Once the websocket transport is connected, subscribes
# this client to its own transcription topic (keyed by transport pid) so it
# only receives its own chunks; starts with an empty transcript.
def mount(_, _, socket) do
  if connected?(socket) do
    topic = "transcription:#{socket_id(socket)}"
    PubSub.subscribe(WhisperLive.PubSub, topic)
  end

  {:ok, assign(socket, transcription: "")}
end
def mount(_, _, socket) do
PubSub.subscribe(WhisperLive.PubSub, "transcription")
def handle_info({:transcription, raw_json}, socket) do
new_text =
raw_json
|> Jason.decode!()
|> get_in(["chunks", Access.at(0), "text"])
socket =
socket
|> assign(:transcription, "")
|> assign(:transcription_m, "")
{:noreply, update(socket, :transcription, &(&1 <> " " <> new_text))}
end
{:ok, socket}
end
# Tells the client hook to start capturing audio.
#
# BUG FIX: `Phoenix.LiveView.push_event/3` is pure — it returns a NEW socket
# carrying the event. The original discarded that return value and replied
# with the untouched socket, so "start-recording" was never delivered to the
# browser. We must reply with the socket push_event/3 returns.
def handle_event("start_recording", _params, socket) do
  {:noreply, push_event(socket, "start-recording", %{})}
end
def handle_info({:transcription, raw_json}, socket) do
IO.inspect(raw_json, label: "en vivo ---------------->\n")
# Tells the client hook to stop capturing audio.
#
# BUG FIX: the original called `push_event/3` but ignored its return value;
# since LiveView sockets are immutable, the "stop-recording" event was lost.
# Reply with the socket returned by push_event/3 so the event reaches the client.
def handle_event("stop_recording", _params, socket) do
  {:noreply, push_event(socket, "stop-recording", %{})}
end
new_text =
raw_json
|> Jason.decode!()
|> get_in(["chunks", Access.at(0), "text"])
# Derives a per-client topic suffix from the LiveView transport pid,
# e.g. "<0.123.0>".
defp socket_id(socket) do
  socket.transport_pid
  |> :erlang.pid_to_list()
  |> to_string()
end
old_text = socket.assigns.transcription
def render(assigns) do
~H"""
<div id="recorder" data-hook="recorder">
<button id="startButton" phx-click="start_recording">Start Recording</button>
<button id="stopButton" phx-click="stop_recording">Stop Recording</button>
# Sacar lo ya incluido al inicio
added_part = String.replace_prefix(new_text, old_text, "")
<div id="transcriptionContainer">
<div id="transcription" class="realtime"><%= @transcription %></div>
</div>
<div id="status" class="realtime"></div>
{:noreply, update(socket, :transcription, &(&1 <> added_part))}
end
<script type="module">
import { Socket } from "https://cdn.skypack.dev/phoenix"
const startButton = document.getElementById("startButton")
const stopButton = document.getElementById("stopButton")
const statusDiv = document.getElementById("status")
def handle_info({:transcription_m, raw_json}, socket) do
IO.inspect(raw_json, label: "meojada ---------------->\n")
let socket = null
let channel = null
let audioContext = null
let processor = null
let mediaStream = null
let buffer = []
let sendInterval = null
new_text =
raw_json
|> Jason.decode!()
|> get_in(["chunks", Access.at(0), "text"])
{:noreply, update(socket, :transcription_m, &(&1 <> " " <> new_text))}
end
const sampleRate = 48000
# Clears both transcripts (live and improved) and tells the client hook to
# start capturing audio.
#
# BUG FIX: `push_event/3` returns a new socket carrying the event; the
# original threw that socket away and replied with a separately-built one,
# so "start-recording" never reached the browser. Chain the assigns and the
# push into the single socket we reply with.
def handle_event("start_recording", _params, socket) do
  socket =
    socket
    |> assign(transcription: "", transcription_m: "")
    |> push_event("start-recording", %{})

  {:noreply, socket}
end
async function startRecording() {
startButton.disabled = true
stopButton.disabled = false
statusDiv.textContent = "🎙 Grabando..."
socket = new Socket("ws://localhost:4004/socket")
socket.connect()
channel = socket.channel("audio:lobby")
# Tells the client hook to stop capturing audio.
#
# BUG FIX: the original discarded the socket returned by `push_event/3`,
# which is the only copy that carries the event — the browser never saw
# "stop-recording". Reply with the pushed socket instead.
def handle_event("stop_recording", _params, socket) do
  {:noreply, push_event(socket, "stop-recording", %{})}
end
await channel.join()
.receive("ok", () => {
console.log("✅ Canal conectado")
statusDiv.textContent = "✅ Canal conectado"
})
.receive("error", () => {
console.error("❌ Error al conectar canal")
statusDiv.textContent = "❌ Error canal"
})
# Builds a string identifier for this client from its transport pid
# (the pid's textual form, e.g. "<0.123.0>"), used as a PubSub topic suffix.
defp socket_id(socket) do
  charlist = :erlang.pid_to_list(socket.transport_pid)
  List.to_string(charlist)
end
try {
audioContext = new AudioContext({ sampleRate })
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true })
} catch (err) {
console.error("❌ Micrófono error:", err)
statusDiv.textContent = "❌ Error accediendo al micrófono"
return
}
def render(assigns) do
~H"""
<div id="recorder" data-hook="recorder">
<div class="flex space-x-2">
<button id="startButton" phx-click="start_recording" class="px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600">
Start Recording
</button>
<button id="stopButton" phx-click="stop_recording" class="px-4 py-2 bg-red-500 text-white rounded hover:bg-red-600">
Stop Recording
</button>
</div>
const source = audioContext.createMediaStreamSource(mediaStream)
processor = audioContext.createScriptProcessor(4096, 1, 1)
source.connect(processor)
processor.connect(audioContext.destination)
<div id="status" class="text-sm text-gray-600"></div>
buffer = []
processor.onaudioprocess = e => {
const input = e.inputBuffer.getChannelData(0)
const pcm = new Int16Array(input.length)
for (let i = 0; i < input.length; i++) {
let s = Math.max(-1, Math.min(1, input[i]))
pcm[i] = s < 0 ? s * 0x8000 : s * 0x7FFF
}
buffer.push(pcm)
}
<div id="transcriptionContainer" class="space-y-2">
<div class="p-2 bg-gray-100 rounded shadow">
<h2 class="text-sm font-semibold text-gray-700 mb-1">🟠 Transcripción en vivo</h2>
<p id="transcription" class="text-orange-600 whitespace-pre-wrap"><%= @transcription %></p>
</div>
sendInterval = setInterval(() => {
if (buffer.length === 0) return
const merged = flattenInt16(buffer)
buffer = []
<%= if @transcription_m != "" do %>
<div class="p-2 bg-gray-100 rounded shadow">
<h2 class="text-sm font-semibold text-gray-700 mb-1">✅ Transcripción mejorada</h2>
<p class="text-green-600 whitespace-pre-wrap"><%= @transcription_m %></p>
</div>
<% end %>
</div>
<script type="module">
import { Socket } from "https://cdn.skypack.dev/phoenix"
function encodeBase64(uint8Array) {
let binary = ''
const len = uint8Array.byteLength
for (let i = 0; i < len; i++) {
binary += String.fromCharCode(uint8Array[i])
}
return btoa(binary)
const startButton = document.getElementById("startButton")
const stopButton = document.getElementById("stopButton")
const statusDiv = document.getElementById("status")
let socket = null
let channel = null
let audioContext = null
let processor = null
let mediaStream = null
let buffer = []
let sendInterval = null
const sampleRate = 48000
async function startRecording() {
startButton.disabled = true
stopButton.disabled = false
statusDiv.textContent = "🎙 Grabando..."
socket = new Socket("ws://localhost:4004/socket")
socket.connect()
channel = socket.channel("audio:lobby")
await channel.join()
.receive("ok", () => {
console.log("✅ Canal conectado")
statusDiv.textContent = "✅ Canal conectado"
})
.receive("error", () => {
console.error("❌ Error al conectar canal")
statusDiv.textContent = "❌ Error canal"
})
try {
audioContext = new AudioContext({ sampleRate })
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true })
} catch (err) {
console.error("❌ Micrófono error:", err)
statusDiv.textContent = "❌ Error accediendo al micrófono"
return
}
const source = audioContext.createMediaStreamSource(mediaStream)
processor = audioContext.createScriptProcessor(4096, 1, 1)
source.connect(processor)
processor.connect(audioContext.destination)
buffer = []
processor.onaudioprocess = e => {
const input = e.inputBuffer.getChannelData(0)
const pcm = new Int16Array(input.length)
for (let i = 0; i < input.length; i++) {
let s = Math.max(-1, Math.min(1, input[i]))
pcm[i] = s < 0 ? s * 0x8000 : s * 0x7FFF
}
buffer.push(pcm)
}
sendInterval = setInterval(() => {
if (buffer.length === 0) return
const merged = flattenInt16(buffer)
buffer = []
function encodeBase64(uint8Array) {
let binary = ''
const len = uint8Array.byteLength
for (let i = 0; i < len; i++) {
binary += String.fromCharCode(uint8Array[i])
}
return btoa(binary)
}
const base64 = encodeBase64(new Uint8Array(merged.buffer))
channel.push("audio_chunk", { data: base64, sample_rate: sampleRate })
console.log("📤 Enviado chunk")
}, 2000)
}
const base64 = encodeBase64(new Uint8Array(merged.buffer))
channel.push("audio_chunk", { data: base64, sample_rate: sampleRate })
console.log("📤 Enviado chunk")
}, 2000)
}
function stopRecording() {
stopButton.disabled = true
startButton.disabled = false
statusDiv.textContent = "🛑 Grabación detenida."
function stopRecording() {
stopButton.disabled = true
startButton.disabled = false
statusDiv.textContent = "🛑 Grabación detenida."
if (processor) processor.disconnect()
if (audioContext) audioContext.close()
if (mediaStream) mediaStream.getTracks().forEach(t => t.stop())
if (sendInterval) clearInterval(sendInterval)
if (processor) processor.disconnect()
if (audioContext) audioContext.close()
if (mediaStream) mediaStream.getTracks().forEach(t => t.stop())
if (sendInterval) clearInterval(sendInterval)
if (channel) {
channel.push("stop_audio")
setTimeout(() => {
channel.leave()
socket.disconnect()
console.log("🔌 Socket cerrado")
}, 500)
}
}
if (channel) {
channel.push("stop_audio")
setTimeout(() => {
channel.leave()
socket.disconnect()
console.log("🔌 Socket cerrado")
}, 500)
}
}
function flattenInt16(buffers) {
const length = buffers.reduce((acc, b) => acc + b.length, 0)
const out = new Int16Array(length)
let offset = 0
for (const b of buffers) {
out.set(b, offset)
offset += b.length
}
return out
}
function flattenInt16(buffers) {
const length = buffers.reduce((acc, b) => acc + b.length, 0)
const out = new Int16Array(length)
let offset = 0
for (const b of buffers) {
out.set(b, offset)
offset += b.length
}
return out
}
startButton.onclick = startRecording
stopButton.onclick = stopRecording
</script>
</div>
"""
end
startButton.onclick = startRecording
stopButton.onclick = stopRecording
</script>
</div>
"""
end
end