Transcripción en vivo + transcripción mejorada

This commit is contained in:
2025-07-16 15:50:13 +00:00
parent 8386b685d6
commit 89168522b6
12 changed files with 293 additions and 223 deletions

View File

@ -1,160 +1,201 @@
defmodule WhisperLiveWeb.Live.Recorder do
use WhisperLiveWeb, :live_view
alias Phoenix.PubSub
use WhisperLiveWeb, :live_view
alias Phoenix.PubSub
# Mounts the LiveView. Once the websocket transport is connected, subscribes
# this client to its own transcription topic (keyed by transport pid) so it
# only receives its own chunks; starts with an empty transcript.
def mount(_, _, socket) do
  if connected?(socket) do
    topic = "transcription:#{socket_id(socket)}"
    PubSub.subscribe(WhisperLive.PubSub, topic)
  end

  {:ok, assign(socket, transcription: "")}
end
def mount(_, _, socket) do
PubSub.subscribe(WhisperLive.PubSub, "transcription")
def handle_info({:transcription, raw_json}, socket) do
new_text =
raw_json
|> Jason.decode!()
|> get_in(["chunks", Access.at(0), "text"])
socket =
socket
|> assign(:transcription, "")
|> assign(:transcription_m, "")
{:noreply, update(socket, :transcription, &(&1 <> " " <> new_text))}
end
{:ok, socket}
end
# Tells the client hook to start capturing audio.
#
# BUG FIX: `Phoenix.LiveView.push_event/3` is pure — it returns a NEW socket
# carrying the event. The original discarded that return value and replied
# with the untouched socket, so "start-recording" was never delivered to the
# browser. We must reply with the socket push_event/3 returns.
def handle_event("start_recording", _params, socket) do
  {:noreply, push_event(socket, "start-recording", %{})}
end
def handle_info({:transcription, raw_json}, socket) do
IO.inspect(raw_json, label: "en vivo ---------------->\n")
# Tells the client hook to stop capturing audio.
#
# BUG FIX: the original called `push_event/3` but ignored its return value;
# since LiveView sockets are immutable, the "stop-recording" event was lost.
# Reply with the socket returned by push_event/3 so the event reaches the client.
def handle_event("stop_recording", _params, socket) do
  {:noreply, push_event(socket, "stop-recording", %{})}
end
new_text =
raw_json
|> Jason.decode!()
|> get_in(["chunks", Access.at(0), "text"])
# Derives a per-client topic suffix from the LiveView transport pid,
# e.g. "<0.123.0>".
defp socket_id(socket) do
  socket.transport_pid
  |> :erlang.pid_to_list()
  |> to_string()
end
old_text = socket.assigns.transcription
def render(assigns) do
~H"""
<div id="recorder" data-hook="recorder">
<button id="startButton" phx-click="start_recording">Start Recording</button>
<button id="stopButton" phx-click="stop_recording">Stop Recording</button>
# Sacar lo ya incluido al inicio
added_part = String.replace_prefix(new_text, old_text, "")
<div id="transcriptionContainer">
<div id="transcription" class="realtime"><%= @transcription %></div>
</div>
<div id="status" class="realtime"></div>
{:noreply, update(socket, :transcription, &(&1 <> added_part))}
end
<script type="module">
import { Socket } from "https://cdn.skypack.dev/phoenix"
const startButton = document.getElementById("startButton")
const stopButton = document.getElementById("stopButton")
const statusDiv = document.getElementById("status")
def handle_info({:transcription_m, raw_json}, socket) do
IO.inspect(raw_json, label: "meojada ---------------->\n")
let socket = null
let channel = null
let audioContext = null
let processor = null
let mediaStream = null
let buffer = []
let sendInterval = null
new_text =
raw_json
|> Jason.decode!()
|> get_in(["chunks", Access.at(0), "text"])
{:noreply, update(socket, :transcription_m, &(&1 <> " " <> new_text))}
end
const sampleRate = 48000
# Clears both transcripts (live and improved) and tells the client hook to
# start capturing audio.
#
# BUG FIX: `push_event/3` returns a new socket carrying the event; the
# original threw that socket away and replied with a separately-built one,
# so "start-recording" never reached the browser. Chain the assigns and the
# push into the single socket we reply with.
def handle_event("start_recording", _params, socket) do
  socket =
    socket
    |> assign(transcription: "", transcription_m: "")
    |> push_event("start-recording", %{})

  {:noreply, socket}
end
async function startRecording() {
startButton.disabled = true
stopButton.disabled = false
statusDiv.textContent = "🎙 Grabando..."
socket = new Socket("ws://localhost:4004/socket")
socket.connect()
channel = socket.channel("audio:lobby")
# Tells the client hook to stop capturing audio.
#
# BUG FIX: the original discarded the socket returned by `push_event/3`,
# which is the only copy that carries the event — the browser never saw
# "stop-recording". Reply with the pushed socket instead.
def handle_event("stop_recording", _params, socket) do
  {:noreply, push_event(socket, "stop-recording", %{})}
end
await channel.join()
.receive("ok", () => {
console.log("✅ Canal conectado")
statusDiv.textContent = "✅ Canal conectado"
})
.receive("error", () => {
console.error("❌ Error al conectar canal")
statusDiv.textContent = "❌ Error canal"
})
# Builds a string identifier for this client from its transport pid
# (the pid's textual form, e.g. "<0.123.0>"), used as a PubSub topic suffix.
defp socket_id(socket) do
  charlist = :erlang.pid_to_list(socket.transport_pid)
  List.to_string(charlist)
end
try {
audioContext = new AudioContext({ sampleRate })
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true })
} catch (err) {
console.error("❌ Micrófono error:", err)
statusDiv.textContent = "❌ Error accediendo al micrófono"
return
}
def render(assigns) do
~H"""
<div id="recorder" data-hook="recorder">
<div class="flex space-x-2">
<button id="startButton" phx-click="start_recording" class="px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600">
Start Recording
</button>
<button id="stopButton" phx-click="stop_recording" class="px-4 py-2 bg-red-500 text-white rounded hover:bg-red-600">
Stop Recording
</button>
</div>
const source = audioContext.createMediaStreamSource(mediaStream)
processor = audioContext.createScriptProcessor(4096, 1, 1)
source.connect(processor)
processor.connect(audioContext.destination)
<div id="status" class="text-sm text-gray-600"></div>
buffer = []
processor.onaudioprocess = e => {
const input = e.inputBuffer.getChannelData(0)
const pcm = new Int16Array(input.length)
for (let i = 0; i < input.length; i++) {
let s = Math.max(-1, Math.min(1, input[i]))
pcm[i] = s < 0 ? s * 0x8000 : s * 0x7FFF
}
buffer.push(pcm)
}
<div id="transcriptionContainer" class="space-y-2">
<div class="p-2 bg-gray-100 rounded shadow">
<h2 class="text-sm font-semibold text-gray-700 mb-1">🟠 Transcripción en vivo</h2>
<p id="transcription" class="text-orange-600 whitespace-pre-wrap"><%= @transcription %></p>
</div>
sendInterval = setInterval(() => {
if (buffer.length === 0) return
const merged = flattenInt16(buffer)
buffer = []
<%= if @transcription_m != "" do %>
<div class="p-2 bg-gray-100 rounded shadow">
<h2 class="text-sm font-semibold text-gray-700 mb-1">✅ Transcripción mejorada</h2>
<p class="text-green-600 whitespace-pre-wrap"><%= @transcription_m %></p>
</div>
<% end %>
</div>
<script type="module">
import { Socket } from "https://cdn.skypack.dev/phoenix"
function encodeBase64(uint8Array) {
let binary = ''
const len = uint8Array.byteLength
for (let i = 0; i < len; i++) {
binary += String.fromCharCode(uint8Array[i])
}
return btoa(binary)
const startButton = document.getElementById("startButton")
const stopButton = document.getElementById("stopButton")
const statusDiv = document.getElementById("status")
let socket = null
let channel = null
let audioContext = null
let processor = null
let mediaStream = null
let buffer = []
let sendInterval = null
const sampleRate = 48000
async function startRecording() {
startButton.disabled = true
stopButton.disabled = false
statusDiv.textContent = "🎙 Grabando..."
socket = new Socket("ws://localhost:4004/socket")
socket.connect()
channel = socket.channel("audio:lobby")
await channel.join()
.receive("ok", () => {
console.log("✅ Canal conectado")
statusDiv.textContent = "✅ Canal conectado"
})
.receive("error", () => {
console.error("❌ Error al conectar canal")
statusDiv.textContent = "❌ Error canal"
})
try {
audioContext = new AudioContext({ sampleRate })
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true })
} catch (err) {
console.error("❌ Micrófono error:", err)
statusDiv.textContent = "❌ Error accediendo al micrófono"
return
}
const source = audioContext.createMediaStreamSource(mediaStream)
processor = audioContext.createScriptProcessor(4096, 1, 1)
source.connect(processor)
processor.connect(audioContext.destination)
buffer = []
processor.onaudioprocess = e => {
const input = e.inputBuffer.getChannelData(0)
const pcm = new Int16Array(input.length)
for (let i = 0; i < input.length; i++) {
let s = Math.max(-1, Math.min(1, input[i]))
pcm[i] = s < 0 ? s * 0x8000 : s * 0x7FFF
}
buffer.push(pcm)
}
sendInterval = setInterval(() => {
if (buffer.length === 0) return
const merged = flattenInt16(buffer)
buffer = []
function encodeBase64(uint8Array) {
let binary = ''
const len = uint8Array.byteLength
for (let i = 0; i < len; i++) {
binary += String.fromCharCode(uint8Array[i])
}
return btoa(binary)
}
const base64 = encodeBase64(new Uint8Array(merged.buffer))
channel.push("audio_chunk", { data: base64, sample_rate: sampleRate })
console.log("📤 Enviado chunk")
}, 2000)
}
const base64 = encodeBase64(new Uint8Array(merged.buffer))
channel.push("audio_chunk", { data: base64, sample_rate: sampleRate })
console.log("📤 Enviado chunk")
}, 2000)
}
function stopRecording() {
stopButton.disabled = true
startButton.disabled = false
statusDiv.textContent = "🛑 Grabación detenida."
function stopRecording() {
stopButton.disabled = true
startButton.disabled = false
statusDiv.textContent = "🛑 Grabación detenida."
if (processor) processor.disconnect()
if (audioContext) audioContext.close()
if (mediaStream) mediaStream.getTracks().forEach(t => t.stop())
if (sendInterval) clearInterval(sendInterval)
if (processor) processor.disconnect()
if (audioContext) audioContext.close()
if (mediaStream) mediaStream.getTracks().forEach(t => t.stop())
if (sendInterval) clearInterval(sendInterval)
if (channel) {
channel.push("stop_audio")
setTimeout(() => {
channel.leave()
socket.disconnect()
console.log("🔌 Socket cerrado")
}, 500)
}
}
if (channel) {
channel.push("stop_audio")
setTimeout(() => {
channel.leave()
socket.disconnect()
console.log("🔌 Socket cerrado")
}, 500)
}
}
function flattenInt16(buffers) {
const length = buffers.reduce((acc, b) => acc + b.length, 0)
const out = new Int16Array(length)
let offset = 0
for (const b of buffers) {
out.set(b, offset)
offset += b.length
}
return out
}
function flattenInt16(buffers) {
const length = buffers.reduce((acc, b) => acc + b.length, 0)
const out = new Int16Array(length)
let offset = 0
for (const b of buffers) {
out.set(b, offset)
offset += b.length
}
return out
}
startButton.onclick = startRecording
stopButton.onclick = stopRecording
</script>
</div>
"""
end
startButton.onclick = startRecording
stopButton.onclick = stopRecording
</script>
</div>
"""
end
end