Files
voice_recognition/whisper_live/lib/whisper_live/transcriber.ex
2025-07-18 10:50:51 +00:00

150 lines
4.9 KiB
Elixir

defmodule WhisperLive.Transcriber do
  @moduledoc """
  Periodic, near-real-time transcriber for a single audio stream.

  One process is started per `ref` and registered under that `ref` in
  `WhisperLive.Registry`. On every `:transcribe` tick it:

    1. reads the pending chunks with `WhisperLive.AudioBuffer.get_tiny/1`,
    2. concatenates them and wraps them in a WAV container,
    3. posts the WAV file as `multipart/form-data` to the Whisper endpoint,
    4. broadcasts `{:transcription, body}` on the `"transcription"` topic of
       `WhisperLive.PubSub` when the endpoint answers with HTTP 200,
    5. clears the buffer with `WhisperLive.AudioBuffer.clear_tiny/1`.

  The next tick is armed only after the current one has finished, so the
  effective period is `@interval_ms` plus the time spent processing.
  """

  use GenServer
  require Logger

  alias Phoenix.PubSub
  alias WhisperLive.AudioBuffer

  @interval_ms 3000
  @whisper_url "http://localhost:4000/tiny"
  @boundary "----ElixirBoundary"
  @content_type "multipart/form-data; boundary=" <> @boundary

  # ---------------------------------------------------------------------------
  # Client API
  # ---------------------------------------------------------------------------

  @doc """
  Starts a transcriber for `ref`, registered under `ref` in `WhisperLive.Registry`.
  """
  @spec start_link(term()) :: GenServer.on_start()
  def start_link(ref) do
    GenServer.start_link(__MODULE__, ref, name: via_tuple(ref))
  end

  @doc """
  Stops the transcriber registered for `ref` with reason `:normal`.

  Like `GenServer.stop/3`, this exits the caller when no such process is running.
  """
  @spec stop(term()) :: :ok
  def stop(ref) do
    GenServer.stop(via_tuple(ref), :normal)
  end

  # ---------------------------------------------------------------------------
  # GenServer callbacks
  # ---------------------------------------------------------------------------

  @impl true
  def init(ref) do
    schedule()
    {:ok, %{ref: ref}}
  end

  @impl true
  def handle_info(:transcribe, %{ref: ref} = state) do
    case AudioBuffer.get_tiny(ref) do
      [] ->
        :noop

      # The sample rate of the first chunk is used for the whole batch.
      [{rate, _} | _] = chunks ->
        transcribe_chunks(chunks, rate)

        # NOTE(review): chunks appended to the buffer while the HTTP request was
        # in flight are presumably discarded by this clear - confirm against
        # AudioBuffer before relying on gap-free audio.
        clear_buffer(ref)
    end

    schedule()
    {:noreply, state}
  end

  # Without this clause any stray message would crash the server with a
  # FunctionClauseError; log it and carry on instead.
  def handle_info(message, state) do
    Logger.warning("#{inspect(__MODULE__)} ignoring unexpected message: #{inspect(message)}")
    {:noreply, state}
  end

  # Disabled variant based on `AudioBuffer.pop_chunk_with_overlap/2`, kept for reference.
  #
  # def handle_info(:transcribe, %{ref: ref} = state) do
  #   case AudioBuffer.pop_chunk_with_overlap(ref, 1000) do
  #     {"", _rate} ->
  #       :noop
  #     {audio, rate} ->
  #       tmpfile = "tmp/rt_#{ref}_#{System.system_time(:millisecond)}.wav"
  #       File.mkdir_p!("tmp")
  #       File.write!(tmpfile, encode_wav(audio, rate))
  #       case send_to_whisper(tmpfile) do
  #         {:ok, response} ->
  #           PubSub.broadcast(WhisperLive.PubSub, "transcription", {:transcription, response})
  #         {:error, reason} ->
  #           Logger.warning("Realtime transcription error: #{inspect(reason)}")
  #       end
  #   end
  #   schedule()
  #   {:noreply, state}
  # end

  # ---------------------------------------------------------------------------
  # Internals
  # ---------------------------------------------------------------------------

  defp via_tuple(ref), do: {:via, Registry, {WhisperLive.Registry, ref}}

  defp schedule, do: Process.send_after(self(), :transcribe, @interval_ms)

  # Writes the merged chunks to a uniquely named temporary WAV file, uploads it
  # and broadcasts the result. The file is removed even when a step raises.
  defp transcribe_chunks(chunks, rate) do
    pcm = chunks |> Enum.map(fn {_rate, bin} -> bin end) |> IO.iodata_to_binary()
    path = tmp_path("rt")

    try do
      File.write!(path, encode_wav(pcm, rate))

      case send_to_whisper(path) do
        {:ok, response} ->
          PubSub.broadcast(WhisperLive.PubSub, "transcription", {:transcription, response})

        {:error, reason} ->
          Logger.warning("Realtime transcription error: #{inspect(reason)}")
      end
    after
      File.rm(path)
    end
  end

  # Clears the buffer for `ref`, but only while an AudioBuffer process is still
  # registered for it in WhisperLive.AudioRegistry.
  defp clear_buffer(ref) do
    case GenServer.whereis({:via, Registry, {WhisperLive.AudioRegistry, ref}}) do
      pid when is_pid(pid) ->
        if Process.alive?(pid) do
          AudioBuffer.clear_tiny(ref)
        else
          Logger.debug("AudioBuffer #{inspect(ref)} no está vivo.")
        end

      _ ->
        Logger.debug("AudioBuffer #{inspect(ref)} no existe.")
    end
  end

  # Returns a path of the form "<system tmp dir>/<prefix>_<unique integer>.wav".
  defp tmp_path(prefix) do
    unique = :erlang.unique_integer([:positive]) |> Integer.to_string()
    Path.join(System.tmp_dir!(), prefix <> "_" <> unique <> ".wav")
  end

  # Prepends a 44-byte RIFF/WAVE header declaring PCM (format 1), one channel and
  # 16 bits per sample at `sample_rate` to `data`.
  # Assumes `data` already holds little-endian 16-bit mono samples - TODO confirm
  # against the producer of the chunks.
  defp encode_wav(data, sample_rate) do
    num_channels = 1
    bits_per_sample = 16
    block_align = div(bits_per_sample * num_channels, 8)
    byte_rate = sample_rate * block_align
    data_size = byte_size(data)
    # RIFF chunk size = everything after the first 8 bytes of the file.
    riff_size = 36 + data_size

    <<
      "RIFF",
      riff_size::little-32,
      "WAVE",
      "fmt ",
      16::little-32,
      1::little-16,
      num_channels::little-16,
      sample_rate::little-32,
      byte_rate::little-32,
      block_align::little-16,
      bits_per_sample::little-16,
      "data",
      data_size::little-32,
      data::binary
    >>
  end

  # Posts the file at `filepath` to the Whisper endpoint as a single-part
  # multipart/form-data upload (field name "file").
  #
  # Returns `{:ok, body}` on HTTP 200, `{:error, {:http_error, status, body}}` on
  # any other status, and `{:error, reason}` when the file cannot be read or the
  # request itself fails.
  defp send_to_whisper(filepath) do
    with {:ok, file_bin} <- File.read(filepath) do
      body = multipart_body(Path.basename(filepath), file_bin)

      # :httpc derives the Content-Type header from the third tuple element, so
      # no extra header is needed.
      request =
        {String.to_charlist(@whisper_url), [], String.to_charlist(@content_type), body}

      case :httpc.request(:post, request, [], []) do
        {:ok, {{_version, 200, _phrase}, _headers, resp_body}} ->
          {:ok, IO.iodata_to_binary(resp_body)}

        {:ok, {{_version, status, _phrase}, _headers, resp_body}} ->
          {:error, {:http_error, status, IO.iodata_to_binary(resp_body)}}

        {:error, reason} ->
          {:error, reason}

        other ->
          {:error, other}
      end
    end
  end

  # Builds the request payload as one flat binary so that :httpc computes the
  # Content-Length from the real byte size.
  defp multipart_body(filename, file_bin) do
    IO.iodata_to_binary([
      "--",
      @boundary,
      "\r\n",
      "Content-Disposition: form-data; name=\"file\"; filename=\"",
      filename,
      "\"\r\n",
      "Content-Type: audio/wav\r\n\r\n",
      file_bin,
      "\r\n--",
      @boundary,
      "--\r\n"
    ])
  end
end