# Supervisor for the "large" Whisper speech-to-text model (Bumblebee + Nx.Serving).
defmodule Whisper.LargeModel do
  @moduledoc """
  Supervisor that loads a Whisper speech-to-text model through Bumblebee and
  exposes it as an `Nx.Serving` registered under `Whisper.LargeModel.Serving`.

  After the serving starts, a one-shot warm-up request (one second of silence)
  is issued so the first real transcription does not pay the XLA/EXLA
  compilation cost.
  """

  use Supervisor

  require Logger

  # Previous (non-turbo) checkpoint, kept for reference:
  # @model "openai/whisper-large-v3"
  @model "openai/whisper-large-v3-turbo"

  @doc """
  Starts the supervisor, registered under the module name.

  Options are currently ignored.
  """
  def start_link(_opts) do
    Supervisor.start_link(__MODULE__, [], name: __MODULE__)
  end

  @impl true
  def init(_opts) do
    # Model artifacts are fetched from the HuggingFace hub; a failure here is
    # unrecoverable, so assertive matching (crash on :error) is intentional.
    {:ok, model} = Bumblebee.load_model({:hf, @model})
    {:ok, featurizer} = Bumblebee.load_featurizer({:hf, @model})
    {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, @model})
    {:ok, generation_config} = Bumblebee.load_generation_config({:hf, @model})

    # 448 new tokens per chunk; without raising the default, longer segments
    # get truncated early.
    generation_config = Bumblebee.configure(generation_config, max_new_tokens: 448)

    serving =
      Bumblebee.Audio.speech_to_text_whisper(
        model,
        featurizer,
        tokenizer,
        generation_config,
        chunk_num_seconds: 5,
        language: "es",
        timestamps: :segments,
        defn_options: [compiler: EXLA, client: :cuda]
      )

    children = [
      {Nx.Serving,
       serving: serving,
       name: __MODULE__.Serving,
       batch_size: 1,
       batch_timeout: 0},
      # Supervisors start children strictly in order, so by the time this
      # :temporary Task runs, the Nx.Serving above is guaranteed to be
      # registered. This replaces the previous Task.start + Process.sleep(500)
      # approach, which raced against the serving's registration.
      Supervisor.child_spec({Task, &warm_up/0}, id: :warm_up, restart: :temporary)
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end

  # Runs one dummy inference (1 s of silence at 16 kHz) to trigger model
  # compilation up front. Failures are logged but never crash the tree:
  # the task is :temporary and exceptions are rescued here.
  defp warm_up do
    Logger.info("Realizando warm-up de #{@model}... (calentamiento)")

    samples = Nx.tensor(List.duplicate(0.0, 16_000))
    start_time = System.monotonic_time(:millisecond)

    try do
      Nx.Serving.batched_run(__MODULE__.Serving, samples)

      elapsed = System.monotonic_time(:millisecond) - start_time
      Logger.info("✅ #{@model} completado en #{elapsed} ms")
    rescue
      e -> Logger.error("#{@model} Error durante warm-up: #{inspect(e)}")
    end
  end
end
|