54 lines
1.5 KiB
Elixir
54 lines
1.5 KiB
Elixir
defmodule WhisperServer.WhisperInference do
  @moduledoc """
  Initializes the Whisper model and sets up the serving process.

  This supervisor reads its settings from the `:whisper_server` application
  environment:

    * `:model_name` - Hugging Face repository to load
      (default: `"openai/whisper-tiny"`).
    * `:client` - EXLA client name, given as an atom or a string
      (default: `:cuda`).
    * `:batch_size` - forwarded to `Nx.Serving` as `:batch_size` (default: `3`).
    * `:batch_timeout` - forwarded to `Nx.Serving` as `:batch_timeout`
      (default: `3000`).

  The serving is started under the name `WhisperServer.WhisperInference.Serving`.
  """

  use Supervisor

  @doc """
  Starts the supervisor and registers it under the module name.

  `opts` is handed to `init/1`, which currently ignores it; all configuration
  comes from the application environment.
  """
  @spec start_link(term()) :: Supervisor.on_start()
  def start_link(opts) do
    Supervisor.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @impl true
  def init(_opts) do
    model_name = Application.get_env(:whisper_server, :model_name, "openai/whisper-tiny")
    batch_size = Application.get_env(:whisper_server, :batch_size, 3)
    batch_timeout = Application.get_env(:whisper_server, :batch_timeout, 3000)

    client =
      :whisper_server
      |> Application.get_env(:client, :cuda)
      |> normalize_client()

    # The global backend must be set before the model is loaded so that the
    # parameters are allocated on the configured client.
    Nx.global_default_backend({EXLA.Backend, client: client})

    serving = build_serving(model_name, client)

    children = [
      {Nx.Serving,
       serving: serving,
       name: __MODULE__.Serving,
       batch_size: batch_size,
       batch_timeout: batch_timeout}
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end

  # Converts the configured client (atom or string) into the atom handed to EXLA.
  defp normalize_client(raw_client) do
    # String.to_atom/1 is acceptable here: the value comes from application
    # config (operator-controlled), never from request input.
    case String.to_atom(to_string(raw_client)) do
      # NOTE(review): :rocm is deliberately rewritten to :cuda, as in the
      # original code. Confirm this is still wanted for the EXLA version in use.
      :rocm -> :cuda
      other -> other
    end
  end

  # Loads every Whisper artifact for `model_name` from Hugging Face and builds
  # the speech-to-text serving compiled for `client`.
  defp build_serving(model_name, client) do
    repository = {:hf, model_name}

    # Assertive matches: a missing or unloadable artifact is fatal at boot.
    {:ok, model} = Bumblebee.load_model(repository)
    {:ok, featurizer} = Bumblebee.load_featurizer(repository)
    {:ok, tokenizer} = Bumblebee.load_tokenizer(repository)
    {:ok, generation_config} = Bumblebee.load_generation_config(repository)

    Bumblebee.Audio.speech_to_text_whisper(
      model,
      featurizer,
      tokenizer,
      generation_config,
      chunk_num_seconds: 30,
      language: "es",
      # Use the configured client rather than a hard-coded :cuda so the
      # compiled serving runs on the same device as the default backend.
      defn_options: [compiler: EXLA, client: client]
    )
  end
end
|