server with POST tiny and POST large

2025-07-16 15:51:14 +00:00
parent 89168522b6
commit b14d0a7552
15 changed files with 75 additions and 80 deletions

View File

@@ -2,6 +2,7 @@
 [{modules,['Elixir.WhisperServer',
            'Elixir.WhisperServer.Application',
            'Elixir.WhisperServer.InferenceRunner',
+           'Elixir.WhisperServer.Large',
            'Elixir.WhisperServer.WhisperInference']},
  {optional_applications,[]},
  {applications,[kernel,stdlib,elixir,logger,bumblebee,exla,nx,

View File

@@ -9,9 +9,10 @@ defmodule WhisperServer.Application do
     Application.put_env(:whisper_server, :batch_size, args[:batch_size])
     Application.put_env(:whisper_server, :batch_timeout, args[:batch_timeout])
     Application.put_env(:whisper_server, :port, args[:port])
 
     children = [
       WhisperServer.WhisperInference,
+      WhisperServer.Large,
       {Plug.Cowboy, scheme: :http, plug: WhisperServer, options: [port: args[:port]]}
     ]

View File

@@ -0,0 +1,33 @@
+defmodule WhisperServer.Large do
+  use Supervisor
+
+  def start_link(_opts) do
+    Supervisor.start_link(__MODULE__, [], name: __MODULE__)
+  end
+
+  def init(_opts) do
+    model_name = "openai/whisper-large-v3"
+
+    {:ok, model} = Bumblebee.load_model({:hf, model_name})
+    {:ok, featurizer} = Bumblebee.load_featurizer({:hf, model_name})
+    {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, model_name})
+    {:ok, generation_config} = Bumblebee.load_generation_config({:hf, model_name})
+
+    serving = Bumblebee.Audio.speech_to_text_whisper(
+      model, featurizer, tokenizer, generation_config,
+      chunk_num_seconds: 30,
+      language: "es",
+      defn_options: [compiler: EXLA, client: :host]
+    )
+
+    children = [
+      {Nx.Serving,
+       serving: serving,
+       name: __MODULE__.Serving,
+       batch_size: 1,
+       batch_timeout: 5000}
+    ]
+
+    Supervisor.init(children, strategy: :one_for_one)
+  end
+end
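
The supervisor registers its serving process under the name WhisperServer.Large.Serving, so it can be exercised from any process on the node without going through HTTP. A minimal sketch, assuming the application is running and a local sample.wav exists (the file name is an assumption):

    # One-off transcription against the large-model serving. Bumblebee's
    # Whisper serving accepts {:file, path} input, and batched_run/2
    # routes the call to the named Nx.Serving process started above.
    {:ok, _} = Application.ensure_all_started(:whisper_server)

    WhisperServer.Large.Serving
    |> Nx.Serving.batched_run({:file, "sample.wav"})
    |> IO.inspect()
    # e.g. %{chunks: [%{text: " ..."}]}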

View File

@@ -0,0 +1,18 @@
+# defmodule WhisperServer.TranscriptionSocket do
+#   @behaviour :cowboy_websocket
+#
+#   def init(req, _opts) do
+#     {:cowboy_websocket, req, %{chunks: []}}
+#   end
+#
+#   def websocket_init(state), do: {:ok, state}
+#
+#   def websocket_handle({:binary, data}, state) do
+#     IO.puts("🟡 Received chunk of #{byte_size(data)} bytes")
+#     {:reply, {:text, ~s({"text": "chunk received, size #{byte_size(data)}"})}, state}
+#   end
+#
+#   def websocket_handle(_data, state), do: {:ok, state}
+#   def websocket_info(_info, state), do: {:ok, state}
+#   def terminate(_reason, _req, _state), do: :ok
+# end
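
The websocket handler is checked in fully commented out. Wiring it up later would also mean handing Cowboy a custom dispatch, since a Plug router cannot upgrade a connection itself. A hedged sketch of what the child spec in the application could look like (the /ws path and the port are illustrative assumptions):

    {Plug.Cowboy,
     scheme: :http,
     plug: WhisperServer,
     options: [
       port: 4000,
       dispatch: [
         {:_,
          [
            # Raw websocket endpoint served by the (currently disabled) handler.
            {"/ws", WhisperServer.TranscriptionSocket, []},
            # Everything else falls through to the existing Plug router.
            {:_, Plug.Cowboy.Handler, {WhisperServer, []}}
          ]}
       ]
     ]}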

View File

@@ -14,13 +14,17 @@ defmodule WhisperServer.WhisperInference do
     raw_client = Application.get_env(:whisper_server, :client, :host)
 
     client =
-      case raw_client do
+      case String.to_atom(to_string(raw_client)) do
         :rocm ->
          IO.warn("Client :rocm is not supported, falling back to :host")
          :host
-        _ -> raw_client
+
+        :cuda ->
+          IO.warn("Client :cuda requires GPU support, falling back to :host")
+          :host
+
+        atom -> atom
       end
 
     batch_size = Application.get_env(:whisper_server, :batch_size, 3)
     batch_timeout = Application.get_env(:whisper_server, :batch_timeout, 3000)
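
The to_string/to_atom round trip normalizes the configured client so the clause heads match whether the value arrives as an atom or a string (for example from an environment variable). An illustrative iex session:

    iex> String.to_atom(to_string("cuda"))
    :cuda
    iex> String.to_atom(to_string(:host))
    :host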

View File

@@ -10,86 +10,12 @@ defmodule WhisperServer do
   plug :match
   plug :dispatch
 
-  post "/infer" do
+  post "/tiny" do
     handle_request(conn)
   end
 
-  post "/v1/audio/transcriptions" do
-    model = conn.params["model"] || "whisper-1"
-    response_format = conn.params["response_format"] || "json"
-
-    if model != "whisper-1" do
-      send_resp(conn, 400, Jason.encode!(%{error: "Unsupported model"}))
-    else
-      upload = conn.params["file"]
-
-      case File.read(upload.path) do
-        {:ok, file_bytes} ->
-          filename = "uploaded_#{System.unique_integer([:positive])}_#{upload.filename}"
-          temp_path = Path.join("uploads", filename)
-          File.mkdir_p!("uploads")
-
-          case File.write(temp_path, file_bytes) do
-            :ok ->
-              try do
-                result = WhisperServer.InferenceRunner.run_inference(temp_path)
-                Logger.info("Inference result: #{inspect(result)}")
-                result_text = extract_text_from_infer_response(result)
-                Logger.info("Extracted text: #{result_text}")
-
-                case response_format do
-                  "text" ->
-                    conn
-                    |> put_resp_header("Content-Disposition", "attachment; filename=result.txt")
-                    |> send_resp(200, result_text)
-
-                  "json" ->
-                    conn
-                    |> put_resp_header("Content-Disposition", "attachment; filename=result.json")
-                    |> send_resp(200, Jason.encode!(%{text: result_text}))
-
-                  _ ->
-                    send_resp(conn, 200, Jason.encode!(result))
-                end
-              after
-                File.rm(temp_path)
-              end
-
-            {:error, reason} ->
-              send_resp(conn, 500, Jason.encode!(%{error: "Failed to save file: #{reason}"}))
-          end
-
-        {:error, reason} ->
-          send_resp(conn, 500, Jason.encode!(%{error: "Failed to read file: #{reason}"}))
-      end
-    end
-  end
-
-  post "/v1/audio/translations" do
-    send_resp(conn, 200, Jason.encode!(%{}))
-  end
-
-  get "/health" do
-    send_resp(conn, 200, Jason.encode!(%{status: "ok"}))
-  end
-
-  get "/v1/models" do
-    send_resp(conn, 200, Jason.encode!(["whisper-1"]))
-  end
-
-  get "/v1/models/:model" do
-    model = conn.params["model"]
-
-    if model == "whisper-1" do
-      send_resp(conn, 200, Jason.encode!(%{name: "whisper-1"}))
-    else
-      send_resp(conn, 404, Jason.encode!(%{error: "Model not found"}))
-    end
-  end
-
-  match _ do
-    send_resp(conn, 404, "Not Found")
+  post "/large" do
+    handle_request_large(conn, WhisperServer.Large.Serving)
   end
 
   defp extract_text_from_infer_response(response) do
@@ -114,6 +40,18 @@ defmodule WhisperServer do
     end
   end
 
+  defp handle_request_large(conn, serving_name) do
+    upload = conn.params["file"]
+    temp_path = decode_audio_from_body(upload)
+
+    try do
+      result = Nx.Serving.batched_run(serving_name, {:file, temp_path})
+      send_resp(conn, 200, Jason.encode!(result))
+    after
+      File.rm(temp_path)
+    end
+  end
+
   defp decode_audio_from_body(%Plug.Upload{path: uploaded_file_path, filename: filename}) do
     unique_name = "uploaded_#{System.unique_integer([:positive])}_#{filename}"
     temp_path = Path.join("uploads", unique_name)
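
With both routes in place, the model is selected by path: POST /tiny keeps the original WhisperInference serving, while POST /large goes through the new supervisor. A minimal ExUnit sketch of the new route, assuming a test/fixtures/sample.wav fixture (the module name and fixture path are assumptions):

    defmodule WhisperServer.RouterTest do
      use ExUnit.Case, async: false
      import Plug.Test

      @opts WhisperServer.init([])

      test "POST /large transcribes an uploaded file" do
        # Plug.Parsers produces Plug.Upload structs for multipart posts,
        # so building one by hand exercises the route without an HTTP client.
        upload = %Plug.Upload{path: "test/fixtures/sample.wav", filename: "sample.wav"}

        conn =
          conn(:post, "/large", %{"file" => upload})
          |> WhisperServer.call(@opts)

        assert conn.status == 200
      end
    end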