diff --git a/whisper_server/_build/dev/lib/whisper_server/.mix/compile.elixir b/whisper_server/_build/dev/lib/whisper_server/.mix/compile.elixir index 611e075a..9ed13c7f 100644 Binary files a/whisper_server/_build/dev/lib/whisper_server/.mix/compile.elixir and b/whisper_server/_build/dev/lib/whisper_server/.mix/compile.elixir differ diff --git a/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.Application.beam b/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.Application.beam index b13d12b5..80b1fe86 100644 Binary files a/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.Application.beam and b/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.Application.beam differ diff --git a/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.Large.beam b/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.Large.beam new file mode 100644 index 00000000..7a55d5f3 Binary files /dev/null and b/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.Large.beam differ diff --git a/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.WhisperInference.beam b/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.WhisperInference.beam index 2cdb84fa..7568de27 100644 Binary files a/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.WhisperInference.beam and b/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.WhisperInference.beam differ diff --git a/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.beam b/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.beam index f9caa992..fe8166d3 100644 Binary files a/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.beam and b/whisper_server/_build/dev/lib/whisper_server/ebin/Elixir.WhisperServer.beam differ diff --git a/whisper_server/_build/dev/lib/whisper_server/ebin/whisper_server.app b/whisper_server/_build/dev/lib/whisper_server/ebin/whisper_server.app index 134a6c7d..5e8b86f2 100644 --- a/whisper_server/_build/dev/lib/whisper_server/ebin/whisper_server.app +++ b/whisper_server/_build/dev/lib/whisper_server/ebin/whisper_server.app @@ -2,6 +2,7 @@ [{modules,['Elixir.WhisperServer', 'Elixir.WhisperServer.Application', 'Elixir.WhisperServer.InferenceRunner', + 'Elixir.WhisperServer.Large', 'Elixir.WhisperServer.WhisperInference']}, {optional_applications,[]}, {applications,[kernel,stdlib,elixir,logger,bumblebee,exla,nx, diff --git a/whisper_server/lib/whisper_server/application.ex b/whisper_server/lib/whisper_server/application.ex index d648e615..2db28cab 100644 --- a/whisper_server/lib/whisper_server/application.ex +++ b/whisper_server/lib/whisper_server/application.ex @@ -9,9 +9,10 @@ defmodule WhisperServer.Application do Application.put_env(:whisper_server, :batch_size, args[:batch_size]) Application.put_env(:whisper_server, :batch_timeout, args[:batch_timeout]) Application.put_env(:whisper_server, :port, args[:port]) - + children = [ WhisperServer.WhisperInference, + WhisperServer.Large, {Plug.Cowboy, scheme: :http, plug: WhisperServer, options: [port: args[:port]]} ] diff --git a/whisper_server/lib/whisper_server/large.ex b/whisper_server/lib/whisper_server/large.ex new file mode 100644 index 00000000..d4a6d8fd --- /dev/null +++ b/whisper_server/lib/whisper_server/large.ex @@ -0,0 +1,33 @@ +defmodule WhisperServer.Large do + use Supervisor + + def start_link(_opts) do + Supervisor.start_link(__MODULE__, [], name: __MODULE__) + end + + def init(_opts) do + model_name = "openai/whisper-large-v3" + + {:ok, model} = Bumblebee.load_model({:hf, model_name}) + {:ok, featurizer} = Bumblebee.load_featurizer({:hf, model_name}) + {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, model_name}) + {:ok, generation_config} = Bumblebee.load_generation_config({:hf, model_name}) + + serving = Bumblebee.Audio.speech_to_text_whisper( + model, featurizer, tokenizer, generation_config, + chunk_num_seconds: 30, + language: "es", + defn_options: [compiler: EXLA, client: :host] + ) + + children = [ + {Nx.Serving, + serving: serving, + name: __MODULE__.Serving, + batch_size: 1, + batch_timeout: 5000} + ] + + Supervisor.init(children, strategy: :one_for_one) + end +end diff --git a/whisper_server/lib/whisper_server/transcription_socket.ex b/whisper_server/lib/whisper_server/transcription_socket.ex new file mode 100644 index 00000000..8f70d288 --- /dev/null +++ b/whisper_server/lib/whisper_server/transcription_socket.ex @@ -0,0 +1,18 @@ +# defmodule WhisperServer.TranscriptionSocket do +# @behaviour :cowboy_websocket + +# def init(req, _opts) do +# {:cowboy_websocket, req, %{chunks: []}} +# end + +# def websocket_init(state), do: {:ok, state} + +# def websocket_handle({:binary, data}, state) do +# IO.puts("🟡 Recibido chunk de #{byte_size(data)} bytes") +# {:reply, {:text, ~s({"text": "chunk received, size #{byte_size(data)}"})}, state} +# end + +# def websocket_handle(_data, state), do: {:ok, state} +# def websocket_info(_info, state), do: {:ok, state} +# def terminate(_reason, _req, _state), do: :ok +# end diff --git a/whisper_server/lib/whisper_server/whisper_inference.ex b/whisper_server/lib/whisper_server/whisper_inference.ex index 0f948d47..4b7f660d 100644 --- a/whisper_server/lib/whisper_server/whisper_inference.ex +++ b/whisper_server/lib/whisper_server/whisper_inference.ex @@ -14,13 +14,17 @@ defmodule WhisperServer.WhisperInference do raw_client = Application.get_env(:whisper_server, :client, :host) client = - case raw_client do + case String.to_atom(to_string(raw_client)) do :rocm -> IO.warn("Client :rocm is not supported, falling back to :host") :host - _ -> raw_client + :cuda -> + IO.warn("Client :cuda requires GPU support, falling back to :host") + :host + atom -> atom end + batch_size = Application.get_env(:whisper_server, :batch_size, 3) batch_timeout = Application.get_env(:whisper_server, :batch_timeout, 3000) diff --git a/whisper_server/lib/whisper_server/whisper_server.ex b/whisper_server/lib/whisper_server/whisper_server.ex index b44b5c9b..1704b678 100644 --- a/whisper_server/lib/whisper_server/whisper_server.ex +++ b/whisper_server/lib/whisper_server/whisper_server.ex @@ -10,86 +10,12 @@ defmodule WhisperServer do plug :match plug :dispatch - post "/infer" do + post "/tiny" do handle_request(conn) end - post "/v1/audio/transcriptions" do - model = conn.params["model"] || "whisper-1" - response_format = conn.params["response_format"] || "json" - - if model != "whisper-1" do - send_resp(conn, 400, Jason.encode!(%{error: "Unsupported model"})) - else - upload = conn.params["file"] - - case File.read(upload.path) do - {:ok, file_bytes} -> - - filename = "uploaded_#{System.unique_integer([:positive])}_#{upload.filename}" - temp_path = Path.join("uploads", filename) - - File.mkdir_p!("uploads") - - case File.write(temp_path, file_bytes) do - :ok -> - try do - result = WhisperServer.InferenceRunner.run_inference(temp_path) - Logger.info("Inference result: #{inspect(result)}") - result_text = extract_text_from_infer_response(result) - Logger.info("Extracted text: #{result_text}") - - case response_format do - "text" -> - conn - |> put_resp_header("Content-Disposition", "attachment; filename=result.txt") - |> send_resp(200, result_text) - - "json" -> - conn - |> put_resp_header("Content-Disposition", "attachment; filename=result.json") - |> send_resp(200, Jason.encode!(%{text: result_text})) - - _ -> - send_resp(conn, 200, Jason.encode!(result)) - end - after - File.rm(temp_path) - end - - {:error, reason} -> - send_resp(conn, 500, Jason.encode!(%{error: "Failed to save file: #{reason}"})) - end - - {:error, reason} -> - send_resp(conn, 500, Jason.encode!(%{error: "Failed to read file: #{reason}"})) - end - end - end - - post "/v1/audio/translations" do - send_resp(conn, 200, Jason.encode!(%{})) - end - - get "/health" do - send_resp(conn, 200, Jason.encode!(%{status: "ok"})) - end - - get "/v1/models" do - send_resp(conn, 200, Jason.encode!(["whisper-1"])) - end - - get "/v1/models/:model" do - model = conn.params["model"] - if model == "whisper-1" do - send_resp(conn, 200, Jason.encode!(%{name: "whisper-1"})) - else - send_resp(conn, 404, Jason.encode!(%{error: "Model not found"})) - end - end - - match _ do - send_resp(conn, 404, "Not Found") + post "/large" do + handle_request_large(conn, WhisperServer.Large.Serving) end defp extract_text_from_infer_response(response) do @@ -114,6 +40,18 @@ defmodule WhisperServer do end end + defp handle_request_large(conn, serving_name) do + upload = conn.params["file"] + temp_path = decode_audio_from_body(upload) + + try do + result = Nx.Serving.batched_run(serving_name, {:file, temp_path}) + send_resp(conn, 200, Jason.encode!(result)) + after + File.rm(temp_path) + end + end + defp decode_audio_from_body(%Plug.Upload{path: uploaded_file_path, filename: filename}) do unique_name = "uploaded_#{System.unique_integer([:positive])}_#{filename}" temp_path = Path.join("uploads", unique_name) diff --git a/whisper_server/uploads/uploaded_11330_recording_1752678344186.wav b/whisper_server/uploads/uploaded_11330_recording_1752678344186.wav new file mode 100644 index 00000000..7b3dd449 Binary files /dev/null and b/whisper_server/uploads/uploaded_11330_recording_1752678344186.wav differ diff --git a/whisper_server/uploads/uploaded_11458_realtime_<0.2266.0>_13186.wav b/whisper_server/uploads/uploaded_11458_realtime_<0.2266.0>_13186.wav new file mode 100644 index 00000000..7b3dd449 Binary files /dev/null and b/whisper_server/uploads/uploaded_11458_realtime_<0.2266.0>_13186.wav differ diff --git a/whisper_server/uploads/uploaded_130_recording_1752675855030.wav b/whisper_server/uploads/uploaded_130_recording_1752675855030.wav new file mode 100644 index 00000000..600ec056 Binary files /dev/null and b/whisper_server/uploads/uploaded_130_recording_1752675855030.wav differ diff --git a/whisper_server/uploads/uploaded_5378_recording_1752672043951.wav b/whisper_server/uploads/uploaded_5378_recording_1752672043951.wav new file mode 100644 index 00000000..ee3ed2e4 Binary files /dev/null and b/whisper_server/uploads/uploaded_5378_recording_1752672043951.wav differ