server con post tiny y post large
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -2,6 +2,7 @@
|
|||||||
[{modules,['Elixir.WhisperServer',
|
[{modules,['Elixir.WhisperServer',
|
||||||
'Elixir.WhisperServer.Application',
|
'Elixir.WhisperServer.Application',
|
||||||
'Elixir.WhisperServer.InferenceRunner',
|
'Elixir.WhisperServer.InferenceRunner',
|
||||||
|
'Elixir.WhisperServer.Large',
|
||||||
'Elixir.WhisperServer.WhisperInference']},
|
'Elixir.WhisperServer.WhisperInference']},
|
||||||
{optional_applications,[]},
|
{optional_applications,[]},
|
||||||
{applications,[kernel,stdlib,elixir,logger,bumblebee,exla,nx,
|
{applications,[kernel,stdlib,elixir,logger,bumblebee,exla,nx,
|
||||||
|
@ -9,9 +9,10 @@ defmodule WhisperServer.Application do
|
|||||||
Application.put_env(:whisper_server, :batch_size, args[:batch_size])
|
Application.put_env(:whisper_server, :batch_size, args[:batch_size])
|
||||||
Application.put_env(:whisper_server, :batch_timeout, args[:batch_timeout])
|
Application.put_env(:whisper_server, :batch_timeout, args[:batch_timeout])
|
||||||
Application.put_env(:whisper_server, :port, args[:port])
|
Application.put_env(:whisper_server, :port, args[:port])
|
||||||
|
|
||||||
children = [
|
children = [
|
||||||
WhisperServer.WhisperInference,
|
WhisperServer.WhisperInference,
|
||||||
|
WhisperServer.Large,
|
||||||
{Plug.Cowboy, scheme: :http, plug: WhisperServer, options: [port: args[:port]]}
|
{Plug.Cowboy, scheme: :http, plug: WhisperServer, options: [port: args[:port]]}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
33
whisper_server/lib/whisper_server/large.ex
Normal file
33
whisper_server/lib/whisper_server/large.ex
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
defmodule WhisperServer.Large do
  @moduledoc """
  Supervisor for the `openai/whisper-large-v3` speech-to-text serving.

  On start it loads the model, featurizer, tokenizer and generation config
  through Bumblebee, builds a Whisper speech-to-text serving and supervises a
  single `Nx.Serving` process registered as `WhisperServer.Large.Serving`.

  Loading happens inside `init/1`, so `start_link/1` does not return until every
  artifact has been loaded, and any load that does not return `{:ok, _}` makes
  the supervisor fail to start.
  """

  use Supervisor

  # Hugging Face repository the model and its companion artifacts are loaded from.
  @model_name "openai/whisper-large-v3"

  @doc """
  Starts the supervisor and registers it under the module name.

  The argument is ignored.
  """
  @spec start_link(term()) :: Supervisor.on_start()
  def start_link(_opts) do
    Supervisor.start_link(__MODULE__, [], name: __MODULE__)
  end

  @impl true
  def init(_opts) do
    repo = {:hf, @model_name}

    # Assertive matches: a failed load is not recoverable here, so let it crash.
    {:ok, model} = Bumblebee.load_model(repo)
    {:ok, featurizer} = Bumblebee.load_featurizer(repo)
    {:ok, tokenizer} = Bumblebee.load_tokenizer(repo)
    {:ok, generation_config} = Bumblebee.load_generation_config(repo)

    serving =
      Bumblebee.Audio.speech_to_text_whisper(
        model,
        featurizer,
        tokenizer,
        generation_config,
        chunk_num_seconds: 30,
        language: "es",
        defn_options: [compiler: EXLA, client: :host]
      )

    children = [
      {Nx.Serving,
       serving: serving,
       name: __MODULE__.Serving,
       batch_size: 1,
       batch_timeout: 5000}
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end
end
|
18
whisper_server/lib/whisper_server/transcription_socket.ex
Normal file
18
whisper_server/lib/whisper_server/transcription_socket.ex
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# defmodule WhisperServer.TranscriptionSocket do
|
||||||
|
# @behaviour :cowboy_websocket
|
||||||
|
|
||||||
|
# def init(req, _opts) do
|
||||||
|
# {:cowboy_websocket, req, %{chunks: []}}
|
||||||
|
# end
|
||||||
|
|
||||||
|
# def websocket_init(state), do: {:ok, state}
|
||||||
|
|
||||||
|
# def websocket_handle({:binary, data}, state) do
|
||||||
|
# IO.puts("🟡 Recibido chunk de #{byte_size(data)} bytes")
|
||||||
|
# {:reply, {:text, ~s({"text": "chunk received, size #{byte_size(data)}"})}, state}
|
||||||
|
# end
|
||||||
|
|
||||||
|
# def websocket_handle(_data, state), do: {:ok, state}
|
||||||
|
# def websocket_info(_info, state), do: {:ok, state}
|
||||||
|
# def terminate(_reason, _req, _state), do: :ok
|
||||||
|
# end
|
@ -14,13 +14,17 @@ defmodule WhisperServer.WhisperInference do
|
|||||||
raw_client = Application.get_env(:whisper_server, :client, :host)
|
raw_client = Application.get_env(:whisper_server, :client, :host)
|
||||||
|
|
||||||
client =
|
client =
|
||||||
case raw_client do
|
case String.to_atom(to_string(raw_client)) do
|
||||||
:rocm ->
|
:rocm ->
|
||||||
IO.warn("Client :rocm is not supported, falling back to :host")
|
IO.warn("Client :rocm is not supported, falling back to :host")
|
||||||
:host
|
:host
|
||||||
_ -> raw_client
|
:cuda ->
|
||||||
|
IO.warn("Client :cuda requires GPU support, falling back to :host")
|
||||||
|
:host
|
||||||
|
atom -> atom
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
batch_size = Application.get_env(:whisper_server, :batch_size, 3)
|
batch_size = Application.get_env(:whisper_server, :batch_size, 3)
|
||||||
batch_timeout = Application.get_env(:whisper_server, :batch_timeout, 3000)
|
batch_timeout = Application.get_env(:whisper_server, :batch_timeout, 3000)
|
||||||
|
|
||||||
|
@ -10,86 +10,12 @@ defmodule WhisperServer do
|
|||||||
plug :match
|
plug :match
|
||||||
plug :dispatch
|
plug :dispatch
|
||||||
|
|
||||||
post "/infer" do
|
post "/tiny" do
|
||||||
handle_request(conn)
|
handle_request(conn)
|
||||||
end
|
end
|
||||||
|
|
||||||
post "/v1/audio/transcriptions" do
|
post "/large" do
|
||||||
model = conn.params["model"] || "whisper-1"
|
handle_request_large(conn, WhisperServer.Large.Serving)
|
||||||
response_format = conn.params["response_format"] || "json"
|
|
||||||
|
|
||||||
if model != "whisper-1" do
|
|
||||||
send_resp(conn, 400, Jason.encode!(%{error: "Unsupported model"}))
|
|
||||||
else
|
|
||||||
upload = conn.params["file"]
|
|
||||||
|
|
||||||
case File.read(upload.path) do
|
|
||||||
{:ok, file_bytes} ->
|
|
||||||
|
|
||||||
filename = "uploaded_#{System.unique_integer([:positive])}_#{upload.filename}"
|
|
||||||
temp_path = Path.join("uploads", filename)
|
|
||||||
|
|
||||||
File.mkdir_p!("uploads")
|
|
||||||
|
|
||||||
case File.write(temp_path, file_bytes) do
|
|
||||||
:ok ->
|
|
||||||
try do
|
|
||||||
result = WhisperServer.InferenceRunner.run_inference(temp_path)
|
|
||||||
Logger.info("Inference result: #{inspect(result)}")
|
|
||||||
result_text = extract_text_from_infer_response(result)
|
|
||||||
Logger.info("Extracted text: #{result_text}")
|
|
||||||
|
|
||||||
case response_format do
|
|
||||||
"text" ->
|
|
||||||
conn
|
|
||||||
|> put_resp_header("Content-Disposition", "attachment; filename=result.txt")
|
|
||||||
|> send_resp(200, result_text)
|
|
||||||
|
|
||||||
"json" ->
|
|
||||||
conn
|
|
||||||
|> put_resp_header("Content-Disposition", "attachment; filename=result.json")
|
|
||||||
|> send_resp(200, Jason.encode!(%{text: result_text}))
|
|
||||||
|
|
||||||
_ ->
|
|
||||||
send_resp(conn, 200, Jason.encode!(result))
|
|
||||||
end
|
|
||||||
after
|
|
||||||
File.rm(temp_path)
|
|
||||||
end
|
|
||||||
|
|
||||||
{:error, reason} ->
|
|
||||||
send_resp(conn, 500, Jason.encode!(%{error: "Failed to save file: #{reason}"}))
|
|
||||||
end
|
|
||||||
|
|
||||||
{:error, reason} ->
|
|
||||||
send_resp(conn, 500, Jason.encode!(%{error: "Failed to read file: #{reason}"}))
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
post "/v1/audio/translations" do
|
|
||||||
send_resp(conn, 200, Jason.encode!(%{}))
|
|
||||||
end
|
|
||||||
|
|
||||||
get "/health" do
|
|
||||||
send_resp(conn, 200, Jason.encode!(%{status: "ok"}))
|
|
||||||
end
|
|
||||||
|
|
||||||
get "/v1/models" do
|
|
||||||
send_resp(conn, 200, Jason.encode!(["whisper-1"]))
|
|
||||||
end
|
|
||||||
|
|
||||||
get "/v1/models/:model" do
|
|
||||||
model = conn.params["model"]
|
|
||||||
if model == "whisper-1" do
|
|
||||||
send_resp(conn, 200, Jason.encode!(%{name: "whisper-1"}))
|
|
||||||
else
|
|
||||||
send_resp(conn, 404, Jason.encode!(%{error: "Model not found"}))
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
match _ do
|
|
||||||
send_resp(conn, 404, "Not Found")
|
|
||||||
end
|
end
|
||||||
|
|
||||||
defp extract_text_from_infer_response(response) do
|
defp extract_text_from_infer_response(response) do
|
||||||
@ -114,6 +40,18 @@ defmodule WhisperServer do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Transcribes the uploaded audio with the serving registered as `serving_name`
# and replies with the JSON-encoded result.
#
# The upload is read from `conn.params["file"]`. When that key is absent the
# request is answered with 400 instead of passing `nil` on to
# decode_audio_from_body/1 (its visible clause only matches `%Plug.Upload{}`).
# The file at the path returned by decode_audio_from_body/1 is removed once
# inference finishes, whether it succeeded or raised.
defp handle_request_large(conn, serving_name) do
  case conn.params["file"] do
    nil ->
      send_resp(conn, 400, Jason.encode!(%{error: "Missing file upload"}))

    upload ->
      temp_path = decode_audio_from_body(upload)

      try do
        result = Nx.Serving.batched_run(serving_name, {:file, temp_path})
        send_resp(conn, 200, Jason.encode!(result))
      after
        # Best-effort cleanup; the return value is intentionally ignored.
        File.rm(temp_path)
      end
  end
end
|
||||||
|
|
||||||
defp decode_audio_from_body(%Plug.Upload{path: uploaded_file_path, filename: filename}) do
|
defp decode_audio_from_body(%Plug.Upload{path: uploaded_file_path, filename: filename}) do
|
||||||
unique_name = "uploaded_#{System.unique_integer([:positive])}_#{filename}"
|
unique_name = "uploaded_#{System.unique_integer([:positive])}_#{filename}"
|
||||||
temp_path = Path.join("uploads", unique_name)
|
temp_path = Path.join("uploads", unique_name)
|
||||||
|
Binary file not shown.
Binary file not shown.
BIN
whisper_server/uploads/uploaded_130_recording_1752675855030.wav
Normal file
BIN
whisper_server/uploads/uploaded_130_recording_1752675855030.wav
Normal file
Binary file not shown.
BIN
whisper_server/uploads/uploaded_5378_recording_1752672043951.wav
Normal file
BIN
whisper_server/uploads/uploaded_5378_recording_1752672043951.wav
Normal file
Binary file not shown.
Reference in New Issue
Block a user