# Runtime settings, one assignment per line.
# When read by a shell these are plain assignments: nothing here uses
# `export`, so whatever loads this file is responsible for placing the
# values into the target process environment.
# NOTE(review): the meaning of each value is inferred from its name only;
# confirm against the program that consumes these settings.

# Accelerator selection and memory budget.
# NOTE(review): MEMORY_FRACTION presumably caps the share of device memory
# the process may claim - confirm.
CLIENT=cuda
DEFAULT_DEVICE_ID=0
MEMORY_FRACTION=0.5

# Listening port.
PORT=4003

# Request batching.
# NOTE(review): the unit of BATCH_TIMEOUT is not stated here (presumably
# milliseconds) - confirm.
BATCH_SIZE=3
BATCH_TIMEOUT=3000

# Model identifier.
# NOTE(review): looks like a model-hub repository id - confirm.
MODEL=openai/whisper-tiny

# XLA target.
# NOTE(review): presumably selects the CUDA 12 XLA build - confirm.
XLA_TARGET=cuda12