# Runtime settings, one assignment per line.
# These are plain shell assignments (not exported): source this file, and
# export the names separately if child processes must see them.

# Accelerator selection.
# NOTE(review): presumably the compute backend and the GPU index — confirm with the consumer.
CLIENT=cuda
DEFAULT_DEVICE_ID=0
# NOTE(review): looks like the share of device memory to reserve (0.5 = half) — confirm.
MEMORY_FRACTION=0.5

# Serving.
# NOTE(review): presumably the listening port — confirm.
PORT=4000
# NOTE(review): batching limits; the unit of BATCH_TIMEOUT is not shown here
# (milliseconds assumed) — confirm.
BATCH_SIZE=3
BATCH_TIMEOUT=3000

# Model identifier to load.
MODEL=openai/whisper-base

# NOTE(review): presumably selects the XLA build flavour (CUDA 12) — confirm.
XLA_TARGET=cuda12