# Runtime settings, one assignment per line.
# These are plain (non-exported) shell assignments: when this file is sourced
# they become variables of the sourcing shell. Export them in the caller if
# child processes need to see them.
# NOTE(review): the meanings below are inferred from the variable names only —
# confirm against the application that reads them.

# Compute backend selection (presumably an NVIDIA/CUDA GPU client — TODO confirm).
CLIENT=cuda
XLA_TARGET=cuda12
DEFAULT_DEVICE_ID=0
MEMORY_FRACTION=0.8

# Service listening port (presumably TCP — verify against the server code).
PORT=4000

# Request batching knobs (units of BATCH_TIMEOUT are not visible here — TODO confirm).
BATCH_SIZE=1
BATCH_TIMEOUT=0

# Model identifier (looks like a "<org>/<name>" model-hub id — confirm).
MODEL=openai/whisper-medium