Spaces:
Sleeping
Sleeping
File size: 1,124 Bytes
40afe12 adf79f3 a3789d1 adf79f3 9ed86a1 adf79f3 494ceb5 adf79f3 40afe12 adf79f3 10dd1af adf79f3 10dd1af adf79f3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
#!/bin/bash
# Launch settings for the vLLM server.
# Each value may be overridden from the environment; the text after ":-" is
# used when the variable is unset or empty.
MODEL=${MODEL:-"microsoft/Phi-3-mini-4k-instruct"}          # model passed to `vllm serve`
DTYPE=${DTYPE:-"half"}                                      # value for --dtype
MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}       # value for --max-num-batched-tokens
MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}                            # value for --max-num-seqs
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}      # value for --gpu-memory-utilization
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}                         # value for --max-model-len
ENFORCE_EAGER=${ENFORCE_EAGER:-true}                        # "true" adds --enforce-eager

# Opt out of vLLM usage-stats collection.
# vLLM documents VLLM_NO_USAGE_STATS=1 and DO_NOT_TRACK=1 as the opt-out
# switches, so export both. VLLM_DISABLE_USAGE_STATS is not one of the
# documented names; it is still exported in case something else reads it.
export VLLM_NO_USAGE_STATS=1
export DO_NOT_TRACK=1
export VLLM_DISABLE_USAGE_STATS=true
# Ensure each cache/config directory exists, open its permissions to all
# users (recursively), and print a listing of it for the startup log.
cache_dirs=(
  /tmp/huggingface
  /tmp/cache
  /tmp/numba_cache
  /tmp/outlines_cache
  /tmp/config
)
for cache_dir in "${cache_dirs[@]}"; do
  # Create the directory only when it is not already there
  [ -d "$cache_dir" ] || mkdir -p "$cache_dir"
  chmod -R 777 "$cache_dir"
  echo "Permissions for $cache_dir:"
  ls -la "$cache_dir"
done
# Build the server command as an argv array rather than a flat string, so
# each setting reaches vllm as exactly one argument even if its value
# contains whitespace or glob characters.
cmd=(
  vllm serve "$MODEL"
  --host 0.0.0.0
  --port 8000
  --dtype "$DTYPE"
  --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS"
  --max-num-seqs "$MAX_NUM_SEQS"
  --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
  --max-model-len "$MAX_MODEL_LEN"
)

# Add enforce-eager only if it's set to true
if [ "$ENFORCE_EAGER" = "true" ]; then
  cmd+=(--enforce-eager)
fi

# Log the command (elements joined by single spaces), then replace this
# shell with the server process so it receives signals directly.
echo "Running command: ${cmd[*]}"
exec "${cmd[@]}"