Damien Benveniste committed on
Commit
10dd1af
·
1 Parent(s): 6490764
Files changed (1) hide show
  1. entrypoint.sh +20 -14
entrypoint.sh CHANGED
@@ -37,20 +37,26 @@ for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.co
37
  ls -la "$dir"
38
  done
39
 
40
- # Construct the command
41
- CMD="vllm serve $MODEL \
42
- --host 0.0.0.0 \
43
- --port 8000 \
44
- --dtype $DTYPE \
45
- --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
46
- --max-num-seqs $MAX_NUM_SEQS \
47
- --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
48
- --max-model-len $MAX_MODEL_LEN"
49
-
50
- # Add enforce-eager only if it's set to true
51
- if [ "$ENFORCE_EAGER" = "true" ]; then
52
- CMD="$CMD --enforce-eager"
53
- fi
 
 
 
 
 
 
54
 
55
 
56
  # Execute the command
 
37
  ls -la "$dir"
38
  done
39
 
40
+ # # Construct the command
41
+ # CMD="vllm serve $MODEL \
42
+ # --host 0.0.0.0 \
43
+ # --port 8000 \
44
+ # --dtype $DTYPE \
45
+ # --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
46
+ # --max-num-seqs $MAX_NUM_SEQS \
47
+ # --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
48
+ # --max-model-len $MAX_MODEL_LEN"
49
+
50
+ # # Add enforce-eager only if it's set to true
51
+ # if [ "$ENFORCE_EAGER" = "true" ]; then
52
+ # CMD="$CMD --enforce-eager"
53
+ # fi
54
+
55
+
56
+ CMD="python3 -m vllm.entrypoints.openai.api_server \
57
+ --model EleutherAI/pythia-70m \
58
+ --gpu-memory-utilization 0.9 \
59
+ --max-model-len 200"
60
 
61
 
62
  # Execute the command