Damien Benveniste committed · Commit 10dd1af
1 Parent(s): 6490764
Commit message: modified

Files changed: entrypoint.sh (+20 -14)

entrypoint.sh CHANGED
@@ -37,20 +37,26 @@ for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.co
     ls -la "$dir"
 done
 
-# Construct the command
-CMD="vllm serve $MODEL \
-    --host 0.0.0.0 \
-    --port 8000 \
-    --dtype $DTYPE \
-    --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
-    --max-num-seqs $MAX_NUM_SEQS \
-    --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
-    --max-model-len $MAX_MODEL_LEN"
-
-# Add enforce-eager only if it's set to true
-if [ "$ENFORCE_EAGER" = "true" ]; then
-    CMD="$CMD --enforce-eager"
-fi
+# # Construct the command
+# CMD="vllm serve $MODEL \
+#     --host 0.0.0.0 \
+#     --port 8000 \
+#     --dtype $DTYPE \
+#     --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
+#     --max-num-seqs $MAX_NUM_SEQS \
+#     --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
+#     --max-model-len $MAX_MODEL_LEN"
+
+# # Add enforce-eager only if it's set to true
+# if [ "$ENFORCE_EAGER" = "true" ]; then
+#     CMD="$CMD --enforce-eager"
+# fi
+
+
+CMD="python3 -m vllm.entrypoints.openai.api_server \
+    --model EleutherAI/pythia-70m \
+    --gpu-memory-utilization 0.9
+    --max-model-len 200"
 
 
 # Execute the command
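For orientation, here is a hedged sketch of how the tail of entrypoint.sh presumably consumes $CMD, plus a hypothetical smoke test of the OpenAI-compatible server that the new command starts. The echo/exec/curl lines, the localhost:8000 address, and the request payload are assumptions and not part of this commit; they only illustrate the usual pattern for a vLLM API-server entrypoint, whose default port is 8000.

# Assumed continuation of entrypoint.sh, after the "# Execute the command" comment:
echo "Starting vLLM with: $CMD"    # hypothetical log line
exec $CMD                          # unquoted expansion word-splits $CMD into arguments

# Hypothetical smoke test once the server is up:
curl -s http://localhost:8000/v1/models
curl -s http://localhost:8000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "EleutherAI/pythia-70m", "prompt": "Hello", "max_tokens": 16}'

One detail worth noting in the new block: the --gpu-memory-utilization 0.9 line has no trailing backslash, so the newline before --max-model-len 200 is stored inside $CMD. The command still runs as long as the script expands $CMD unquoted (word splitting treats the newline as whitespace), but adding the backslash would keep the line-continuation style consistent.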