Spaces:
Sleeping
Sleeping
Damien Benveniste
committed on
Commit
·
a3789d1
1
Parent(s):
ac0d581
modified
Browse files
- Dockerfile +3 -3
- entrypoint.sh +25 -2
Dockerfile
CHANGED
@@ -8,9 +8,9 @@ ENV XDG_CACHE_HOME="/tmp/cache"
|
|
8 |
ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
|
9 |
ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
|
10 |
|
11 |
-
# Create
|
12 |
-
RUN mkdir -p /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache && \
|
13 |
-
chmod -R 777 /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache
|
14 |
|
15 |
# Set the working directory
|
16 |
WORKDIR /app
|
|
|
8 |
ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
|
9 |
ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
|
10 |
|
11 |
+
# Create necessary directories and set permissions
|
12 |
+
RUN mkdir -p /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp/config && \
|
13 |
+
chmod -R 777 /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp/config
|
14 |
|
15 |
# Set the working directory
|
16 |
WORKDIR /app
|
entrypoint.sh
CHANGED
@@ -7,11 +7,28 @@ MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
|
|
7 |
MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
|
8 |
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
|
9 |
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
|
10 |
-
ENFORCE_EAGER=${ENFORCE_EAGER:-true}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
|
13 |
# Construct the command
|
14 |
-
CMD="
|
|
|
|
|
|
|
15 |
--dtype $DTYPE \
|
16 |
--max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
|
17 |
--max-num-seqs $MAX_NUM_SEQS \
|
@@ -23,5 +40,11 @@ if [ "$ENFORCE_EAGER" = "true" ]; then
|
|
23 |
CMD="$CMD --enforce-eager"
|
24 |
fi
|
25 |
|
|
|
|
|
|
|
|
|
|
|
26 |
# Execute the command
|
|
|
27 |
exec $CMD
|
|
|
7 |
MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
|
8 |
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
|
9 |
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
|
10 |
+
ENFORCE_EAGER=${ENFORCE_EAGER:-true}
|
11 |
+
|
12 |
+
# Print environment for debugging
|
13 |
+
echo "Environment variables:"
|
14 |
+
env
|
15 |
+
|
16 |
+
# Check and set permissions for directories
|
17 |
+
for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp/config; do
|
18 |
+
if [ ! -d "$dir" ]; then
|
19 |
+
mkdir -p "$dir"
|
20 |
+
fi
|
21 |
+
chmod -R 777 "$dir"
|
22 |
+
echo "Permissions for $dir:"
|
23 |
+
ls -la "$dir"
|
24 |
+
done
|
25 |
|
26 |
|
27 |
# Construct the command
|
28 |
+
CMD="python -m vllm.entrypoints.openai.api_server \
|
29 |
+
--model $MODEL \
|
30 |
+
--host 0.0.0.0 \
|
31 |
+
--port 8000 \
|
32 |
--dtype $DTYPE \
|
33 |
--max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
|
34 |
--max-num-seqs $MAX_NUM_SEQS \
|
|
|
40 |
CMD="$CMD --enforce-eager"
|
41 |
fi
|
42 |
|
43 |
+
# Disable usage reporting if set
|
44 |
+
if [ "$DISABLE_USAGE_REPORTING" = "true" ]; then
|
45 |
+
CMD="$CMD --disable-usage-reporting"
|
46 |
+
fi
|
47 |
+
|
48 |
# Execute the command
|
49 |
+
echo "Running command: $CMD"
|
50 |
exec $CMD
|