Damien Benveniste committed · Commit adf79f3
1 Parent(s): 0e2b512
modified

Files changed:
- Dockerfile (+31 −31)
- entrypoint.sh (+49 −49)
Dockerfile
CHANGED
@@ -1,34 +1,34 @@
-… (24 lines removed here; their content was not rendered in this capture)
+# Use the official vLLM image as the base image
+FROM vllm/vllm-openai:latest

+# Install debugging tools
+RUN apt-get update && apt-get install -y procps vim

+# Set environment variables
+ENV HUGGING_FACE_HUB_TOKEN="your_hf_token_here"
+ENV HF_HOME="/tmp/huggingface"
+ENV XDG_CACHE_HOME="/tmp/cache"
+ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
+ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
+ENV VLLM_USE_MODELSCOPE="false"
+ENV VLLM_DISABLE_USAGE_STATS="true"
+ENV XDG_CONFIG_HOME="/tmp/config"

+# Ensure PATH includes common Python locations
+ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"

+# Set the working directory
+WORKDIR /app

+# Copy your entrypoint script
+COPY entrypoint.sh /app/entrypoint.sh
+RUN chmod +x /app/entrypoint.sh

+# Expose the port the app runs on
+EXPOSE 8000

+# Set the entrypoint
+ENTRYPOINT ["/app/entrypoint.sh"]

 # FROM python:3.9

@@ -63,10 +63,10 @@


 # ARG VERSION=v0.5.3.post1
-FROM vllm/vllm-openai:latest
-ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
-EXPOSE 8000
-WORKDIR /app
-COPY entrypoint.sh /app/entrypoint.sh
-RUN chmod +x /app/entrypoint.sh
-ENTRYPOINT [ "/app/entrypoint.sh" ]
+# FROM vllm/vllm-openai:latest
+# ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
+# EXPOSE 8000
+# WORKDIR /app
+# COPY entrypoint.sh /app/entrypoint.sh
+# RUN chmod +x /app/entrypoint.sh
+# ENTRYPOINT [ "/app/entrypoint.sh" ]
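For context, this is how the image could be built and launched locally; a minimal sketch, assuming the build runs from the directory holding this Dockerfile and entrypoint.sh, and using the illustrative tag vllm-space (not part of the commit):

# Build the image ("vllm-space" is an illustrative tag, not from the commit)
docker build -t vllm-space .

# Run with GPU access; inject the real token at run time instead of relying
# on the placeholder ENV HUGGING_FACE_HUB_TOKEN baked into the image
docker run --gpus all -p 8000:8000 \
    -e HUGGING_FACE_HUB_TOKEN="<your-token>" \
    vllm-space

Passing the token with -e keeps the secret out of the image layers; the value "your_hf_token_here" in the Dockerfile is only a placeholder.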
entrypoint.sh
CHANGED
@@ -1,64 +1,64 @@
 #!/bin/bash

 # Default values
-… (45 further removed lines scattered through this hunk were not rendered in this capture)
+MODEL=${MODEL:-"microsoft/Phi-3-mini-4k-instruct"}
 # MODEL=${MODEL:-"EleutherAI/pythia-70m"}
+DTYPE=${DTYPE:-"half"}
+MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
+MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
+GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
+MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
+ENFORCE_EAGER=${ENFORCE_EAGER:-true}

+# Disable usage stats via environment variable
+export VLLM_DISABLE_USAGE_STATS=true

+# Print environment for debugging
+echo "Environment variables:"
+env

+# Create and set permissions for the config directory
+CONFIG_DIR=${XDG_CONFIG_HOME:-"/tmp/config"}

+if [ ! -d "$CONFIG_DIR" ]; then
+    mkdir -p "$CONFIG_DIR"
+fi
+chmod -R 777 "$CONFIG_DIR"
+echo "Permissions for $CONFIG_DIR:"
+ls -la "$CONFIG_DIR"

+# Check and set permissions for directories
+for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config; do
+    if [ ! -d "$dir" ]; then
+        mkdir -p "$dir"
+    fi
+    chmod -R 777 "$dir"
+    echo "Permissions for $dir:"
+    ls -la "$dir"
+done

+# Construct the command
+CMD="vllm serve $MODEL \
+    --host 0.0.0.0 \
+    --port 8000 \
+    --dtype $DTYPE \
+    --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
+    --max-num-seqs $MAX_NUM_SEQS \
+    --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
+    --max-model-len $MAX_MODEL_LEN"

+# Add enforce-eager only if it's set to true
+if [ "$ENFORCE_EAGER" = "true" ]; then
+    CMD="$CMD --enforce-eager"
+fi


-python3 -m vllm.entrypoints.openai.api_server \
-    --model EleutherAI/pythia-70m \
-    --gpu-memory-utilization 0.9 \
-    --max-model-len 200
+# python3 -m vllm.entrypoints.openai.api_server \
+#     --model EleutherAI/pythia-70m \
+#     --gpu-memory-utilization 0.9 \
+#     --max-model-len 200


+# Execute the command
+echo "Running command: $CMD"
+exec $CMD
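Because every tunable above falls back to a default through ${VAR:-default}, the serving configuration can be changed per deployment without editing the script. A hedged usage sketch, again assuming the illustrative image tag vllm-space; the model name here is an example, not from the commit:

# Override the model, context length, and eager mode via the container environment
docker run --gpus all -p 8000:8000 \
    -e MODEL="Qwen/Qwen2-1.5B-Instruct" \
    -e MAX_MODEL_LEN=2048 \
    -e ENFORCE_EAGER=false \
    vllm-space

# vllm serve exposes an OpenAI-compatible API, so the server can be probed with:
curl http://localhost:8000/v1/models
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "Qwen/Qwen2-1.5B-Instruct", "messages": [{"role": "user", "content": "Hello"}]}'

Note that ENFORCE_EAGER=false works because the script appends --enforce-eager only when the variable equals the string "true".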