Damien Benveniste committed
Commit adf79f3 · 1 Parent(s): 0e2b512
Files changed (2):
  1. Dockerfile +31 -31
  2. entrypoint.sh +49 -49
Dockerfile CHANGED
@@ -1,34 +1,34 @@
-# # Use the official vLLM image as the base image
-# FROM vllm/vllm-openai:latest
+# Use the official vLLM image as the base image
+FROM vllm/vllm-openai:latest
 
-# # Install debugging tools
-# RUN apt-get update && apt-get install -y procps vim
+# Install debugging tools
+RUN apt-get update && apt-get install -y procps vim
 
-# # Set environment variables
-# ENV HUGGING_FACE_HUB_TOKEN="your_hf_token_here"
-# ENV HF_HOME="/tmp/huggingface"
-# ENV XDG_CACHE_HOME="/tmp/cache"
-# ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
-# ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
-# ENV VLLM_USE_MODELSCOPE="false"
-# ENV VLLM_DISABLE_USAGE_STATS="true"
-# ENV XDG_CONFIG_HOME="/tmp/config"
+# Set environment variables
+ENV HUGGING_FACE_HUB_TOKEN="your_hf_token_here"
+ENV HF_HOME="/tmp/huggingface"
+ENV XDG_CACHE_HOME="/tmp/cache"
+ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
+ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
+ENV VLLM_USE_MODELSCOPE="false"
+ENV VLLM_DISABLE_USAGE_STATS="true"
+ENV XDG_CONFIG_HOME="/tmp/config"
 
-# # Ensure PATH includes common Python locations
-# ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
+# Ensure PATH includes common Python locations
+ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
 
-# # Set the working directory
-# WORKDIR /app
+# Set the working directory
+WORKDIR /app
 
-# # Copy your entrypoint script
-# COPY entrypoint.sh /app/entrypoint.sh
-# RUN chmod +x /app/entrypoint.sh
+# Copy your entrypoint script
+COPY entrypoint.sh /app/entrypoint.sh
+RUN chmod +x /app/entrypoint.sh
 
-# # Expose the port the app runs on
-# EXPOSE 8000
+# Expose the port the app runs on
+EXPOSE 8000
 
-# # Set the entrypoint
-# ENTRYPOINT ["/app/entrypoint.sh"]
+# Set the entrypoint
+ENTRYPOINT ["/app/entrypoint.sh"]
 
 # FROM python:3.9
 
@@ -63,10 +63,10 @@
 
 
 # ARG VERSION=v0.5.3.post1
-FROM vllm/vllm-openai:latest
-ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
-EXPOSE 8000
-WORKDIR /app
-COPY entrypoint.sh /app/entrypoint.sh
-RUN chmod +x /app/entrypoint.sh
-ENTRYPOINT [ "/app/entrypoint.sh" ]
+# FROM vllm/vllm-openai:latest
+# ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
+# EXPOSE 8000
+# WORKDIR /app
+# COPY entrypoint.sh /app/entrypoint.sh
+# RUN chmod +x /app/entrypoint.sh
+# ENTRYPOINT [ "/app/entrypoint.sh" ]
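For reference, building and running this image locally might look like the sketch below. The `vllm-phi3` tag is a placeholder (not part of the commit), and the real Hugging Face token is better passed at run time than baked into the ENV line above.

# Build from the directory containing the Dockerfile and entrypoint.sh
docker build -t vllm-phi3 .

# Run with GPU access, overriding the placeholder token and exposing port 8000
docker run --gpus all -p 8000:8000 \
    -e HUGGING_FACE_HUB_TOKEN="<your_hf_token>" \
    vllm-phi3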
entrypoint.sh CHANGED
@@ -1,64 +1,64 @@
 #!/bin/bash
 
 # Default values
-# MODEL=${MODEL:-"microsoft/Phi-3-mini-4k-instruct"}
+MODEL=${MODEL:-"microsoft/Phi-3-mini-4k-instruct"}
 # MODEL=${MODEL:-"EleutherAI/pythia-70m"}
-# DTYPE=${DTYPE:-"half"}
-# MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
-# MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
-# GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
-# MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
-# ENFORCE_EAGER=${ENFORCE_EAGER:-true}
+DTYPE=${DTYPE:-"half"}
+MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
+MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
+GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
+MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
+ENFORCE_EAGER=${ENFORCE_EAGER:-true}
 
-# # Disable usage stats via environment variable
-# export VLLM_DISABLE_USAGE_STATS=true
+# Disable usage stats via environment variable
+export VLLM_DISABLE_USAGE_STATS=true
 
-# # Print environment for debugging
-# echo "Environment variables:"
-# env
+# Print environment for debugging
+echo "Environment variables:"
+env
 
-# # Create and set permissions for the config directory
-# CONFIG_DIR=${XDG_CONFIG_HOME:-"/tmp/config"}
+# Create and set permissions for the config directory
+CONFIG_DIR=${XDG_CONFIG_HOME:-"/tmp/config"}
 
-# if [ ! -d "$CONFIG_DIR" ]; then
-#     mkdir -p "$CONFIG_DIR"
-# fi
-# chmod -R 777 "$CONFIG_DIR"
-# echo "Permissions for $CONFIG_DIR:"
-# ls -la "$CONFIG_DIR"
+if [ ! -d "$CONFIG_DIR" ]; then
+    mkdir -p "$CONFIG_DIR"
+fi
+chmod -R 777 "$CONFIG_DIR"
+echo "Permissions for $CONFIG_DIR:"
+ls -la "$CONFIG_DIR"
 
-# # Check and set permissions for directories
-# for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config; do
-#     if [ ! -d "$dir" ]; then
-#         mkdir -p "$dir"
-#     fi
-#     chmod -R 777 "$dir"
-#     echo "Permissions for $dir:"
-#     ls -la "$dir"
-# done
+# Check and set permissions for directories
+for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config; do
+    if [ ! -d "$dir" ]; then
+        mkdir -p "$dir"
+    fi
+    chmod -R 777 "$dir"
+    echo "Permissions for $dir:"
+    ls -la "$dir"
+done
 
-# # Construct the command
-# CMD="vllm serve $MODEL \
-#     --host 0.0.0.0 \
-#     --port 8000 \
-#     --dtype $DTYPE \
-#     --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
-#     --max-num-seqs $MAX_NUM_SEQS \
-#     --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
-#     --max-model-len $MAX_MODEL_LEN"
+# Construct the command
+CMD="vllm serve $MODEL \
+    --host 0.0.0.0 \
+    --port 8000 \
+    --dtype $DTYPE \
+    --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
+    --max-num-seqs $MAX_NUM_SEQS \
+    --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
+    --max-model-len $MAX_MODEL_LEN"
 
-# # Add enforce-eager only if it's set to true
-# if [ "$ENFORCE_EAGER" = "true" ]; then
-#     CMD="$CMD --enforce-eager"
-# fi
+# Add enforce-eager only if it's set to true
+if [ "$ENFORCE_EAGER" = "true" ]; then
+    CMD="$CMD --enforce-eager"
+fi
 
 
-python3 -m vllm.entrypoints.openai.api_server \
-    --model EleutherAI/pythia-70m \
-    --gpu-memory-utilization 0.9 \
-    --max-model-len 200
+# python3 -m vllm.entrypoints.openai.api_server \
+#     --model EleutherAI/pythia-70m \
+#     --gpu-memory-utilization 0.9 \
+#     --max-model-len 200
 
 
-# # Execute the command
-# echo "Running command: $CMD"
-# exec $CMD
+# Execute the command
+echo "Running command: $CMD"
+exec $CMD
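Because every serving parameter above uses the ${VAR:-default} pattern, each can be overridden at container start without editing the script. A hypothetical invocation (image tag and values are illustrative, not from the commit):

# Serve the smaller test model with a longer context, keeping other defaults
docker run --gpus all -p 8000:8000 \
    -e MODEL="EleutherAI/pythia-70m" \
    -e MAX_MODEL_LEN=2048 \
    -e ENFORCE_EAGER=false \
    vllm-phi3

Once the server is up, vllm serve exposes an OpenAI-compatible API on port 8000, so a quick smoke test could be:

curl http://localhost:8000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "EleutherAI/pythia-70m", "prompt": "Hello", "max_tokens": 16}'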