Damien Benveniste committed on
Commit
a3789d1
·
1 Parent(s): ac0d581
Files changed (2) hide show
  1. Dockerfile +3 -3
  2. entrypoint.sh +25 -2
Dockerfile CHANGED
@@ -8,9 +8,9 @@ ENV XDG_CACHE_HOME="/tmp/cache"
8
  ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
9
  ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
10
 
11
- # Create cache directories and set permissions
12
- RUN mkdir -p /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache && \
13
- chmod -R 777 /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache
14
 
15
  # Set the working directory
16
  WORKDIR /app
 
8
  ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
9
  ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
10
 
11
+ # Create necessary directories and set permissions
12
+ RUN mkdir -p /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp/config && \
13
+ chmod -R 777 /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp/config
14
 
15
  # Set the working directory
16
  WORKDIR /app
entrypoint.sh CHANGED
@@ -7,11 +7,28 @@ MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
7
  MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
8
  GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
9
  MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
10
- ENFORCE_EAGER=${ENFORCE_EAGER:-true}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
 
13
  # Construct the command
14
- CMD="vllm serve $MODEL \
 
 
 
15
  --dtype $DTYPE \
16
  --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
17
  --max-num-seqs $MAX_NUM_SEQS \
@@ -23,5 +40,11 @@ if [ "$ENFORCE_EAGER" = "true" ]; then
23
  CMD="$CMD --enforce-eager"
24
  fi
25
 
 
 
 
 
 
26
  # Execute the command
 
27
  exec $CMD
 
7
  MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
8
  GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
9
  MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
10
+ ENFORCE_EAGER=${ENFORCE_EAGER:-true}
11
+
12
+ # Print environment for debugging
13
+ echo "Environment variables:"
14
+ env
15
+
16
+ # Check and set permissions for directories
17
+ for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp/config; do
18
+ if [ ! -d "$dir" ]; then
19
+ mkdir -p "$dir"
20
+ fi
21
+ chmod -R 777 "$dir"
22
+ echo "Permissions for $dir:"
23
+ ls -la "$dir"
24
+ done
25
 
26
 
27
  # Construct the command
28
+ CMD="python -m vllm.entrypoints.openai.api_server \
29
+ --model $MODEL \
30
+ --host 0.0.0.0 \
31
+ --port 8000 \
32
  --dtype $DTYPE \
33
  --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
34
  --max-num-seqs $MAX_NUM_SEQS \
 
40
  CMD="$CMD --enforce-eager"
41
  fi
42
 
43
+ # Disable usage reporting if set
44
+ if [ "$DISABLE_USAGE_REPORTING" = "true" ]; then
45
+ CMD="$CMD --disable-usage-reporting"
46
+ fi
47
+
48
  # Execute the command
49
+ echo "Running command: $CMD"
50
  exec $CMD