Damien Benveniste committed on
Commit
40afe12
·
1 Parent(s): 17a1be6

initial commit

Browse files
Files changed (2) hide show
  1. Dockerfile +23 -0
  2. entrypoint.sh +26 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Image that installs vLLM from PyPI and launches it through entrypoint.sh.
FROM python:3.9-slim

# Keep Python's stdout/stderr unbuffered so output appears in the container
# logs as soon as it is written.
ENV PYTHONUNBUFFERED=1

# Install the build toolchain. --no-install-recommends avoids pulling in
# optional packages, and the apt package lists are removed in the same layer
# so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/*

# All following paths are relative to /app.
WORKDIR /app

# Install vLLM without keeping pip's download cache in the layer.
RUN pip install --no-cache-dir vllm

# Copy the entrypoint script and make it executable.
COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh

# Exec form: the script is started directly, without a wrapping `sh -c`.
ENTRYPOINT ["/app/entrypoint.sh"]
entrypoint.sh ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# Container entrypoint: launch the vLLM server.
#
# Each setting below can be overridden with an environment variable of the
# same name; the value after ":-" is used when the variable is unset or empty.
#   MODEL                   model passed to `vllm serve`
#   DTYPE                   value for --dtype
#   MAX_NUM_BATCHED_TOKENS  value for --max-num-batched-tokens
#   MAX_NUM_SEQS            value for --max-num-seqs
#   GPU_MEMORY_UTILIZATION  value for --gpu-memory-utilization
#   MAX_MODEL_LEN           value for --max-model-len
#   ENFORCE_EAGER           true/false; adds --enforce-eager when true

set -euo pipefail

# Default values
MODEL="${MODEL:-microsoft/Phi-3-mini-4k-instruct}"
DTYPE="${DTYPE:-half}"
MAX_NUM_BATCHED_TOKENS="${MAX_NUM_BATCHED_TOKENS:-512}"
MAX_NUM_SEQS="${MAX_NUM_SEQS:-16}"
GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.85}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-512}"
ENFORCE_EAGER="${ENFORCE_EAGER:-true}"


# # Check if API key is provided
# if [ -z "$API_KEY" ]; then
#     echo "Error: API_KEY environment variable is not set"
#     exit 1
# fi

# Build the argument list as an array so every value stays a single word,
# even if it contains spaces or glob characters.
server_args=(
  --dtype "$DTYPE"
  --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS"
  --max-num-seqs "$MAX_NUM_SEQS"
  --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
  --max-model-len "$MAX_MODEL_LEN"
)

# --enforce-eager is a switch that takes no value: passing "true" after it
# would be parsed as a stray argument. Emit the bare flag only when enabled.
case "$ENFORCE_EAGER" in
  true | True | TRUE | 1 | yes)
    server_args+=(--enforce-eager)
    ;;
  false | False | FALSE | 0 | no)
    ;;
  *)
    printf 'Error: ENFORCE_EAGER must be true or false, got "%s"\n' \
      "$ENFORCE_EAGER" >&2
    exit 2
    ;;
esac

# Run vLLM server. exec replaces this shell, so the server receives the
# container's signals directly.
exec vllm serve "$MODEL" "${server_args[@]}"