Damien Benveniste committed
Commit adf79f3 · 1 Parent(s): 0e2b512
Files changed (2):
  1. Dockerfile +31 -31
  2. entrypoint.sh +49 -49
Dockerfile CHANGED
@@ -1,34 +1,34 @@
-# # Use the official vLLM image as the base image
-# FROM vllm/vllm-openai:latest
+# Use the official vLLM image as the base image
+FROM vllm/vllm-openai:latest
 
-# # Install debugging tools
-# RUN apt-get update && apt-get install -y procps vim
+# Install debugging tools
+RUN apt-get update && apt-get install -y procps vim
 
-# # Set environment variables
-# ENV HUGGING_FACE_HUB_TOKEN="your_hf_token_here"
-# ENV HF_HOME="/tmp/huggingface"
-# ENV XDG_CACHE_HOME="/tmp/cache"
-# ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
-# ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
-# ENV VLLM_USE_MODELSCOPE="false"
-# ENV VLLM_DISABLE_USAGE_STATS="true"
-# ENV XDG_CONFIG_HOME="/tmp/config"
+# Set environment variables
+ENV HUGGING_FACE_HUB_TOKEN="your_hf_token_here"
+ENV HF_HOME="/tmp/huggingface"
+ENV XDG_CACHE_HOME="/tmp/cache"
+ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
+ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
+ENV VLLM_USE_MODELSCOPE="false"
+ENV VLLM_DISABLE_USAGE_STATS="true"
+ENV XDG_CONFIG_HOME="/tmp/config"
 
-# # Ensure PATH includes common Python locations
-# ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
+# Ensure PATH includes common Python locations
+ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
 
-# # Set the working directory
-# WORKDIR /app
+# Set the working directory
+WORKDIR /app
 
-# # Copy your entrypoint script
-# COPY entrypoint.sh /app/entrypoint.sh
-# RUN chmod +x /app/entrypoint.sh
+# Copy your entrypoint script
+COPY entrypoint.sh /app/entrypoint.sh
+RUN chmod +x /app/entrypoint.sh
 
-# # Expose the port the app runs on
-# EXPOSE 8000
+# Expose the port the app runs on
+EXPOSE 8000
 
-# # Set the entrypoint
-# ENTRYPOINT ["/app/entrypoint.sh"]
+# Set the entrypoint
+ENTRYPOINT ["/app/entrypoint.sh"]
 
 # FROM python:3.9
 
@@ -63,10 +63,10 @@
 
 
 # ARG VERSION=v0.5.3.post1
-FROM vllm/vllm-openai:latest
-ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
-EXPOSE 8000
-WORKDIR /app
-COPY entrypoint.sh /app/entrypoint.sh
-RUN chmod +x /app/entrypoint.sh
-ENTRYPOINT [ "/app/entrypoint.sh" ]
+# FROM vllm/vllm-openai:latest
+# ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
+# EXPOSE 8000
+# WORKDIR /app
+# COPY entrypoint.sh /app/entrypoint.sh
+# RUN chmod +x /app/entrypoint.sh
+# ENTRYPOINT [ "/app/entrypoint.sh" ]
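For reference, building and running this image locally might look like the sketch below. The `vllm-phi3` tag is a placeholder (not part of the commit), and the real Hugging Face token is better passed at run time than baked into the ENV line above.

# Build from the directory containing the Dockerfile and entrypoint.sh
docker build -t vllm-phi3 .

# Run with GPU access, overriding the placeholder token and exposing port 8000
docker run --gpus all -p 8000:8000 \
    -e HUGGING_FACE_HUB_TOKEN="<your_hf_token>" \
    vllm-phi3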
entrypoint.sh CHANGED
@@ -1,64 +1,64 @@
 #!/bin/bash
 
 # Default values
-# MODEL=${MODEL:-"microsoft/Phi-3-mini-4k-instruct"}
+MODEL=${MODEL:-"microsoft/Phi-3-mini-4k-instruct"}
 # MODEL=${MODEL:-"EleutherAI/pythia-70m"}
-# DTYPE=${DTYPE:-"half"}
-# MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
-# MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
-# GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
-# MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
-# ENFORCE_EAGER=${ENFORCE_EAGER:-true}
+DTYPE=${DTYPE:-"half"}
+MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
+MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
+GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
+MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
+ENFORCE_EAGER=${ENFORCE_EAGER:-true}
 
-# # Disable usage stats via environment variable
-# export VLLM_DISABLE_USAGE_STATS=true
+# Disable usage stats via environment variable
+export VLLM_DISABLE_USAGE_STATS=true
 
-# # Print environment for debugging
-# echo "Environment variables:"
-# env
+# Print environment for debugging
+echo "Environment variables:"
+env
 
-# # Create and set permissions for the config directory
-# CONFIG_DIR=${XDG_CONFIG_HOME:-"/tmp/config"}
+# Create and set permissions for the config directory
+CONFIG_DIR=${XDG_CONFIG_HOME:-"/tmp/config"}
 
-# if [ ! -d "$CONFIG_DIR" ]; then
-#     mkdir -p "$CONFIG_DIR"
-# fi
-# chmod -R 777 "$CONFIG_DIR"
-# echo "Permissions for $CONFIG_DIR:"
-# ls -la "$CONFIG_DIR"
+if [ ! -d "$CONFIG_DIR" ]; then
+    mkdir -p "$CONFIG_DIR"
+fi
+chmod -R 777 "$CONFIG_DIR"
+echo "Permissions for $CONFIG_DIR:"
+ls -la "$CONFIG_DIR"
 
-# # Check and set permissions for directories
-# for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config; do
-#     if [ ! -d "$dir" ]; then
-#         mkdir -p "$dir"
-#     fi
-#     chmod -R 777 "$dir"
-#     echo "Permissions for $dir:"
-#     ls -la "$dir"
-# done
+# Check and set permissions for directories
+for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config; do
+    if [ ! -d "$dir" ]; then
+        mkdir -p "$dir"
+    fi
+    chmod -R 777 "$dir"
+    echo "Permissions for $dir:"
+    ls -la "$dir"
+done
 
-# # Construct the command
-# CMD="vllm serve $MODEL \
-#     --host 0.0.0.0 \
-#     --port 8000 \
-#     --dtype $DTYPE \
-#     --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
-#     --max-num-seqs $MAX_NUM_SEQS \
-#     --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
-#     --max-model-len $MAX_MODEL_LEN"
+# Construct the command
+CMD="vllm serve $MODEL \
+    --host 0.0.0.0 \
+    --port 8000 \
+    --dtype $DTYPE \
+    --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
+    --max-num-seqs $MAX_NUM_SEQS \
+    --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
+    --max-model-len $MAX_MODEL_LEN"
 
-# # Add enforce-eager only if it's set to true
-# if [ "$ENFORCE_EAGER" = "true" ]; then
-#     CMD="$CMD --enforce-eager"
-# fi
+# Add enforce-eager only if it's set to true
+if [ "$ENFORCE_EAGER" = "true" ]; then
+    CMD="$CMD --enforce-eager"
+fi
 
 
-python3 -m vllm.entrypoints.openai.api_server \
-    --model EleutherAI/pythia-70m \
-    --gpu-memory-utilization 0.9 \
-    --max-model-len 200
+# python3 -m vllm.entrypoints.openai.api_server \
+#     --model EleutherAI/pythia-70m \
+#     --gpu-memory-utilization 0.9 \
+#     --max-model-len 200
 
 
-# # Execute the command
-# echo "Running command: $CMD"
-# exec $CMD
+# Execute the command
+echo "Running command: $CMD"
+exec $CMD
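Because every serving parameter above uses the ${VAR:-default} pattern, each can be overridden at container start without editing the script. A hypothetical invocation (image tag and values are illustrative, not from the commit):

# Serve the smaller test model with a longer context, keeping other defaults
docker run --gpus all -p 8000:8000 \
    -e MODEL="EleutherAI/pythia-70m" \
    -e MAX_MODEL_LEN=2048 \
    -e ENFORCE_EAGER=false \
    vllm-phi3

Once the server is up, vllm serve exposes an OpenAI-compatible API on port 8000, so a quick smoke test could be:

curl http://localhost:8000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "EleutherAI/pythia-70m", "prompt": "Hello", "max_tokens": 16}'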