KeivanR committed on
Commit
65afda8
·
1 Parent(s): f655296

fix dockerfile and requirements

Browse files
Files changed (3) hide show
  1. Dockerfile +35 -11
  2. app.py +10 -5
  3. requirements.txt +8 -2
Dockerfile CHANGED
@@ -1,17 +1,41 @@
1
- # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
- # you will also find guides on how best to write your Dockerfile
3
 
4
- FROM python:3.9
 
 
 
 
 
 
 
 
 
5
 
6
- RUN useradd -m -u 1000 user
7
- USER user
8
- ENV PATH="/home/user/.local/bin:$PATH"
9
 
10
- WORKDIR /app
 
 
11
 
12
- COPY --chown=user ./requirements.txt requirements.txt
13
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
 
 
14
 
15
- COPY --chown=user . /app
16
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
17
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base image with CUDA for GPU support (Python is installed via apt below)
2
+ FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
3
 
4
+ # Install system dependencies
5
+ RUN apt-get update && \
6
+ apt-get install -y --no-install-recommends \
7
+ python3.10 \
8
+ python3-pip \
9
+ python3.10-venv \
10
+ git \
11
+ libgl1 \
12
+ libglib2.0-0 && \
13
+ rm -rf /var/lib/apt/lists/*
14
 
15
+ # Create and activate virtual environment
16
+ RUN python3.10 -m venv /opt/venv
17
+ ENV PATH="/opt/venv/bin:$PATH"
18
 
19
+ # Install Python dependencies first (for better caching)
20
+ COPY requirements.txt .
21
+ RUN pip install --no-cache-dir -r requirements.txt
22
 
23
+ # Install PyTorch with CUDA support
24
+ RUN pip install --no-cache-dir \
25
+ torch==2.1.2+cu121 \
26
+ torchvision==0.16.2+cu121 \
27
+ --extra-index-url https://download.pytorch.org/whl/cu121
28
 
29
+ # Copy entire application (including model.py)
30
+ COPY . .
31
 
32
+ # Special model loading step
33
+ RUN python3 -c "
34
+ from model import QwenClassifier
35
+ QwenClassifier.from_pretrained('KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_lora-1743189446')
36
+ print('Model loaded successfully')
37
+ "
38
+
39
+ # Run FastAPI app
40
+ EXPOSE 7860
41
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -1,9 +1,14 @@
1
- from transformers import pipeline
2
  from fastapi import FastAPI
 
 
3
 
4
- app = FastAPI()
5
- classifier = pipeline("text-classification", model="KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_lora-1743189446")
 
 
 
 
6
 
7
  @app.post("/predict")
8
- def predict(text: str):
9
- return classifier(text)
 
 
1
  from fastapi import FastAPI
2
+ from qwen_classifier.predict import predict_single # Your existing function
3
+ import torch
4
 
5
+ app = FastAPI(title="Qwen Classifier")
6
+
7
+ @app.on_event("startup")
8
+ async def load_model():
9
+ # Warm up GPU
10
+ torch.zeros(1).cuda()
11
 
12
  @app.post("/predict")
13
+ async def predict(text: str):
14
+ return predict_single(text, backend="local")
requirements.txt CHANGED
@@ -1,2 +1,8 @@
1
- fastapi
2
- uvicorn[standard]
 
 
 
 
 
 
 
1
+ fastapi>=0.95.0
2
+ uvicorn>=0.22.0
3
+ transformers>=4.36.0
4
+ accelerate>=0.24.0
5
+ huggingface-hub>=0.19.0
6
+ torch>=2.1.0
7
+ sentencepiece>=0.1.99
8
+ auto-gptq>=0.5.0 # If using quantized model