KeivanR committed
Commit 5d27647 · 1 Parent(s): da9a9de

app front and hf repo in config

app.py CHANGED
@@ -4,15 +4,37 @@ os.environ['HF_HOME'] = '/tmp/.cache/huggingface' # Use /tmp in Spaces
 os.makedirs(os.environ['HF_HOME'], exist_ok=True) # Ensure directory exists
 
 from fastapi import FastAPI
+from fastapi.responses import HTMLResponse
 from qwen_classifier.predict import predict_single # Your existing function
 from qwen_classifier.evaluate import evaluate_batch # Your existing function
 import torch
 from huggingface_hub import login
 from qwen_classifier.model import QwenClassifier
+from qwen_classifier.config import HF_REPO, SPACE_URL
 from pydantic import BaseModel
 
 app = FastAPI(title="Qwen Classifier")
-hf_repo = 'KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_lora-1743189446'
+
+# Landing page listing the available endpoints
+@app.get("/", response_class=HTMLResponse)
+def home():
+    return f"""
+    <html>
+        <head>
+            <title>Qwen Classifier</title>
+        </head>
+        <body>
+            <h1>Qwen Classifier API</h1>
+            <p>Available endpoints:</p>
+            <ul>
+                <li><strong>POST /predict</strong> - Classify text</li>
+                <li><strong>POST /evaluate</strong> - Evaluate batch text predictions from a zip file</li>
+                <li><strong>GET /health</strong> - Check API status</li>
+            </ul>
+            <p>Try it: <code>curl -X POST {SPACE_URL}/predict -H "Content-Type: application/json" -d '{{"text":"your text"}}'</code></p>
+        </body>
+    </html>
+    """
 
 @app.on_event("startup")
 async def load_model():
@@ -28,7 +50,7 @@ async def load_model():
 
     # Load model (will cache in /home/user/.cache/huggingface)
     app.state.model = QwenClassifier.from_pretrained(
-        hf_repo,
+        HF_REPO,
     )
    print("Model loaded successfully!")
 
@@ -39,8 +61,12 @@ class PredictionRequest(BaseModel):
 
 @app.post("/predict")
 async def predict(request: PredictionRequest): # ← Validates input automatically
-    return predict_single(request.text, hf_repo, backend="local")
+    return predict_single(request.text, HF_REPO, backend="local")
 
 @app.post("/evaluate")
 async def evaluate(request: PredictionRequest): # ← Validates input automatically
-    return evaluate_batch(request.text, backend="local")
+    return evaluate_batch(request.text, HF_REPO, backend="local")
+
+@app.get("/health")
+def health_check():
+    return {"status": "healthy", "model": "loaded"}
qwen_classifier/cli.py CHANGED
@@ -2,6 +2,7 @@ import click
 from .predict import predict_single
 import warnings
 from transformers import logging as hf_logging
+from .config import HF_REPO
 
 def configure_logging(debug):
     """Configure warning and logging levels based on debug flag"""
@@ -24,7 +25,7 @@ def cli(ctx, debug):
 @cli.command()
 @click.argument('text')
 @click.option('--hf-token', envvar="HF_TOKEN", help="HF API token (or set HF_TOKEN env variable)")
-@click.option('--hf-repo', default="KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_lora-1743189446", help="Hugging Face model repo")
+@click.option('--hf-repo', default=HF_REPO, help="Hugging Face model repo")
 @click.option('--backend',
               type=click.Choice(['local', 'hf'], case_sensitive=False),
               default='local',
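With the default now read from config, --hf-repo only needs to be passed to override HF_REPO. A hedged sketch of driving the command through click's test runner follows; the command name "predict" is an assumption, since the decorated function's name is outside this diff's context.

# Sketch, assuming the click group is `cli` and the command shown above is
# registered as `predict` (the actual function name is not visible in this diff).
from click.testing import CliRunner
from qwen_classifier.cli import cli

runner = CliRunner()
# --hf-repo is omitted, so the default from qwen_classifier.config.HF_REPO applies
result = runner.invoke(cli, ["predict", "Count the inversions in an array.", "--backend", "local"])
print(result.output)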
qwen_classifier/config.py CHANGED
@@ -5,6 +5,7 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 # HF API config
 SPACE_URL = "https://keivanr-qwen-classifier-demo.hf.space"
+HF_REPO = 'KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_3layer-1743180976'
 
 TAG_NAMES = [
     'games',
@@ -16,5 +17,4 @@ TAG_NAMES = [
     'probabilities',
     'strings',
     'trees'
-
 ]
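Putting the two hunks together, config.py after this commit reads roughly as below; the TAG_NAMES entries hidden by the diff context are elided rather than guessed, and the torch import is an assumption implied by the DEVICE line in the hunk header.

# qwen_classifier/config.py after this commit (reconstruction, not the full file)
import torch  # assumed: needed for the DEVICE line shown in the hunk header

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# HF API config
SPACE_URL = "https://keivanr-qwen-classifier-demo.hf.space"
HF_REPO = 'KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_3layer-1743180976'

TAG_NAMES = [
    'games',
    # ... entries not visible in this diff ...
    'probabilities',
    'strings',
    'trees'
]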
qwen_classifier/evaluate.py CHANGED
@@ -1,6 +1,7 @@
 import numpy as np
 from sklearn.metrics import classification_report
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+from sklearn.preprocessing import MultiLabelBinarizer
 import zipfile
 import json
 import pandas as pd
@@ -12,7 +13,7 @@ from .config import TAG_NAMES, DEVICE, SPACE_URL
 
 def load_data(test_data_path):
     # zip file handler
-    zip_file = zipfile.ZipFile('code_classification_dataset.zip')
+    zip_file = zipfile.ZipFile(test_data_path)
 
     # list available files in the container
     names = zip_file.namelist()
@@ -36,6 +37,20 @@ def load_data(test_data_path):
     return df
 
 def preprocessing(df):
+    mlb = MultiLabelBinarizer()
+    tags_to_encode = ['math', 'graphs', 'strings', 'number theory', 'trees', 'geometry', 'games', 'probabilities']
+
+    # Filter tags and one-hot encode
+    df['tags_filtered'] = [[tag for tag in tags if tag in tags_to_encode] for tags in df["tags"]]
+    df.loc[df['tags_filtered'].apply(len) == 0, 'tags_filtered'] = df.loc[df['tags_filtered'].apply(len) == 0, 'tags_filtered'].apply(lambda x: ['other'])
+    encoded_tags = mlb.fit_transform(df['tags_filtered'])
+
+    # Create a new DataFrame with one-hot encoded columns
+    encoded_df = pd.DataFrame(encoded_tags, columns=mlb.classes_)
+
+    # Concatenate the encoded tags with the original DataFrame
+    df = pd.concat([df, encoded_df], axis=1)
+
     texts = df["prob_desc_description"].values.tolist()
     labels = df[TAG_NAMES].values.tolist()
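The new preprocessing() logic filters each problem's tags down to the eight classes of interest, maps rows left with no tags to 'other', and one-hot encodes the result with MultiLabelBinarizer. A standalone toy illustration (the empty-row fallback is written more compactly here than in the diff, but has the same effect):

# Toy data, not the project's dataset; illustrates the tag filtering and
# one-hot encoding added to preprocessing() above.
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer

df = pd.DataFrame({"tags": [["math", "dp"], ["graphs", "trees"], ["implementation"]]})
tags_to_encode = ['math', 'graphs', 'strings', 'number theory', 'trees',
                  'geometry', 'games', 'probabilities']

# Keep only the tags the classifier knows; rows with none left fall back to ['other']
df['tags_filtered'] = [[t for t in tags if t in tags_to_encode] for tags in df["tags"]]
df['tags_filtered'] = df['tags_filtered'].apply(lambda tags: tags or ['other'])

mlb = MultiLabelBinarizer()
encoded_df = pd.DataFrame(mlb.fit_transform(df['tags_filtered']), columns=mlb.classes_)
print(encoded_df)
#    graphs  math  other  trees
# 0       0     1      0      0
# 1       1     0      0      1
# 2       0     0      1      0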