KeivanR committed on
Commit
6a1e686
·
1 Parent(s): 72e6d75

other files

Browse files
.gitignore ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Virtual environment
25
+ .venv/
26
+ venv/
27
+ env/
28
+ ENV/
29
+ env.bak/
30
+ venv.bak/
31
+
32
+ # Docker
33
+ docker-compose.yml
34
+ docker-compose.*.yml
35
+ .dockerignore
36
+ Dockerfile
37
+ docker/
38
+ containers/
39
+
40
+ # IDE & Editor
41
+ .vscode/
42
+ .idea/
43
+ *.swp
44
+ *.swo
45
+ *~
46
+ .DS_Store
47
+ ._*
48
+ *.bak
49
+
50
+ # Jupyter
51
+ .ipynb_checkpoints/
52
+ *.ipynb
53
+
54
+ # Testing
55
+ .coverage
56
+ htmlcov/
57
+ .pytest_cache/
58
+ nosetests.xml
59
+ coverage.xml
60
+ *.cover
61
+ *.log
62
+
63
+ # Logs
64
+ *.log
65
+ logs/
66
+
67
+ # Hugging Face cache (large files)
68
+ .cache/
69
+ .huggingface/
70
+
71
+ # Local data & configs
72
+ data/
73
+ *.csv
74
+ *.jsonl
75
+ *.parquet
76
+ *.db
77
+ *.sqlite3
78
+
79
+ # System files
80
+ Thumbs.db
81
+ ehthumbs.db
82
+ Desktop.ini
83
+ $RECYCLE.BIN/
84
+
85
+ # Project-specific (adjust as needed)
86
+ qwen_classifier/__pycache__/
87
+ qwen_classifier.egg-info/
qwen_classifier/__init__.py ADDED
File without changes
qwen_classifier/cli.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import click
2
+ from .predict import predict_single
3
+ import warnings
4
+ from transformers import logging as hf_logging
5
+
6
def configure_logging(debug):
    """Set warning filters and transformers verbosity from the debug flag.

    Args:
        debug: When truthy, transformers logs at info level and the default
            warning filter is installed. Otherwise the "Some weights of the
            model checkpoint" warning is ignored and transformers only logs
            errors.
    """
    if debug:
        hf_logging.set_verbosity_info()
        warnings.simplefilter("default")
        return

    # Quiet mode: hide the checkpoint-weights warning and anything below error.
    warnings.filterwarnings(
        "ignore",
        message="Some weights of the model checkpoint",
    )
    hf_logging.set_verbosity_error()
14
+
15
@click.group()
@click.option(
    "--debug",
    is_flag=True,
    help="Enable debug output including warnings",
)
@click.pass_context
def cli(ctx, debug):
    """Qwen Multi-label Classifier CLI"""
    # The docstring above is what click prints as the group's help text.
    # Stash the flag on the shared context object so subcommands can read it.
    ctx.ensure_object(dict)["DEBUG"] = debug
    configure_logging(debug)
23
+
24
@cli.command()
@click.argument("text")
@click.option(
    "--hf-token",
    envvar="HF_TOKEN",
    help="HF API token (or set HF_TOKEN env variable)",
)
@click.option(
    "--hf-repo",
    default="KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_lora-1743189446",
    help="Hugging Face model repo",
)
@click.option(
    "--backend",
    type=click.Choice(["local", "hf"], case_sensitive=False),
    default="local",
    help="Inference backend: 'local' (your machine) or 'hf' (Hugging Face API)",
)
@click.pass_context
def predict(ctx, text, hf_repo, backend, hf_token):
    """Make prediction on a single text"""
    # The docstring above is what click prints as this command's help text.
    debug_enabled = ctx.obj["DEBUG"]
    if debug_enabled:
        click.echo("Debug mode enabled - showing all warnings")

    # Backend selection and model loading are handled by predict_single.
    outcome = predict_single(text, hf_repo, backend=backend, hf_token=hf_token)
    click.echo(f"Prediction results: {outcome}")
qwen_classifier/config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
# Device for local inference: "cuda" when torch reports a usable GPU,
# otherwise "cpu".
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Tag names paired positionally with the classifier's output probabilities
# when predictions are formatted, so the order here matters.
TAG_NAMES = [
    "games",
    "geometry",
    "graphs",
    "math",
    "number theory",
    "other",
    "probabilities",
    "strings",
    "trees",
]
qwen_classifier/evaluate.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.metrics import classification_report
3
+
4
def evaluate_model(test_data_path):
    """Evaluate the classifier on a test set (placeholder, not implemented).

    The intended behaviour is to load the test data at ``test_data_path``,
    run the model on it and return metrics such as precision, recall and
    f1-score. None of that exists yet, so the function fails loudly instead
    of returning an undefined name.

    Args:
        test_data_path: Location of the test data to evaluate on.

    Raises:
        NotImplementedError: Always, until the evaluation logic is written.
    """
    # TODO: load the test data
    # TODO: implement evaluation logic
    # TODO: return metrics like precision, recall, f1-score
    raise NotImplementedError(
        "evaluate_model is a placeholder: evaluation of "
        f"{test_data_path!r} is not implemented yet"
    )
qwen_classifier/model.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ from transformers import AutoModel, PreTrainedModel, AutoConfig
3
+
4
class QwenClassifier(PreTrainedModel):
    """Backbone loaded via ``AutoModel`` plus a linear classification head.

    The head maps the mean of the backbone's last hidden state to
    ``config.num_labels`` raw logits. No activation is applied here; the
    caller is expected to apply one (the prediction code uses a sigmoid).
    """

    def __init__(self, config):
        """Build the backbone and the classification head.

        Args:
            config: Model config. Must provide ``model_name`` (the identifier
                the backbone is loaded from) and ``num_labels``.
        """
        super().__init__(config)
        # Load the Qwen backbone named by the config.
        self.qwen_model = AutoModel.from_pretrained(config.model_name)

        self.classifier = nn.Linear(
            self.qwen_model.config.hidden_size, config.num_labels
        )
        # Optional loss override; callers may assign any callable taking
        # (logits, labels). When left as None, forward() falls back to
        # BCEWithLogitsLoss.
        self.loss_fn = None

    def forward(self, input_ids, attention_mask, labels=None):
        """Compute logits and, when ``labels`` is given, the loss.

        Args:
            input_ids: Token ids forwarded to the backbone.
            attention_mask: Attention mask forwarded to the backbone.
            labels: Optional targets. When provided, a loss is computed.

        Returns:
            ``logits`` when ``labels`` is None, otherwise ``(loss, logits)``.
        """
        outputs = self.qwen_model(
            input_ids=input_ids, attention_mask=attention_mask
        )
        # NOTE(review): the mean runs over every position along dim 1 and does
        # not use attention_mask, so padded positions are averaged in too.
        # Left unchanged so outputs stay consistent with existing checkpoints.
        pooled = outputs.last_hidden_state.mean(dim=1)
        # Raw logits are returned on purpose: no sigmoid is applied here.
        logits = self.classifier(pooled)

        if labels is None:
            return logits

        loss_fn = self.loss_fn
        if loss_fn is None:
            # Previously this path called None and crashed. Default to the
            # multi-label loss that pairs with sigmoid decoding; it requires
            # floating-point targets.
            loss_fn = nn.BCEWithLogitsLoss()
            labels = labels.to(dtype=logits.dtype)
        loss = loss_fn(logits, labels)
        return loss, logits

    @classmethod
    def from_pretrained(cls, model_name):
        """Load the classifier from ``model_name``.

        The identifier is also stored on the config as ``model_name`` so that
        ``__init__`` can load the backbone from it.

        Args:
            model_name: Identifier passed to ``AutoConfig.from_pretrained``
                and to the parent ``from_pretrained``.

        Returns:
            The loaded ``QwenClassifier`` instance.
        """
        config = AutoConfig.from_pretrained(model_name)
        config.model_name = model_name  # read back by __init__
        return super().from_pretrained(model_name, config=config)
qwen_classifier/predict.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import requests
3
+ from .config import TAG_NAMES
4
+
5
# Cache for the local backend. Both names stay None until the first local
# prediction loads the model and tokenizer.
local_model = local_tokenizer = None
8
+
9
def predict_single(text, hf_repo, backend="local", hf_token=None):
    """Run one prediction through the selected backend.

    Args:
        text: Input text to classify.
        hf_repo: Model repo, used by the "local" backend only.
        backend: "local" or "hf".
        hf_token: Token forwarded to the "hf" backend only.

    Returns:
        Whatever the selected backend returns.

    Raises:
        ValueError: If ``backend`` is neither "local" nor "hf".
    """
    if backend == "hf":
        return _predict_hf_api(text, hf_token)
    if backend == "local":
        return _predict_local(text, hf_repo)
    raise ValueError(f"Unknown backend: {backend}")
16
+
17
def _predict_local(text, hf_repo):
    """Classify ``text`` with a model loaded on this machine.

    The model and tokenizer are loaded on first use and cached in the
    module-level ``local_model`` / ``local_tokenizer``. The cache is reloaded
    when a different ``hf_repo`` is requested, so a second repo never
    silently reuses the first repo's model.

    Args:
        text: Input text to classify.
        hf_repo: Repo the model and tokenizer are loaded from.

    Returns:
        The formatted tag string produced by ``_process_output``.
    """
    global local_model, local_tokenizer

    # The repo behind the cached objects is remembered on the function itself,
    # so no extra module-level global is needed.
    cached_repo = getattr(_predict_local, "_loaded_repo", None)
    needs_load = (
        local_model is None
        or local_tokenizer is None
        or cached_repo != hf_repo
    )
    if needs_load:
        # Imported lazily to avoid slow startup when this backend is unused.
        from .model import QwenClassifier
        from transformers import AutoTokenizer

        # Load into locals first so a failure in either call cannot leave a
        # half-populated cache behind.
        model = QwenClassifier.from_pretrained(hf_repo).eval()
        tokenizer = AutoTokenizer.from_pretrained(hf_repo)
        local_model, local_tokenizer = model, tokenizer
        _predict_local._loaded_repo = hf_repo

    inputs = local_tokenizer(
        text, return_tensors="pt", truncation=True, max_length=512
    )
    with torch.no_grad():
        logits = local_model(**inputs)
    return _process_output(logits)
32
+
33
def _predict_hf_api(text, hf_token=None, timeout=30.0):
    """Classify ``text`` by calling the hosted Space's ``/predict`` endpoint.

    Args:
        text: Input text, sent as ``{"text": text}`` in the JSON body.
        hf_token: Optional token. When given it is sent as a Bearer
            ``Authorization`` header; otherwise no auth header is sent.
        timeout: Seconds to wait for the Space before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If the request fails, the Space answers with an HTTP
            error status, or the body is not valid JSON.
    """
    # Use the Space endpoint instead of the direct model API. Spaces are
    # served from "<owner>-<space-name>.hf.space"; the previous value used
    # "KeivanR" as the host name, which is not a Space host.
    # NOTE(review): confirm the Space is named "qwen-classifier-demo".
    space_url = "https://keivanr-qwen-classifier-demo.hf.space"
    headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}

    try:
        response = requests.post(
            f"{space_url}/predict",
            json={"text": text},
            headers=headers,
            timeout=timeout,  # without this a stalled Space hangs forever
        )
        # Surface 4xx/5xx instead of returning an error payload as a result.
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError also covers JSON decoding failures.
        raise ValueError(f"Space API Error: {str(e)}") from e
46
+
47
def _process_output(logits, threshold=0.5):
    """Format classifier logits as a comma-separated list of predicted tags.

    A sigmoid is applied to ``logits`` and the first row is paired
    positionally with ``TAG_NAMES``. Every tag whose probability is strictly
    greater than ``threshold`` is kept, rendered as ``tag(prob)`` with two
    decimals.

    Args:
        logits: Raw model outputs; only ``logits[0]`` is reported.
        threshold: Probability a tag must exceed to be included.

    Returns:
        A string such as ``"math(0.91), graphs(0.67)"``, or ``""`` when no
        tag exceeds the threshold.
    """
    probs = torch.sigmoid(logits)
    return ", ".join(
        f"{tag}({prob:.2f})"
        for tag, prob in zip(TAG_NAMES, probs[0])
        if prob > threshold
    )
54
+
55
+
qwen_classifier/utils.py ADDED
File without changes
setup.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup, find_packages
2
+
3
# Package metadata for the qwen_classifier distribution.
setup(
    name="qwen_classifier",
    version="0.1",
    packages=find_packages(),
    install_requires=[
        'torch',
        'transformers',
        'click',
        # numpy is imported directly by qwen_classifier/evaluate.py, so it is
        # declared here rather than relied on as a transitive dependency.
        'numpy',
        'scikit-learn',
        'huggingface_hub',
        'requests',
    ],
    entry_points={
        # Installs the `qwen-clf` command, which runs the click group `cli`.
        'console_scripts': [
            'qwen-clf=qwen_classifier.cli:cli',
        ],
    },
)