Adds notebook and setup for testing models.
- Dockerfile +2 -0
- model/__init__.py +0 -0
- model/distilbert.py +58 -0
- model/llama.py +37 -0
- {notebooks → model/notebooks}/distilbert_baseline_05_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/distilbert_baseline_10_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/distilbert_baseline_15_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/distilbert_baseline_20_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/distilbert_baseline_20_epochs_prompt_input.ipynb +0 -0
- {notebooks → model/notebooks}/distilbert_baseline_25_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/distilbert_prompt_02_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/distilbert_prompt_05_epochs.ipynb +0 -0
- model/notebooks/inference.ipynb +152 -0
- {notebooks → model/notebooks}/llama_baseline_05_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/llama_baseline_10_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/llama_baseline_15_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/llama_baseline_20_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/llama_baseline_20_epochs_prompt_input.ipynb +0 -0
- {notebooks → model/notebooks}/llama_baseline_25_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/llama_prompt_0.5_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/scibert_baseline_05_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/scibert_baseline_10_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/scibert_baseline_15_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/scibert_baseline_20_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/scibert_baseline_20_epochs_prompt_input.ipynb +0 -0
- {notebooks → model/notebooks}/scibert_baseline_25_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/scibert_prompt_02_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/scibert_prompt_05_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/t5_baseline_05_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/t5_baseline_10_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/t5_baseline_15_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/t5_baseline_20_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/t5_baseline_20_epochs_prompt_input.ipynb +0 -0
- {notebooks → model/notebooks}/t5_baseline_25_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/t5_prompt_02_epochs.ipynb +0 -0
- {notebooks → model/notebooks}/t5_prompt_05_epochs.ipynb +0 -0
- model/scibert.py +67 -0
- model/t5.py +54 -0
- notebooks/inference.ipynb +0 -313
- requirements.txt +8 -2
- setup.py +6 -0
- start_server.sh +1 -1
Dockerfile
CHANGED
@@ -90,6 +90,8 @@ RUN --mount=target=requirements.txt,source=requirements.txt \
 # Copy the current directory contents into the container at $HOME/app setting the owner to the user
 COPY --chown=user . $HOME/app
 
+RUN pip install -e .
+
 RUN chmod +x start_server.sh
 
 COPY --chown=user login.html /home/user/miniconda/lib/python3.9/site-packages/jupyter_server/templates/login.html
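The new `RUN pip install -e .` layer installs the repository as an editable package inside the image, so the relocated notebooks can import the new `model` package directly instead of relying on relative paths. A minimal sketch of what that enables once the container is running (the import target assumes the setup.py added below with `find_packages()`):

# Sketch only: after the editable install, the local package resolves like any installed distribution.
from model.distilbert import DistilBertClassificationModel

model = DistilBertClassificationModel()  # falls back to distilbert/distilbert-base-uncased weights
print(type(model).__name__)              # DistilBertClassificationModel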
model/__init__.py
ADDED
File without changes
model/distilbert.py
ADDED
@@ -0,0 +1,58 @@
+from transformers import AutoModel
+from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file
+import torch.nn as nn
+import torch
+
+# Number of labels (update if different)
+NUM_LABELS = 4
+
+# Model with frozen DistilBERT weights
+class DistilBertClassificationModel(nn.Module):
+    def __init__(
+        self,
+        model_path="distilbert/distilbert-base-uncased",
+        freeze_weights=True,
+    ):
+        super(DistilBertClassificationModel, self).__init__()
+        if model_path == "distilbert/distilbert-base-uncased":
+            self.base_model = AutoModel.from_pretrained(model_path)
+        else:
+            pytorch_model_path = hf_hub_download(
+                repo_id=model_path,
+                repo_type="model",
+                filename="model.safetensors"
+            )
+            state_dict = load_file(pytorch_model_path)
+            filtered_state_dict = {
+                k.replace("base_model.", ""): v
+                for k, v in state_dict.items()
+                if not k.startswith("classifier.")
+            }
+
+            self.base_model = AutoModel.from_pretrained("distilbert/distilbert-base-uncased", state_dict=filtered_state_dict)
+
+        # For push to hub.
+        self.config = self.base_model.config
+
+        # Freeze the base model's weights
+        if freeze_weights:
+            for param in self.base_model.parameters():
+                param.requires_grad = False
+
+        # Add a classification head
+        self.classifier = nn.Linear(self.base_model.config.hidden_size, NUM_LABELS)
+
+    def forward(self, input_ids, attention_mask, labels=None):
+        with torch.no_grad():  # No gradients for the base model
+            outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
+
+        # Sum hidden states over the sequence dimension
+        summed_representation = outputs.last_hidden_state.sum(dim=1)  # Summing over sequence length
+
+        logits = self.classifier(summed_representation)  # Pass the summed representation to the classifier
+        loss = None
+        if labels is not None:
+            loss_fn = nn.BCEWithLogitsLoss()
+            loss = loss_fn(logits, labels.float())
+        return {"loss": loss, "logits": logits}
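Since the encoder is frozen and its forward pass runs under `torch.no_grad()`, only the linear head ever receives gradients. A minimal training-side sketch under that assumption (the batch shapes and label values below are illustrative, not from this repository):

import torch
from model.distilbert import DistilBertClassificationModel

model = DistilBertClassificationModel()
# Only the classification head is trainable, so the optimizer can target it directly.
optimizer = torch.optim.AdamW(model.classifier.parameters(), lr=1e-3)

input_ids = torch.randint(0, model.config.vocab_size, (2, 16))   # dummy token ids
attention_mask = torch.ones_like(input_ids)
labels = torch.tensor([[1, 0, 0, 0], [0, 1, 0, 0]])              # multi-label targets

out = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
out["loss"].backward()   # gradients flow only into model.classifier
optimizer.step()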
model/llama.py
ADDED
@@ -0,0 +1,37 @@
+import torch.nn as nn
+import torch
+
+from transformers import AutoModel
+
+NUM_LABELS = 4
+
+# Model with frozen LLaMA weights
+class LlamaClassificationModel(nn.Module):
+    def __init__(self, model_path="meta-llama/Llama-3.2-1B", freeze_weights=True):
+        super(LlamaClassificationModel, self).__init__()
+        self.base_model = AutoModel.from_pretrained(model_path)
+
+        # For push to hub.
+        self.config = self.base_model.config
+
+        # Freeze the base model's weights
+        if freeze_weights:
+            for param in self.base_model.parameters():
+                param.requires_grad = False
+
+        # Add a classification head
+        self.classifier = nn.Linear(self.base_model.config.hidden_size, NUM_LABELS)
+
+    def forward(self, input_ids, attention_mask, labels=None):
+        with torch.no_grad():  # No gradients for the base model
+            outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
+
+        # Sum hidden states over the sequence dimension
+        summed_representation = outputs.last_hidden_state.sum(dim=1)  # Summing over sequence length
+
+        logits = self.classifier(summed_representation)  # Pass the summed representation to the classifier
+        loss = None
+        if labels is not None:
+            loss_fn = nn.BCEWithLogitsLoss()
+            loss = loss_fn(logits, labels.float())
+        return {"loss": loss, "logits": logits}
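One practical note: `meta-llama/Llama-3.2-1B` is a gated checkpoint on the Hub, so constructing this wrapper with its default `model_path` generally requires Hugging Face authentication. A short sketch of that assumption (the token value is a placeholder):

import os
from model.llama import LlamaClassificationModel

# Assumption: an access token with permission for meta-llama/Llama-3.2-1B is available,
# e.g. exported beforehand as HF_TOKEN or configured via `huggingface-cli login`.
os.environ.setdefault("HF_TOKEN", "<hf_token_placeholder>")

model = LlamaClassificationModel()  # ~1B frozen parameters plus a 4-way linear head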
{notebooks → model/notebooks}/distilbert_baseline_05_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_baseline_10_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_baseline_15_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_baseline_20_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_baseline_20_epochs_prompt_input.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_baseline_25_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_prompt_02_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_prompt_05_epochs.ipynb
RENAMED
File without changes
model/notebooks/inference.ipynb
ADDED
@@ -0,0 +1,152 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Inference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "from huggingface_hub import hf_hub_download\n",
+    "from transformers import AutoTokenizer\n",
+    "\n",
+    "from model.distilbert import DistilBertClassificationModel\n",
+    "from model.scibert import SciBertClassificationModel\n",
+    "from model.llama import LlamaClassificationModel\n",
+    "from model.t5 import T5ClassificationModel"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Model Selection\n",
+    "Uncomment desired `repo_id` and corresponding `model` and input type."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Baseline\n",
+    "repo_id = \"ppak10/defect-classification-distilbert-baseline-25-epochs\"\n",
+    "# repo_id = \"ppak10/defect-classification-scibert-baseline-25-epochs\"\n",
+    "# repo_id = \"ppak10/defect-classification-llama-baseline-25-epochs\"\n",
+    "# repo_id = \"ppak10/defect-classification-t5-baseline-25-epochs\"\n",
+    "\n",
+    "# Prompt\n",
+    "# repo_id = \"ppak10/defect-classification-distilbert-prompt-02-epochs\"\n",
+    "# repo_id = \"ppak10/defect-classification-scibert-prompt-02-epochs\"\n",
+    "# repo_id = \"ppak10/defect-classification-llama-prompt-02-epochs\"\n",
+    "# repo_id = \"ppak10/defect-classification-t5-prompt-02-epochs\"\n",
+    "\n",
+    "# Initialize the model\n",
+    "model = DistilBertClassificationModel(repo_id)\n",
+    "# model = SciBertClassificationModel(repo_id)\n",
+    "# model = LlamaClassificationModel()\n",
+    "# model = T5ClassificationModel(repo_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the tokenizer\n",
+    "tokenizer = AutoTokenizer.from_pretrained(repo_id)\n",
+    "\n",
+    "# Loads classification head weights\n",
+    "classification_head_path = hf_hub_download(\n",
+    "    repo_id=repo_id,\n",
+    "    repo_type=\"model\",\n",
+    "    filename=\"classification_head.pt\"\n",
+    ")\n",
+    "\n",
+    "model.classifier.load_state_dict(torch.load(classification_head_path, map_location=torch.device(\"cpu\")))\n",
+    "model.eval()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Baseline\n",
+    "# text = \"Ti-6Al-4V[SEP]280.0 W[SEP]400.0 mm/s[SEP]100.0 microns[SEP]50.0 microns[SEP]100.0 microns\"\n",
+    "\n",
+    "# Prompt\n",
+    "text = \"What are the likely imperfections that occur in Ti-6Al-4V L-PBF builds at 280.0 W, given a 100.0 microns beam diameter, a 400.0 mm/s scan speed, a 100.0 microns hatch spacing, and a 50.0 microns layer height?\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Tokenize inputs\n",
+    "inputs = tokenizer(text, return_tensors=\"pt\", truncation=True, padding=\"max_length\", max_length=256)\n",
+    "\n",
+    "# For scibert\n",
+    "inputs_kwargs = {}\n",
+    "for key, value in inputs.items():\n",
+    "    if key not in [\"token_type_ids\"]:\n",
+    "        inputs_kwargs[key] = value\n",
+    "\n",
+    "# Perform inference\n",
+    "outputs = model(**inputs_kwargs)\n",
+    "\n",
+    "# Extract logits and apply sigmoid activation for multi-label classification\n",
+    "probs = torch.sigmoid(outputs[\"logits\"])\n",
+    "\n",
+    "# Convert probabilities to one-hot encoded labels\n",
+    "preds = (probs > 0.5).int().squeeze()\n",
+    "\n",
+    "# One hot encoded classifications\n",
+    "classifications = [\"None\", \"Keyhole\", \"Lack of Fusion\", \"Balling\"]\n",
+    "\n",
+    "print([classifications[index] for index, encoding in enumerate(preds) if encoding == 1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
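The notebook scores one parameter set at a time; because the tokenizer and the wrappers both accept batched tensors, the same cell generalizes to several inputs at once. A hedged batched variant, reusing the `repo_id`, `tokenizer`, and `model` objects created above (the second example string is taken from the removed notebook):

import torch

texts = [
    "Ti-6Al-4V[SEP]280.0 W[SEP]400.0 mm/s[SEP]100.0 microns[SEP]50.0 microns[SEP]100.0 microns",
    "SS316L[SEP]500 W[SEP]10.0 mm/s[SEP]500.0 microns[SEP]500.0 microns[SEP]100.0 microns",
]
classifications = ["None", "Keyhole", "Lack of Fusion", "Balling"]

batch = tokenizer(texts, return_tensors="pt", truncation=True, padding="max_length", max_length=256)
batch.pop("token_type_ids", None)  # dropped for the same reason as in the cell above

with torch.no_grad():
    probs = torch.sigmoid(model(**batch)["logits"])

for text, row in zip(texts, (probs > 0.5).int()):
    print(text.split("[SEP]")[0], [classifications[i] for i, flag in enumerate(row) if flag == 1])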
{notebooks → model/notebooks}/llama_baseline_05_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/llama_baseline_10_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/llama_baseline_15_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/llama_baseline_20_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/llama_baseline_20_epochs_prompt_input.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/llama_baseline_25_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/llama_prompt_0.5_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_05_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_10_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_15_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_20_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_20_epochs_prompt_input.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_25_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/scibert_prompt_02_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/scibert_prompt_05_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_05_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_10_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_15_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_20_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_20_epochs_prompt_input.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_25_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/t5_prompt_02_epochs.ipynb
RENAMED
File without changes
{notebooks → model/notebooks}/t5_prompt_05_epochs.ipynb
RENAMED
File without changes
model/scibert.py
ADDED
@@ -0,0 +1,67 @@
+from transformers import AutoModel
+from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file
+import torch.nn as nn
+import torch
+
+# Number of labels (update if different)
+NUM_LABELS = 4
+
+class SciBertClassificationModel(nn.Module):
+    def __init__(self, model_path="allenai/scibert_scivocab_uncased", freeze_weights=True):
+        super(SciBertClassificationModel, self).__init__()
+        if model_path == "allenai/scibert_scivocab_uncased":
+            self.base_model = AutoModel.from_pretrained(model_path)
+        else:
+            pytorch_model_path = hf_hub_download(
+                repo_id=model_path,
+                repo_type="model",
+                filename="model.safetensors"
+            )
+            state_dict = load_file(pytorch_model_path)
+            filtered_state_dict = {
+                k.replace("base_model.", ""): v
+                for k, v in state_dict.items()
+                if not k.startswith("classifier.")
+            }
+
+            self.base_model = AutoModel.from_pretrained("allenai/scibert_scivocab_uncased", state_dict=filtered_state_dict)
+
+        # For push to hub.
+        self.config = self.base_model.config
+
+        # Freeze the base model's weights
+        if freeze_weights:
+            for param in self.base_model.parameters():
+                param.requires_grad = False
+
+        # Add a classification head
+        self.classifier = nn.Linear(self.base_model.config.hidden_size, NUM_LABELS)
+
+    def forward(self, input_ids, attention_mask, labels=None):
+        with torch.no_grad():  # No gradients for the base model
+            outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
+
+        # Ensure the tensor is contiguous before passing to the classifier
+        # cls_token_representation = outputs.last_hidden_state[:, 0, :].contiguous()
+        # logits = self.classifier(cls_token_representation)
+
+        # Sum token representations
+        summed_representation = outputs.last_hidden_state.sum(dim=1)  # Summing over the sequence length (dim=1)
+
+        logits = self.classifier(summed_representation)  # Pass the summed representation to the classifier
+
+        loss = None
+        if labels is not None:
+            loss_fn = nn.BCEWithLogitsLoss()
+            loss = loss_fn(logits, labels.float())
+        return {"loss": loss, "logits": logits}
+
+    def state_dict(self, *args, **kwargs):
+        # Get the state dictionary
+        state_dict = super().state_dict(*args, **kwargs)
+        # Ensure all tensors are contiguous
+        for key, tensor in state_dict.items():
+            if isinstance(tensor, torch.Tensor) and not tensor.is_contiguous():
+                state_dict[key] = tensor.contiguous()
+        return state_dict
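The `state_dict` override is there because `safetensors` (used when the model is pushed to the Hub) rejects tensors that are not stored in contiguous memory, and sliced or transposed views can end up non-contiguous. A standalone illustration of the underlying behavior, independent of this model:

import torch

w = torch.randn(4, 8).t()                # a transposed view shares storage, so it is not contiguous
print(w.is_contiguous())                 # False
print(w.contiguous().is_contiguous())    # True -- .contiguous() copies into fresh, contiguous memory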
model/t5.py
ADDED
@@ -0,0 +1,54 @@
+from transformers import T5EncoderModel, T5Config
+from huggingface_hub import hf_hub_download
+import torch.nn as nn
+import torch
+
+NUM_LABELS = 4
+
+class T5ClassificationModel(nn.Module):
+    def __init__(self, model_path="t5-small", freeze_weights=True):
+        super(T5ClassificationModel, self).__init__()
+        if model_path == "t5-small":
+            self.base_model = T5EncoderModel.from_pretrained(model_path)
+        else:
+            pytorch_model_path = hf_hub_download(
+                repo_id=model_path,
+                repo_type="model",
+                filename="pytorch_model.bin"
+            )
+            config = T5Config.from_pretrained(model_path)
+            self.base_model = T5EncoderModel(config)
+
+            # Load the state_dict and remove unwanted keys
+            state_dict = torch.load(pytorch_model_path)
+            filtered_state_dict = {
+                k.replace("base_model.", ""): v
+                for k, v in state_dict.items()
+                if not k.startswith("classifier.")
+            }
+            self.base_model.load_state_dict(filtered_state_dict)
+
+        # For push to hub.
+        self.config = self.base_model.config
+
+        # Freeze the base model's weights
+        if freeze_weights:
+            for param in self.base_model.parameters():
+                param.requires_grad = False
+
+        # Add a classification head
+        self.classifier = nn.Linear(self.base_model.config.hidden_size, NUM_LABELS)
+
+    def forward(self, input_ids, attention_mask, labels=None):
+        with torch.no_grad():  # No gradients for the base model
+            outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
+
+        # Sum token representations
+        summed_representation = outputs.last_hidden_state.sum(dim=1)  # Summing over the sequence length (dim=1)
+
+        logits = self.classifier(summed_representation)  # Pass the summed representation to the classifier
+        loss = None
+        if labels is not None:
+            loss_fn = nn.BCEWithLogitsLoss()
+            loss = loss_fn(logits, labels.float())
+        return {"loss": loss, "logits": logits}
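Unlike the other wrappers, this one keeps only the encoder stack (`T5EncoderModel`) and fetches fine-tuned weights as `pytorch_model.bin` rather than `model.safetensors`. A brief loading sketch; the fine-tuned repo id is the one referenced in the inference notebook and is assumed, not guaranteed, to exist:

from model.t5 import T5ClassificationModel

baseline = T5ClassificationModel()  # t5-small encoder only, weights frozen
# finetuned = T5ClassificationModel("ppak10/defect-classification-t5-baseline-25-epochs")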
notebooks/inference.ipynb
DELETED
@@ -1,313 +0,0 @@
(Removed notebook, reconstructed below as its code cells; the stored cell outputs — a LlamaConfig printout, a torch.load FutureWarning, the LlamaClassificationModel/LlamaModel module reprs, and a long "newly initialized weights" warning — and the notebook JSON wrapper/metadata are not reproduced.)

-import torch
-
-from huggingface_hub import hf_hub_download
-from transformers import AutoTokenizer
-
-from model.distilbert import DistilBertClassificationModel
-from model.llama import LlamaClassificationModel

-repo_id = "ppak10/defect-classification-llama-baseline-25-epochs"

-# Initialize the model
-# model = DistilBertClassificationModel(repo_id)
-model = LlamaClassificationModel()
-
-# Load the tokenizer
-tokenizer = AutoTokenizer.from_pretrained(repo_id)
-
-classification_head_path = hf_hub_download(
-    repo_id=repo_id,
-    repo_type="model",
-    filename="classification_head.pt"
-)
-
-model.classifier.load_state_dict(torch.load(classification_head_path))
-model.eval()  # Set the model to evaluation mode

-# text = "What defects would occur with a beam size of 100 microns, a power of 500 W, a velocity of 100 mm/s and layer height of 10 microns and a hatch spacing of 10 microns for Ti-6Al-4V"
-# text = "SS316L[SEP]500 W[SEP]10.0 mm/s[SEP]500.0 microns[SEP]500.0 microns[SEP]100.0 microns"
-text = "SS316L[SEP]250.0 W[SEP]280.0 mm/s[SEP][SEP]950.0 microns[SEP]600.0 microns"
-
-# Ensure the model is on the GPU
-# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-device = "cpu"
-model = model.to(device)
-
-# Tokenize input for the entire batch and move to GPU
-inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=256)
-inputs = {key: value.to(device) for key, value in inputs.items()}
-
-# Perform inference
-outputs = model(**inputs)
-
-# Extract logits and apply sigmoid activation for multi-label classification
-logits = outputs["logits"]
-probs = torch.sigmoid(logits)
-
-# Convert probabilities to one-hot encoded labels
-preds = (probs > 0.5).int()
-
-# None, keyhole, lack of fusion, balling
-print(preds)
(output: tensor([[1, 0, 0, 0]], dtype=torch.int32))

-import torch.nn as nn
-from transformers import PreTrainedModel
-
-class PretrainedLlamaClassificationModel(PreTrainedModel):
-    def __init__(self, config):
-        super().__init__(config)
-        self.base_model = AutoModel.from_pretrained(config.model_path, config=config)
-        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
-        self.config = config
-
-    def forward(self, input_ids, attention_mask, labels=None):
-        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
-        summed_representation = outputs.last_hidden_state.sum(dim=1)
-        logits = self.classifier(summed_representation)
-        loss = None
-        if labels is not None:
-            loss_fn = nn.BCEWithLogitsLoss()
-            loss = loss_fn(logits, labels.float())
-        return {"loss": loss, "logits": logits}

-from transformers import AutoModel, pipeline
-
-repo_id = "ppak10/defect-classification-llama-baseline-25-epochs"
-model = AutoModel.from_pretrained(repo_id)
-# tokenizer = AutoTokenizer.from_pretrained(repo_id)
-
-# classification_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
-# result = classification_pipeline("Test input text")
-# print(result)

-print(model)
requirements.txt
CHANGED
@@ -1,3 +1,9 @@
 jupyterlab==4.2.5
-
-
+ipywidgets
+torch
+huggingface_hub
+transformers
+sentencepiece
+safetensors
+datasets
+numpy
setup.py
ADDED
@@ -0,0 +1,6 @@
+from setuptools import setup, find_packages
+
+setup(
+    name="llm_enabled_process_map",
+    packages=find_packages()
+)
start_server.sh
CHANGED
@@ -1,7 +1,7 @@
 #!/bin/bash
 JUPYTER_TOKEN="${JUPYTER_TOKEN:=huggingface}"
 
-NOTEBOOK_DIR="
+NOTEBOOK_DIR="model"
 
 jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
 