ppak10 committed on
Commit 1b74e0a · 1 Parent(s): 0059492

Adds notebook and setup for testing models.

Files changed (42)
  1. Dockerfile +2 -0
  2. model/__init__.py +0 -0
  3. model/distilbert.py +58 -0
  4. model/llama.py +37 -0
  5. {notebooks → model/notebooks}/distilbert_baseline_05_epochs.ipynb +0 -0
  6. {notebooks → model/notebooks}/distilbert_baseline_10_epochs.ipynb +0 -0
  7. {notebooks → model/notebooks}/distilbert_baseline_15_epochs.ipynb +0 -0
  8. {notebooks → model/notebooks}/distilbert_baseline_20_epochs.ipynb +0 -0
  9. {notebooks → model/notebooks}/distilbert_baseline_20_epochs_prompt_input.ipynb +0 -0
  10. {notebooks → model/notebooks}/distilbert_baseline_25_epochs.ipynb +0 -0
  11. {notebooks → model/notebooks}/distilbert_prompt_02_epochs.ipynb +0 -0
  12. {notebooks → model/notebooks}/distilbert_prompt_05_epochs.ipynb +0 -0
  13. model/notebooks/inference.ipynb +152 -0
  14. {notebooks → model/notebooks}/llama_baseline_05_epochs.ipynb +0 -0
  15. {notebooks → model/notebooks}/llama_baseline_10_epochs.ipynb +0 -0
  16. {notebooks → model/notebooks}/llama_baseline_15_epochs.ipynb +0 -0
  17. {notebooks → model/notebooks}/llama_baseline_20_epochs.ipynb +0 -0
  18. {notebooks → model/notebooks}/llama_baseline_20_epochs_prompt_input.ipynb +0 -0
  19. {notebooks → model/notebooks}/llama_baseline_25_epochs.ipynb +0 -0
  20. {notebooks → model/notebooks}/llama_prompt_0.5_epochs.ipynb +0 -0
  21. {notebooks → model/notebooks}/scibert_baseline_05_epochs.ipynb +0 -0
  22. {notebooks → model/notebooks}/scibert_baseline_10_epochs.ipynb +0 -0
  23. {notebooks → model/notebooks}/scibert_baseline_15_epochs.ipynb +0 -0
  24. {notebooks → model/notebooks}/scibert_baseline_20_epochs.ipynb +0 -0
  25. {notebooks → model/notebooks}/scibert_baseline_20_epochs_prompt_input.ipynb +0 -0
  26. {notebooks → model/notebooks}/scibert_baseline_25_epochs.ipynb +0 -0
  27. {notebooks → model/notebooks}/scibert_prompt_02_epochs.ipynb +0 -0
  28. {notebooks → model/notebooks}/scibert_prompt_05_epochs.ipynb +0 -0
  29. {notebooks → model/notebooks}/t5_baseline_05_epochs.ipynb +0 -0
  30. {notebooks → model/notebooks}/t5_baseline_10_epochs.ipynb +0 -0
  31. {notebooks → model/notebooks}/t5_baseline_15_epochs.ipynb +0 -0
  32. {notebooks → model/notebooks}/t5_baseline_20_epochs.ipynb +0 -0
  33. {notebooks → model/notebooks}/t5_baseline_20_epochs_prompt_input.ipynb +0 -0
  34. {notebooks → model/notebooks}/t5_baseline_25_epochs.ipynb +0 -0
  35. {notebooks → model/notebooks}/t5_prompt_02_epochs.ipynb +0 -0
  36. {notebooks → model/notebooks}/t5_prompt_05_epochs.ipynb +0 -0
  37. model/scibert.py +67 -0
  38. model/t5.py +54 -0
  39. notebooks/inference.ipynb +0 -313
  40. requirements.txt +8 -2
  41. setup.py +6 -0
  42. start_server.sh +1 -1
Dockerfile CHANGED
@@ -90,6 +90,8 @@ RUN --mount=target=requirements.txt,source=requirements.txt \
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
  COPY --chown=user . $HOME/app

+ RUN pip install -e .
+
  RUN chmod +x start_server.sh

  COPY --chown=user login.html /home/user/miniconda/lib/python3.9/site-packages/jupyter_server/templates/login.html
model/__init__.py ADDED
File without changes
model/distilbert.py ADDED
@@ -0,0 +1,58 @@
+ from transformers import AutoModel
+ from huggingface_hub import hf_hub_download
+ from safetensors.torch import load_file
+ import torch.nn as nn
+ import torch
+
+ # Number of labels (update if different)
+ NUM_LABELS = 4
+
+ # Model with frozen DistilBERT weights
+ class DistilBertClassificationModel(nn.Module):
+     def __init__(
+         self,
+         model_path="distilbert/distilbert-base-uncased",
+         freeze_weights=True,
+     ):
+         super(DistilBertClassificationModel, self).__init__()
+         if model_path == "distilbert/distilbert-base-uncased":
+             self.base_model = AutoModel.from_pretrained(model_path)
+         else:
+             pytorch_model_path = hf_hub_download(
+                 repo_id=model_path,
+                 repo_type="model",
+                 filename="model.safetensors"
+             )
+             state_dict = load_file(pytorch_model_path)
+             filtered_state_dict = {
+                 k.replace("base_model.", ""): v
+                 for k, v in state_dict.items()
+                 if not k.startswith("classifier.")
+             }
+
+             self.base_model = AutoModel.from_pretrained("distilbert/distilbert-base-uncased", state_dict=filtered_state_dict)
+
+         # For push to hub.
+         self.config = self.base_model.config
+
+         # Freeze the base model's weights
+         if freeze_weights:
+             for param in self.base_model.parameters():
+                 param.requires_grad = False
+
+         # Add a classification head
+         self.classifier = nn.Linear(self.base_model.config.hidden_size, NUM_LABELS)
+
+     def forward(self, input_ids, attention_mask, labels=None):
+         with torch.no_grad():  # No gradients for the base model
+             outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
+
+         # Sum hidden states over the sequence dimension
+         summed_representation = outputs.last_hidden_state.sum(dim=1)  # Summing over sequence length
+
+         logits = self.classifier(summed_representation)  # Pass the summed representation to the classifier
+         loss = None
+         if labels is not None:
+             loss_fn = nn.BCEWithLogitsLoss()
+             loss = loss_fn(logits, labels.float())
+         return {"loss": loss, "logits": logits}
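A condensed usage sketch of the new wrapper classes (mirroring model/notebooks/inference.ipynb added below; the repo_id, checkpoint filename, and label order are taken from that notebook, not from any new API):

    import torch
    from huggingface_hub import hf_hub_download
    from transformers import AutoTokenizer

    from model.distilbert import DistilBertClassificationModel

    # Checkpoint used in the inference notebook.
    repo_id = "ppak10/defect-classification-distilbert-baseline-25-epochs"

    # Base encoder weights come from the hub; the linear head is stored separately.
    model = DistilBertClassificationModel(repo_id)
    tokenizer = AutoTokenizer.from_pretrained(repo_id)

    head_path = hf_hub_download(repo_id=repo_id, repo_type="model", filename="classification_head.pt")
    model.classifier.load_state_dict(torch.load(head_path, map_location=torch.device("cpu")))
    model.eval()

    # Baseline-style input: material and process parameters joined with [SEP].
    text = "Ti-6Al-4V[SEP]280.0 W[SEP]400.0 mm/s[SEP]100.0 microns[SEP]50.0 microns[SEP]100.0 microns"
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=256)

    with torch.no_grad():
        outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])

    # Multi-label head: sigmoid then 0.5 threshold, one flag per defect class.
    preds = (torch.sigmoid(outputs["logits"]) > 0.5).int().squeeze()
    labels = ["None", "Keyhole", "Lack of Fusion", "Balling"]
    print([labels[i] for i, flag in enumerate(preds) if flag == 1])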
model/llama.py ADDED
@@ -0,0 +1,37 @@
+ import torch.nn as nn
+ import torch
+
+ from transformers import AutoModel
+
+ NUM_LABELS = 4
+
+ # Model with frozen LLaMA weights
+ class LlamaClassificationModel(nn.Module):
+     def __init__(self, model_path="meta-llama/Llama-3.2-1B", freeze_weights=True):
+         super(LlamaClassificationModel, self).__init__()
+         self.base_model = AutoModel.from_pretrained(model_path)
+
+         # For push to hub.
+         self.config = self.base_model.config
+
+         # Freeze the base model's weights
+         if freeze_weights:
+             for param in self.base_model.parameters():
+                 param.requires_grad = False
+
+         # Add a classification head
+         self.classifier = nn.Linear(self.base_model.config.hidden_size, NUM_LABELS)
+
+     def forward(self, input_ids, attention_mask, labels=None):
+         with torch.no_grad():  # No gradients for the base model
+             outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
+
+         # Sum hidden states over the sequence dimension
+         summed_representation = outputs.last_hidden_state.sum(dim=1)  # Summing over sequence length
+
+         logits = self.classifier(summed_representation)  # Pass the summed representation to the classifier
+         loss = None
+         if labels is not None:
+             loss_fn = nn.BCEWithLogitsLoss()
+             loss = loss_fn(logits, labels.float())
+         return {"loss": loss, "logits": logits}
{notebooks → model/notebooks}/distilbert_baseline_05_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_baseline_10_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_baseline_15_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_baseline_20_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_baseline_20_epochs_prompt_input.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_baseline_25_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_prompt_02_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/distilbert_prompt_05_epochs.ipynb RENAMED
File without changes
model/notebooks/inference.ipynb ADDED
@@ -0,0 +1,152 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Inference"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "\n",
+ "from huggingface_hub import hf_hub_download\n",
+ "from transformers import AutoTokenizer\n",
+ "\n",
+ "from model.distilbert import DistilBertClassificationModel\n",
+ "from model.scibert import SciBertClassificationModel\n",
+ "from model.llama import LlamaClassificationModel\n",
+ "from model.t5 import T5ClassificationModel"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Model Selection\n",
+ "Uncomment desired `repo_id` and corresponding `model` and input type."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Baseline\n",
+ "repo_id = \"ppak10/defect-classification-distilbert-baseline-25-epochs\"\n",
+ "# repo_id = \"ppak10/defect-classification-scibert-baseline-25-epochs\"\n",
+ "# repo_id = \"ppak10/defect-classification-llama-baseline-25-epochs\"\n",
+ "# repo_id = \"ppak10/defect-classification-t5-baseline-25-epochs\"\n",
+ "\n",
+ "# Prompt \n",
+ "# repo_id = \"ppak10/defect-classification-distilbert-prompt-02-epochs\"\n",
+ "# repo_id = \"ppak10/defect-classification-scibert-prompt-02-epochs\"\n",
+ "# repo_id = \"ppak10/defect-classification-llama-prompt-02-epochs\"\n",
+ "# repo_id = \"ppak10/defect-classification-t5-prompt-02-epochs\"\n",
+ "\n",
+ "# Initialize the model\n",
+ "model = DistilBertClassificationModel(repo_id)\n",
+ "# model = SciBertClassificationModel(repo_id)\n",
+ "# model = LlamaClassificationModel()\n",
+ "# model = T5ClassificationModel(repo_id)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the tokenizer\n",
+ "tokenizer = AutoTokenizer.from_pretrained(repo_id)\n",
+ "\n",
+ "# Loads classification head weights\n",
+ "classification_head_path = hf_hub_download(\n",
+ "    repo_id=repo_id,\n",
+ "    repo_type=\"model\",\n",
+ "    filename=\"classification_head.pt\"\n",
+ ")\n",
+ "\n",
+ "model.classifier.load_state_dict(torch.load(classification_head_path, map_location=torch.device(\"cpu\")))\n",
+ "model.eval()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Baseline\n",
+ "# text = \"Ti-6Al-4V[SEP]280.0 W[SEP]400.0 mm/s[SEP]100.0 microns[SEP]50.0 microns[SEP]100.0 microns\"\n",
+ "\n",
+ "# Prompt\n",
+ "text = \"What are the likely imperfections that occur in Ti-6Al-4V L-PBF builds at 280.0 W, given a 100.0 microns beam diameter, a 400.0 mm/s scan speed, a 100.0 microns hatch spacing, and a 50.0 microns layer height?\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Tokenize inputs \n",
+ "inputs = tokenizer(text, return_tensors=\"pt\", truncation=True, padding=\"max_length\", max_length=256)\n",
+ "\n",
+ "# For scibert\n",
+ "inputs_kwargs = {}\n",
+ "for key, value in inputs.items():\n",
+ "    if key not in [\"token_type_ids\"]:\n",
+ "        inputs_kwargs[key] = value\n",
+ "\n",
+ "# Perform inference\n",
+ "outputs = model(**inputs_kwargs)\n",
+ "\n",
+ "# Extract logits and apply sigmoid activation for multi-label classification\n",
+ "probs = torch.sigmoid(outputs[\"logits\"])\n",
+ "\n",
+ "# Convert probabilities to one-hot encoded labels\n",
+ "preds = (probs > 0.5).int().squeeze()\n",
+ "\n",
+ "# One hot encoded classifications\n",
+ "classifications = [\"None\", \"Keyhole\", \"Lack of Fusion\", \"Balling\"]\n",
+ " \n",
+ "print([classifications[index] for index, encoding in enumerate(preds) if encoding == 1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
{notebooks → model/notebooks}/llama_baseline_05_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/llama_baseline_10_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/llama_baseline_15_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/llama_baseline_20_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/llama_baseline_20_epochs_prompt_input.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/llama_baseline_25_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/llama_prompt_0.5_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_05_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_10_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_15_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_20_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_20_epochs_prompt_input.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/scibert_baseline_25_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/scibert_prompt_02_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/scibert_prompt_05_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_05_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_10_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_15_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_20_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_20_epochs_prompt_input.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/t5_baseline_25_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/t5_prompt_02_epochs.ipynb RENAMED
File without changes
{notebooks → model/notebooks}/t5_prompt_05_epochs.ipynb RENAMED
File without changes
model/scibert.py ADDED
@@ -0,0 +1,67 @@
+ from transformers import AutoModel
+ from huggingface_hub import hf_hub_download
+ from safetensors.torch import load_file
+ import torch.nn as nn
+ import torch
+
+ # Number of labels (update if different)
+ NUM_LABELS = 4
+
+ class SciBertClassificationModel(nn.Module):
+     def __init__(self, model_path="allenai/scibert_scivocab_uncased", freeze_weights=True):
+         super(SciBertClassificationModel, self).__init__()
+         if model_path == "allenai/scibert_scivocab_uncased":
+             self.base_model = AutoModel.from_pretrained(model_path)
+         else:
+             pytorch_model_path = hf_hub_download(
+                 repo_id=model_path,
+                 repo_type="model",
+                 filename="model.safetensors"
+             )
+             state_dict = load_file(pytorch_model_path)
+             filtered_state_dict = {
+                 k.replace("base_model.", ""): v
+                 for k, v in state_dict.items()
+                 if not k.startswith("classifier.")
+             }
+
+             self.base_model = AutoModel.from_pretrained("allenai/scibert_scivocab_uncased", state_dict=filtered_state_dict)
+
+         # For push to hub.
+         self.config = self.base_model.config
+
+         # Freeze the base model's weights
+         if freeze_weights:
+             for param in self.base_model.parameters():
+                 param.requires_grad = False
+
+         # Add a classification head
+         self.classifier = nn.Linear(self.base_model.config.hidden_size, NUM_LABELS)
+
+     def forward(self, input_ids, attention_mask, labels=None):
+         with torch.no_grad():  # No gradients for the base model
+             outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
+
+         # Ensure the tensor is contiguous before passing to the classifier
+         # cls_token_representation = outputs.last_hidden_state[:, 0, :].contiguous()
+         # logits = self.classifier(cls_token_representation)
+
+         # Sum token representations
+         summed_representation = outputs.last_hidden_state.sum(dim=1)  # Summing over the sequence length (dim=1)
+
+         logits = self.classifier(summed_representation)  # Pass the summed representation to the classifier
+
+         loss = None
+         if labels is not None:
+             loss_fn = nn.BCEWithLogitsLoss()
+             loss = loss_fn(logits, labels.float())
+         return {"loss": loss, "logits": logits}
+
+     def state_dict(self, *args, **kwargs):
+         # Get the state dictionary
+         state_dict = super().state_dict(*args, **kwargs)
+         # Ensure all tensors are contiguous
+         for key, tensor in state_dict.items():
+             if isinstance(tensor, torch.Tensor) and not tensor.is_contiguous():
+                 state_dict[key] = tensor.contiguous()
+         return state_dict
model/t5.py ADDED
@@ -0,0 +1,54 @@
+ from transformers import T5EncoderModel, T5Config
+ from huggingface_hub import hf_hub_download
+ import torch.nn as nn
+ import torch
+
+ NUM_LABELS = 4
+
+ class T5ClassificationModel(nn.Module):
+     def __init__(self, model_path="t5-small", freeze_weights=True):
+         super(T5ClassificationModel, self).__init__()
+         if model_path == "t5-small":
+             self.base_model = T5EncoderModel.from_pretrained(model_path)
+         else:
+             pytorch_model_path = hf_hub_download(
+                 repo_id=model_path,
+                 repo_type="model",
+                 filename="pytorch_model.bin"
+             )
+             config = T5Config.from_pretrained(model_path)
+             self.base_model = T5EncoderModel(config)
+
+             # Load the state_dict and remove unwanted keys
+             state_dict = torch.load(pytorch_model_path)
+             filtered_state_dict = {
+                 k.replace("base_model.", ""): v
+                 for k, v in state_dict.items()
+                 if not k.startswith("classifier.")
+             }
+             self.base_model.load_state_dict(filtered_state_dict)
+
+         # For push to hub.
+         self.config = self.base_model.config
+
+         # Freeze the base model's weights
+         if freeze_weights:
+             for param in self.base_model.parameters():
+                 param.requires_grad = False
+
+         # Add a classification head
+         self.classifier = nn.Linear(self.base_model.config.hidden_size, NUM_LABELS)
+
+     def forward(self, input_ids, attention_mask, labels=None):
+         with torch.no_grad():  # No gradients for the base model
+             outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
+
+         # Sum token representations
+         summed_representation = outputs.last_hidden_state.sum(dim=1)  # Summing over the sequence length (dim=1)
+
+         logits = self.classifier(summed_representation)  # Pass the summed representation to the classifier
+         loss = None
+         if labels is not None:
+             loss_fn = nn.BCEWithLogitsLoss()
+             loss = loss_fn(logits, labels.float())
+         return {"loss": loss, "logits": logits}
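Note that the T5 wrapper restores its base weights from a pickled pytorch_model.bin via torch.load, whereas the DistilBERT and SciBERT wrappers load model.safetensors. A possible hardening of that call, sketched here as an assumption rather than part of this commit, is to restrict unpickling, which also avoids the torch.load FutureWarning visible in the deleted notebook output below:

    # Hypothetical tweak to the torch.load call in model/t5.py (requires PyTorch >= 1.13):
    # weights_only=True limits unpickling to tensors and primitive containers.
    state_dict = torch.load(pytorch_model_path, map_location="cpu", weights_only=True)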
notebooks/inference.ipynb DELETED
@@ -1,313 +0,0 @@
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "import torch\n",
- "\n",
- "from huggingface_hub import hf_hub_download\n",
- "from transformers import AutoTokenizer\n",
- "\n",
- "from model.distilbert import DistilBertClassificationModel\n",
- "from model.llama import LlamaClassificationModel"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "repo_id = \"ppak10/defect-classification-llama-baseline-25-epochs\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "LlamaConfig {\n",
- " \"_attn_implementation_autoset\": true,\n",
- " \"_name_or_path\": \"meta-llama/Llama-3.2-1B\",\n",
- " \"architectures\": [\n",
- " \"LlamaForCausalLM\"\n",
- " ],\n",
- " \"attention_bias\": false,\n",
- " \"attention_dropout\": 0.0,\n",
- " \"bos_token_id\": 128000,\n",
- " \"eos_token_id\": 128001,\n",
- " \"head_dim\": 64,\n",
- " \"hidden_act\": \"silu\",\n",
- " \"hidden_size\": 2048,\n",
- " \"initializer_range\": 0.02,\n",
- " \"intermediate_size\": 8192,\n",
- " \"max_position_embeddings\": 131072,\n",
- " \"mlp_bias\": false,\n",
- " \"model_type\": \"llama\",\n",
- " \"num_attention_heads\": 32,\n",
- " \"num_hidden_layers\": 16,\n",
- " \"num_key_value_heads\": 8,\n",
- " \"pretraining_tp\": 1,\n",
- " \"rms_norm_eps\": 1e-05,\n",
- " \"rope_scaling\": {\n",
- " \"factor\": 32.0,\n",
- " \"high_freq_factor\": 4.0,\n",
- " \"low_freq_factor\": 1.0,\n",
- " \"original_max_position_embeddings\": 8192,\n",
- " \"rope_type\": \"llama3\"\n",
- " },\n",
- " \"rope_theta\": 500000.0,\n",
- " \"tie_word_embeddings\": true,\n",
- " \"torch_dtype\": \"bfloat16\",\n",
- " \"transformers_version\": \"4.47.0\",\n",
- " \"use_cache\": true,\n",
- " \"vocab_size\": 128256\n",
- "}\n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/tmp/ipykernel_3716586/1335258174.py:14: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
- " model.classifier.load_state_dict(torch.load(classification_head_path))\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "LlamaClassificationModel(\n",
- " (base_model): LlamaModel(\n",
- " (embed_tokens): Embedding(128256, 2048)\n",
- " (layers): ModuleList(\n",
- " (0-15): 16 x LlamaDecoderLayer(\n",
- " (self_attn): LlamaSdpaAttention(\n",
- " (q_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
- " (k_proj): Linear(in_features=2048, out_features=512, bias=False)\n",
- " (v_proj): Linear(in_features=2048, out_features=512, bias=False)\n",
- " (o_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
- " (rotary_emb): LlamaRotaryEmbedding()\n",
- " )\n",
- " (mlp): LlamaMLP(\n",
- " (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)\n",
- " (up_proj): Linear(in_features=2048, out_features=8192, bias=False)\n",
- " (down_proj): Linear(in_features=8192, out_features=2048, bias=False)\n",
- " (act_fn): SiLU()\n",
- " )\n",
- " (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
- " (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)\n",
- " )\n",
- " )\n",
- " (norm): LlamaRMSNorm((2048,), eps=1e-05)\n",
- " (rotary_emb): LlamaRotaryEmbedding()\n",
- " )\n",
- " (classifier): Linear(in_features=2048, out_features=4, bias=True)\n",
- ")"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Initialize the model\n",
- "# model = DistilBertClassificationModel(repo_id)\n",
- "model = LlamaClassificationModel()\n",
- "\n",
- "# Load the tokenizer\n",
- "tokenizer = AutoTokenizer.from_pretrained(repo_id)\n",
- "\n",
- "classification_head_path = hf_hub_download(\n",
- " repo_id=repo_id,\n",
- " repo_type=\"model\",\n",
- " filename=\"classification_head.pt\"\n",
- ")\n",
- "\n",
- "model.classifier.load_state_dict(torch.load(classification_head_path))\n",
- "model.eval() # Set the model to evaluation mode"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 56,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "tensor([[1, 0, 0, 0]], dtype=torch.int32)\n"
- ]
- }
- ],
- "source": [
- "# text = \"What defects would occur with a beam size of 100 microns, a power of 500 W, a velocity of 100 mm/s and layer height of 10 microns and a hatch spacing of 10 microns for Ti-6Al-4V\"\n",
- "# text = \"SS316L[SEP]500 W[SEP]10.0 mm/s[SEP]500.0 microns[SEP]500.0 microns[SEP]100.0 microns\"\n",
- "text = \"SS316L[SEP]250.0 W[SEP]280.0 mm/s[SEP][SEP]950.0 microns[SEP]600.0 microns\"\n",
- "\n",
- "# Ensure the model is on the GPU\n",
- "# device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
- "device = \"cpu\"\n",
- "model = model.to(device)\n",
- "\n",
- "# Tokenize input for the entire batch and move to GPU\n",
- "inputs = tokenizer(text, return_tensors=\"pt\", truncation=True, padding=\"max_length\", max_length=256)\n",
- "inputs = {key: value.to(device) for key, value in inputs.items()}\n",
- "\n",
- "# Perform inference\n",
- "outputs = model(**inputs)\n",
- "\n",
- "# Extract logits and apply sigmoid activation for multi-label classification\n",
- "logits = outputs[\"logits\"]\n",
- "probs = torch.sigmoid(logits)\n",
- "\n",
- "# Convert probabilities to one-hot encoded labels\n",
- "preds = (probs > 0.5).int()\n",
- "\n",
- "# None, keyhole, lack of fusion, balling\n",
- "print(preds)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/mnt/am/GitHub/LLM-Enabled-Process-Map/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- }
- ],
- "source": [
- "import torch.nn as nn\n",
- "from transformers import PreTrainedModel\n",
- "\n",
- "class PretrainedLlamaClassificationModel(PreTrainedModel):\n",
- " def __init__(self, config):\n",
- " super().__init__(config)\n",
- " self.base_model = AutoModel.from_pretrained(config.model_path, config=config)\n",
- " self.classifier = nn.Linear(config.hidden_size, config.num_labels)\n",
- " self.config = config\n",
- "\n",
- " def forward(self, input_ids, attention_mask, labels=None):\n",
- " outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)\n",
- " summed_representation = outputs.last_hidden_state.sum(dim=1)\n",
- " logits = self.classifier(summed_representation)\n",
- " loss = None\n",
- " if labels is not None:\n",
- " loss_fn = nn.BCEWithLogitsLoss()\n",
- " loss = loss_fn(logits, labels.float())\n",
- " return {\"loss\": loss, \"logits\": logits}\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/mnt/am/GitHub/LLM-Enabled-Process-Map/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n",
- "Some weights of LlamaModel were not initialized from the model checkpoint at ppak10/defect-classification-llama-baseline-25-epochs and are newly initialized: ['embed_tokens.weight', 'layers.0.input_layernorm.weight', 'layers.0.mlp.down_proj.weight', 'layers.0.mlp.gate_proj.weight', 'layers.0.mlp.up_proj.weight', 'layers.0.post_attention_layernorm.weight', 'layers.0.self_attn.k_proj.weight', 'layers.0.self_attn.o_proj.weight', 'layers.0.self_attn.q_proj.weight', 'layers.0.self_attn.v_proj.weight', 'layers.1.input_layernorm.weight', 'layers.1.mlp.down_proj.weight', 'layers.1.mlp.gate_proj.weight', 'layers.1.mlp.up_proj.weight', 'layers.1.post_attention_layernorm.weight', 'layers.1.self_attn.k_proj.weight', 'layers.1.self_attn.o_proj.weight', 'layers.1.self_attn.q_proj.weight', 'layers.1.self_attn.v_proj.weight', 'layers.10.input_layernorm.weight', 'layers.10.mlp.down_proj.weight', 'layers.10.mlp.gate_proj.weight', 'layers.10.mlp.up_proj.weight', 'layers.10.post_attention_layernorm.weight', 'layers.10.self_attn.k_proj.weight', 'layers.10.self_attn.o_proj.weight', 'layers.10.self_attn.q_proj.weight', 'layers.10.self_attn.v_proj.weight', 'layers.11.input_layernorm.weight', 'layers.11.mlp.down_proj.weight', 'layers.11.mlp.gate_proj.weight', 'layers.11.mlp.up_proj.weight', 'layers.11.post_attention_layernorm.weight', 'layers.11.self_attn.k_proj.weight', 'layers.11.self_attn.o_proj.weight', 'layers.11.self_attn.q_proj.weight', 'layers.11.self_attn.v_proj.weight', 'layers.12.input_layernorm.weight', 'layers.12.mlp.down_proj.weight', 'layers.12.mlp.gate_proj.weight', 'layers.12.mlp.up_proj.weight', 'layers.12.post_attention_layernorm.weight', 'layers.12.self_attn.k_proj.weight', 'layers.12.self_attn.o_proj.weight', 'layers.12.self_attn.q_proj.weight', 'layers.12.self_attn.v_proj.weight', 'layers.13.input_layernorm.weight', 'layers.13.mlp.down_proj.weight', 'layers.13.mlp.gate_proj.weight', 'layers.13.mlp.up_proj.weight', 'layers.13.post_attention_layernorm.weight', 'layers.13.self_attn.k_proj.weight', 'layers.13.self_attn.o_proj.weight', 'layers.13.self_attn.q_proj.weight', 'layers.13.self_attn.v_proj.weight', 'layers.14.input_layernorm.weight', 'layers.14.mlp.down_proj.weight', 'layers.14.mlp.gate_proj.weight', 'layers.14.mlp.up_proj.weight', 'layers.14.post_attention_layernorm.weight', 'layers.14.self_attn.k_proj.weight', 'layers.14.self_attn.o_proj.weight', 'layers.14.self_attn.q_proj.weight', 'layers.14.self_attn.v_proj.weight', 'layers.15.input_layernorm.weight', 'layers.15.mlp.down_proj.weight', 'layers.15.mlp.gate_proj.weight', 'layers.15.mlp.up_proj.weight', 'layers.15.post_attention_layernorm.weight', 'layers.15.self_attn.k_proj.weight', 'layers.15.self_attn.o_proj.weight', 'layers.15.self_attn.q_proj.weight', 'layers.15.self_attn.v_proj.weight', 'layers.16.input_layernorm.weight', 'layers.16.mlp.down_proj.weight', 'layers.16.mlp.gate_proj.weight', 'layers.16.mlp.up_proj.weight', 'layers.16.post_attention_layernorm.weight', 'layers.16.self_attn.k_proj.weight', 'layers.16.self_attn.o_proj.weight', 'layers.16.self_attn.q_proj.weight', 'layers.16.self_attn.v_proj.weight', 'layers.17.input_layernorm.weight', 'layers.17.mlp.down_proj.weight', 'layers.17.mlp.gate_proj.weight', 'layers.17.mlp.up_proj.weight', 'layers.17.post_attention_layernorm.weight', 'layers.17.self_attn.k_proj.weight', 'layers.17.self_attn.o_proj.weight', 'layers.17.self_attn.q_proj.weight', 'layers.17.self_attn.v_proj.weight', 'layers.18.input_layernorm.weight', 'layers.18.mlp.down_proj.weight', 
'layers.18.mlp.gate_proj.weight', 'layers.18.mlp.up_proj.weight', 'layers.18.post_attention_layernorm.weight', 'layers.18.self_attn.k_proj.weight', 'layers.18.self_attn.o_proj.weight', 'layers.18.self_attn.q_proj.weight', 'layers.18.self_attn.v_proj.weight', 'layers.19.input_layernorm.weight', 'layers.19.mlp.down_proj.weight', 'layers.19.mlp.gate_proj.weight', 'layers.19.mlp.up_proj.weight', 'layers.19.post_attention_layernorm.weight', 'layers.19.self_attn.k_proj.weight', 'layers.19.self_attn.o_proj.weight', 'layers.19.self_attn.q_proj.weight', 'layers.19.self_attn.v_proj.weight', 'layers.2.input_layernorm.weight', 'layers.2.mlp.down_proj.weight', 'layers.2.mlp.gate_proj.weight', 'layers.2.mlp.up_proj.weight', 'layers.2.post_attention_layernorm.weight', 'layers.2.self_attn.k_proj.weight', 'layers.2.self_attn.o_proj.weight', 'layers.2.self_attn.q_proj.weight', 'layers.2.self_attn.v_proj.weight', 'layers.20.input_layernorm.weight', 'layers.20.mlp.down_proj.weight', 'layers.20.mlp.gate_proj.weight', 'layers.20.mlp.up_proj.weight', 'layers.20.post_attention_layernorm.weight', 'layers.20.self_attn.k_proj.weight', 'layers.20.self_attn.o_proj.weight', 'layers.20.self_attn.q_proj.weight', 'layers.20.self_attn.v_proj.weight', 'layers.21.input_layernorm.weight', 'layers.21.mlp.down_proj.weight', 'layers.21.mlp.gate_proj.weight', 'layers.21.mlp.up_proj.weight', 'layers.21.post_attention_layernorm.weight', 'layers.21.self_attn.k_proj.weight', 'layers.21.self_attn.o_proj.weight', 'layers.21.self_attn.q_proj.weight', 'layers.21.self_attn.v_proj.weight', 'layers.22.input_layernorm.weight', 'layers.22.mlp.down_proj.weight', 'layers.22.mlp.gate_proj.weight', 'layers.22.mlp.up_proj.weight', 'layers.22.post_attention_layernorm.weight', 'layers.22.self_attn.k_proj.weight', 'layers.22.self_attn.o_proj.weight', 'layers.22.self_attn.q_proj.weight', 'layers.22.self_attn.v_proj.weight', 'layers.23.input_layernorm.weight', 'layers.23.mlp.down_proj.weight', 'layers.23.mlp.gate_proj.weight', 'layers.23.mlp.up_proj.weight', 'layers.23.post_attention_layernorm.weight', 'layers.23.self_attn.k_proj.weight', 'layers.23.self_attn.o_proj.weight', 'layers.23.self_attn.q_proj.weight', 'layers.23.self_attn.v_proj.weight', 'layers.24.input_layernorm.weight', 'layers.24.mlp.down_proj.weight', 'layers.24.mlp.gate_proj.weight', 'layers.24.mlp.up_proj.weight', 'layers.24.post_attention_layernorm.weight', 'layers.24.self_attn.k_proj.weight', 'layers.24.self_attn.o_proj.weight', 'layers.24.self_attn.q_proj.weight', 'layers.24.self_attn.v_proj.weight', 'layers.25.input_layernorm.weight', 'layers.25.mlp.down_proj.weight', 'layers.25.mlp.gate_proj.weight', 'layers.25.mlp.up_proj.weight', 'layers.25.post_attention_layernorm.weight', 'layers.25.self_attn.k_proj.weight', 'layers.25.self_attn.o_proj.weight', 'layers.25.self_attn.q_proj.weight', 'layers.25.self_attn.v_proj.weight', 'layers.26.input_layernorm.weight', 'layers.26.mlp.down_proj.weight', 'layers.26.mlp.gate_proj.weight', 'layers.26.mlp.up_proj.weight', 'layers.26.post_attention_layernorm.weight', 'layers.26.self_attn.k_proj.weight', 'layers.26.self_attn.o_proj.weight', 'layers.26.self_attn.q_proj.weight', 'layers.26.self_attn.v_proj.weight', 'layers.27.input_layernorm.weight', 'layers.27.mlp.down_proj.weight', 'layers.27.mlp.gate_proj.weight', 'layers.27.mlp.up_proj.weight', 'layers.27.post_attention_layernorm.weight', 'layers.27.self_attn.k_proj.weight', 'layers.27.self_attn.o_proj.weight', 'layers.27.self_attn.q_proj.weight', 'layers.27.self_attn.v_proj.weight', 
'layers.28.input_layernorm.weight', 'layers.28.mlp.down_proj.weight', 'layers.28.mlp.gate_proj.weight', 'layers.28.mlp.up_proj.weight', 'layers.28.post_attention_layernorm.weight', 'layers.28.self_attn.k_proj.weight', 'layers.28.self_attn.o_proj.weight', 'layers.28.self_attn.q_proj.weight', 'layers.28.self_attn.v_proj.weight', 'layers.29.input_layernorm.weight', 'layers.29.mlp.down_proj.weight', 'layers.29.mlp.gate_proj.weight', 'layers.29.mlp.up_proj.weight', 'layers.29.post_attention_layernorm.weight', 'layers.29.self_attn.k_proj.weight', 'layers.29.self_attn.o_proj.weight', 'layers.29.self_attn.q_proj.weight', 'layers.29.self_attn.v_proj.weight', 'layers.3.input_layernorm.weight', 'layers.3.mlp.down_proj.weight', 'layers.3.mlp.gate_proj.weight', 'layers.3.mlp.up_proj.weight', 'layers.3.post_attention_layernorm.weight', 'layers.3.self_attn.k_proj.weight', 'layers.3.self_attn.o_proj.weight', 'layers.3.self_attn.q_proj.weight', 'layers.3.self_attn.v_proj.weight', 'layers.30.input_layernorm.weight', 'layers.30.mlp.down_proj.weight', 'layers.30.mlp.gate_proj.weight', 'layers.30.mlp.up_proj.weight', 'layers.30.post_attention_layernorm.weight', 'layers.30.self_attn.k_proj.weight', 'layers.30.self_attn.o_proj.weight', 'layers.30.self_attn.q_proj.weight', 'layers.30.self_attn.v_proj.weight', 'layers.31.input_layernorm.weight', 'layers.31.mlp.down_proj.weight', 'layers.31.mlp.gate_proj.weight', 'layers.31.mlp.up_proj.weight', 'layers.31.post_attention_layernorm.weight', 'layers.31.self_attn.k_proj.weight', 'layers.31.self_attn.o_proj.weight', 'layers.31.self_attn.q_proj.weight', 'layers.31.self_attn.v_proj.weight', 'layers.4.input_layernorm.weight', 'layers.4.mlp.down_proj.weight', 'layers.4.mlp.gate_proj.weight', 'layers.4.mlp.up_proj.weight', 'layers.4.post_attention_layernorm.weight', 'layers.4.self_attn.k_proj.weight', 'layers.4.self_attn.o_proj.weight', 'layers.4.self_attn.q_proj.weight', 'layers.4.self_attn.v_proj.weight', 'layers.5.input_layernorm.weight', 'layers.5.mlp.down_proj.weight', 'layers.5.mlp.gate_proj.weight', 'layers.5.mlp.up_proj.weight', 'layers.5.post_attention_layernorm.weight', 'layers.5.self_attn.k_proj.weight', 'layers.5.self_attn.o_proj.weight', 'layers.5.self_attn.q_proj.weight', 'layers.5.self_attn.v_proj.weight', 'layers.6.input_layernorm.weight', 'layers.6.mlp.down_proj.weight', 'layers.6.mlp.gate_proj.weight', 'layers.6.mlp.up_proj.weight', 'layers.6.post_attention_layernorm.weight', 'layers.6.self_attn.k_proj.weight', 'layers.6.self_attn.o_proj.weight', 'layers.6.self_attn.q_proj.weight', 'layers.6.self_attn.v_proj.weight', 'layers.7.input_layernorm.weight', 'layers.7.mlp.down_proj.weight', 'layers.7.mlp.gate_proj.weight', 'layers.7.mlp.up_proj.weight', 'layers.7.post_attention_layernorm.weight', 'layers.7.self_attn.k_proj.weight', 'layers.7.self_attn.o_proj.weight', 'layers.7.self_attn.q_proj.weight', 'layers.7.self_attn.v_proj.weight', 'layers.8.input_layernorm.weight', 'layers.8.mlp.down_proj.weight', 'layers.8.mlp.gate_proj.weight', 'layers.8.mlp.up_proj.weight', 'layers.8.post_attention_layernorm.weight', 'layers.8.self_attn.k_proj.weight', 'layers.8.self_attn.o_proj.weight', 'layers.8.self_attn.q_proj.weight', 'layers.8.self_attn.v_proj.weight', 'layers.9.input_layernorm.weight', 'layers.9.mlp.down_proj.weight', 'layers.9.mlp.gate_proj.weight', 'layers.9.mlp.up_proj.weight', 'layers.9.post_attention_layernorm.weight', 'layers.9.self_attn.k_proj.weight', 'layers.9.self_attn.o_proj.weight', 'layers.9.self_attn.q_proj.weight', 
'layers.9.self_attn.v_proj.weight', 'norm.weight']\n",
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
- ]
- }
- ],
- "source": [
- "from transformers import AutoModel, pipeline\n",
- "\n",
- "repo_id = \"ppak10/defect-classification-llama-baseline-25-epochs\"\n",
- "model = AutoModel.from_pretrained(repo_id)\n",
- "# tokenizer = AutoTokenizer.from_pretrained(repo_id)\n",
- "\n",
- "# classification_pipeline = pipeline(\"text-classification\", model=model, tokenizer=tokenizer)\n",
- "# result = classification_pipeline(\"Test input text\")\n",
- "# print(result)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "LlamaModel(\n",
- " (embed_tokens): Embedding(32000, 2048)\n",
- " (layers): ModuleList(\n",
- " (0-31): 32 x LlamaDecoderLayer(\n",
- " (self_attn): LlamaSdpaAttention(\n",
- " (q_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
- " (k_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
- " (v_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
- " (o_proj): Linear(in_features=2048, out_features=2048, bias=False)\n",
- " (rotary_emb): LlamaRotaryEmbedding()\n",
- " )\n",
- " (mlp): LlamaMLP(\n",
- " (gate_proj): Linear(in_features=2048, out_features=11008, bias=False)\n",
- " (up_proj): Linear(in_features=2048, out_features=11008, bias=False)\n",
- " (down_proj): Linear(in_features=11008, out_features=2048, bias=False)\n",
- " (act_fn): SiLU()\n",
- " )\n",
- " (input_layernorm): LlamaRMSNorm((2048,), eps=1e-06)\n",
- " (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-06)\n",
- " )\n",
- " )\n",
- " (norm): LlamaRMSNorm((2048,), eps=1e-06)\n",
- " (rotary_emb): LlamaRotaryEmbedding()\n",
- ")\n"
- ]
- }
- ],
- "source": [
- "print(model)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "venv",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
requirements.txt CHANGED
@@ -1,3 +1,9 @@
  jupyterlab==4.2.5
- tornado==6.2
- ipywidgets
+ ipywidgets
+ torch
+ huggingface_hub
+ transformers
+ sentencepiece
+ safetensors
+ datasets
+ numpy
setup.py ADDED
@@ -0,0 +1,6 @@
+ from setuptools import setup, find_packages
+
+ setup(
+     name="llm_enabled_process_map",
+     packages=find_packages()
+ )
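This setup.py is what the new "RUN pip install -e ." line in the Dockerfile relies on: installing the repository as an editable package lets the notebooks import the model package regardless of their working directory. A minimal sanity check (assumed usage, not part of the commit):

    # After `pip install -e .` from the repository root, the wrappers resolve from anywhere:
    from model.distilbert import DistilBertClassificationModel
    from model.scibert import SciBertClassificationModel
    from model.llama import LlamaClassificationModel
    from model.t5 import T5ClassificationModel

    print(DistilBertClassificationModel.__module__)  # "model.distilbert"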
start_server.sh CHANGED
@@ -1,7 +1,7 @@
  #!/bin/bash
  JUPYTER_TOKEN="${JUPYTER_TOKEN:=huggingface}"

- NOTEBOOK_DIR="./notebooks"
+ NOTEBOOK_DIR="model"

  jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
