mohitkumarrajbadi committed on
Commit
2bdd84f
·
1 Parent(s): a997aba

New Framework Change

Browse files
__pycache__/utils.cpython-313.pyc ADDED
Binary file (20.8 kB). View file
 
app.py CHANGED
@@ -1,355 +1,18 @@
1
  import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import torch
5
- from transformers import AutoTokenizer, AutoModelForCausalLM
6
- import matplotlib.pyplot as plt
7
- import time
8
- import json
9
- import re
10
- import os
11
- import asyncio
12
 
13
- # Testing The Work
14
 
15
- # -------------------------------
16
- # Utility Functions
17
- # -------------------------------
18
-
19
- token = st.secrets["HF_TOKEN"]
20
- os.environ['CURL_CA_BUNDLE'] = ''
21
-
22
- @st.cache_resource
23
- def load_model(model_id: str, token: str):
24
- """
25
- Loads and caches the Gemma model and tokenizer with authentication token.
26
- """
27
- try:
28
- # Create and run an event loop explicitly
29
- asyncio.run(async_load(model_id, token))
30
-
31
- # Ensure torch classes path is valid (optional)
32
- if not hasattr(torch, "classes") or not torch.classes:
33
- torch.classes = torch._C._get_python_module("torch.classes")
34
-
35
- tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
36
- model = AutoModelForCausalLM.from_pretrained(model_id, token=token)
37
-
38
- return tokenizer, model
39
-
40
- except Exception as e:
41
- print(f"An error occurred: {e}")
42
- st.error(f"Model loading failed: {e}")
43
- return None, None
44
-
45
- async def async_load(model_id, token):
46
- """
47
- Dummy async function to initialize the event loop.
48
- """
49
- await asyncio.sleep(0.1) # Dummy async operation
50
-
51
- def preprocess_data(uploaded_file, file_extension):
52
- """
53
- Reads the uploaded file and returns a processed version.
54
- Supports CSV, JSONL, and TXT.
55
- """
56
- data = None
57
- try:
58
- if file_extension == "csv":
59
- data = pd.read_csv(uploaded_file)
60
- elif file_extension == "jsonl":
61
- # Each line is a JSON object.
62
- data = [json.loads(line) for line in uploaded_file.readlines()]
63
- try:
64
- data = pd.DataFrame(data)
65
- except Exception:
66
- st.warning("Unable to convert JSONL to a table. Previewing raw JSON objects.")
67
- elif file_extension == "txt":
68
- text_data = uploaded_file.read().decode("utf-8")
69
- data = text_data.splitlines()
70
- except Exception as e:
71
- st.error(f"Error processing file: {e}")
72
- return data
73
-
74
- def clean_text(text, lowercase=True, remove_punctuation=True):
75
- """
76
- Cleans text data by applying basic normalization.
77
- """
78
- if lowercase:
79
- text = text.lower()
80
- if remove_punctuation:
81
- text = re.sub(r'[^\w\s]', '', text)
82
- return text
83
-
84
- def plot_training_metrics(epochs, loss_values, accuracy_values):
85
- """
86
- Returns a matplotlib figure plotting training loss and accuracy.
87
- """
88
- fig, ax = plt.subplots(1, 2, figsize=(12, 4))
89
- ax[0].plot(range(1, epochs+1), loss_values, marker='o', color='red')
90
- ax[0].set_title("Training Loss")
91
- ax[0].set_xlabel("Epoch")
92
- ax[0].set_ylabel("Loss")
93
-
94
- ax[1].plot(range(1, epochs+1), accuracy_values, marker='o', color='green')
95
- ax[1].set_title("Training Accuracy")
96
- ax[1].set_xlabel("Epoch")
97
- ax[1].set_ylabel("Accuracy")
98
-
99
- return fig
100
-
101
- def simulate_training(num_epochs):
102
- """
103
- Simulates a training loop for demonstration.
104
- Yields current epoch, loss values, and accuracy values.
105
- Replace this with your actual fine-tuning loop.
106
- """
107
- loss_values = []
108
- accuracy_values = []
109
- for epoch in range(1, num_epochs + 1):
110
- loss = np.exp(-epoch) + np.random.random() * 0.1
111
- acc = 0.5 + (epoch / num_epochs) * 0.5 + np.random.random() * 0.05
112
- loss_values.append(loss)
113
- accuracy_values.append(acc)
114
- yield epoch, loss_values, accuracy_values
115
- time.sleep(1) # Simulate computation time
116
-
117
- def quantize_model(model):
118
- """
119
- Applies dynamic quantization for demonstration.
120
- In practice, adjust this based on your model and target hardware.
121
- """
122
- quantized_model = torch.quantization.quantize_dynamic(
123
- model, {torch.nn.Linear}, dtype=torch.qint8
124
- )
125
- return quantized_model
126
-
127
- def convert_to_torchscript(model):
128
- """
129
- Converts the model to TorchScript format.
130
- """
131
- example_input = torch.randint(0, 100, (1, 10))
132
- traced_model = torch.jit.trace(model, example_input)
133
- return traced_model
134
-
135
- def convert_to_onnx(model, output_path="model.onnx"):
136
- """
137
- Converts the model to ONNX format.
138
- """
139
- dummy_input = torch.randint(0, 100, (1, 10))
140
- torch.onnx.export(model, dummy_input, output_path, input_names=["input"], output_names=["output"])
141
- return output_path
142
-
143
- def load_finetuned_model(model, checkpoint_path="fine_tuned_model.pt"):
144
- """
145
- Loads the fine-tuned model from the checkpoint.
146
- """
147
- if os.path.exists(checkpoint_path):
148
- model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cpu')))
149
- model.eval()
150
- st.success("Fine-tuned model loaded successfully!")
151
- else:
152
- st.error(f"Checkpoint not found: {checkpoint_path}")
153
- return model
154
-
155
-
156
- def generate_response(prompt, model, tokenizer, max_length=200):
157
- """
158
- Generates a response using the fine-tuned model.
159
- """
160
- # Tokenize the prompt
161
- inputs = tokenizer(prompt, return_tensors="pt").input_ids
162
-
163
- # Generate text
164
- with torch.no_grad():
165
- outputs = model.generate(inputs, max_length=max_length, num_return_sequences=1, temperature=0.7)
166
-
167
- # Decode the output
168
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
169
- return response
170
-
171
-
172
- # -------------------------------
173
- # Application Layout
174
- # -------------------------------
175
-
176
- st.title("One-Stop Gemma Model Fine-tuning, Quantization & Conversion UI")
177
- st.markdown("""
178
- This application is designed for beginners in generative AI.
179
- It allows you to fine-tune, quantize, and convert Gemma models with an intuitive UI.
180
- You can upload your dataset, clean and preview your data, configure training parameters, and export your model in different formats.
181
- """)
182
-
183
- # Sidebar: Model selection and data upload
184
- st.sidebar.header("Configuration")
185
-
186
- # Model Selection
187
- selected_model = st.sidebar.selectbox("Select Gemma Model", options=["Gemma-Small", "Gemma-Medium", "Gemma-Large"])
188
- if selected_model == "google/gemma-3-1b-it":
189
- model_id = "google/gemma-3-1b-it"
190
- elif selected_model == "google/gemma-3-4b-it":
191
- model_id = "google/gemma-3-4b-it"
192
- else:
193
- model_id = "google/gemma-3-1b-it"
194
-
195
- loading_placeholder = st.sidebar.empty()
196
- loading_placeholder.info("Loading model...")
197
- tokenizer, model = load_model(model_id, token)
198
- loading_placeholder.success("Model loaded.")
199
-
200
-
201
- # Dataset Upload
202
- uploaded_file = st.sidebar.file_uploader("Upload Dataset (CSV, JSONL, TXT)", type=["csv", "jsonl", "txt"])
203
- data = None
204
- if uploaded_file is not None:
205
- file_ext = uploaded_file.name.split('.')[-1].lower()
206
- data = preprocess_data(uploaded_file, file_ext)
207
- st.sidebar.subheader("Dataset Preview:")
208
- if isinstance(data, pd.DataFrame):
209
- st.sidebar.dataframe(data.head())
210
- elif isinstance(data, list):
211
- st.sidebar.write(data[:5])
212
- else:
213
- st.sidebar.write(data)
214
- else:
215
- st.sidebar.info("Awaiting dataset upload.")
216
-
217
- # Data Cleaning Options (for TXT files)
218
- if uploaded_file is not None and file_ext == "txt":
219
- st.sidebar.subheader("Data Cleaning Options")
220
- lowercase_option = st.sidebar.checkbox("Convert to lowercase", value=True)
221
- remove_punct = st.sidebar.checkbox("Remove punctuation", value=True)
222
- cleaned_data = [clean_text(line, lowercase=lowercase_option, remove_punctuation=remove_punct) for line in data]
223
- st.sidebar.text_area("Cleaned Data Preview", value="\n".join(cleaned_data[:5]), height=150)
224
-
225
- # Main Tabs for Different Operations
226
- tabs = st.tabs(["Fine-tuning", "Quantization", "Model Conversion"])
227
-
228
- # -------------------------------
229
- # Fine-tuning Tab
230
- # -------------------------------
231
- with tabs[0]:
232
- st.header("Fine-tuning")
233
- st.markdown("Configure hyperparameters and start fine-tuning your Gemma model.")
234
-
235
- col1, col2, col3 = st.columns(3)
236
- with col1:
237
- learning_rate = st.number_input("Learning Rate", value=1e-4, format="%.5f")
238
- with col2:
239
- batch_size = st.number_input("Batch Size", value=16, step=1)
240
- with col3:
241
- epochs = st.number_input("Epochs", value=3, step=1)
242
-
243
- if st.button("Start Fine-tuning"):
244
- if data is None:
245
- st.error("Please upload a dataset first!")
246
- else:
247
- st.info("Starting fine-tuning...")
248
- progress_bar = st.progress(0)
249
- training_placeholder = st.empty()
250
- loss_values = []
251
- accuracy_values = []
252
-
253
- # Simulate training loop (replace with your actual training code)
254
- for epoch, losses, accs in simulate_training(epochs):
255
- fig = plot_training_metrics(epoch, losses, accs)
256
- training_placeholder.pyplot(fig)
257
- progress_bar.progress(epoch/epochs)
258
- st.success("Fine-tuning completed!")
259
-
260
- # Save the fine-tuned model (for demonstration, saving state_dict)
261
- if model:
262
- torch.save(model.state_dict(), "fine_tuned_model.pt")
263
- with open("fine_tuned_model.pt", "rb") as f:
264
- st.download_button("Download Fine-tuned Model", data=f, file_name="fine_tuned_model.pt", mime="application/octet-stream")
265
- else:
266
- st.error("Model not loaded. Cannot save.")
267
-
268
-
269
- # -------------------------------
270
- # Quantization Tab
271
- # -------------------------------
272
- with tabs[1]:
273
- st.header("Model Quantization")
274
- st.markdown("Quantize your model to optimize for inference performance.")
275
- quantize_choice = st.radio("Select Quantization Type", options=["Dynamic Quantization"], index=0)
276
-
277
- if st.button("Apply Quantization"):
278
- with st.spinner("Applying quantization..."):
279
- quantized_model = quantize_model(model)
280
- st.success("Model quantized successfully!")
281
- torch.save(quantized_model.state_dict(), "quantized_model.pt")
282
- with open("quantized_model.pt", "rb") as f:
283
- st.download_button("Download Quantized Model", data=f, file_name="quantized_model.pt", mime="application/octet-stream")
284
-
285
- # -------------------------------
286
- # Model Conversion Tab
287
- # -------------------------------
288
- with tabs[2]:
289
- st.header("Model Conversion")
290
- st.markdown("Convert your model to a different format for deployment or optimization.")
291
- conversion_option = st.selectbox("Select Conversion Format", options=["TorchScript", "ONNX"])
292
-
293
- if st.button("Convert Model"):
294
- if conversion_option == "TorchScript":
295
- with st.spinner("Converting to TorchScript..."):
296
- ts_model = convert_to_torchscript(model)
297
- ts_model.save("model_ts.pt")
298
- st.success("Converted to TorchScript!")
299
- with open("model_ts.pt", "rb") as f:
300
- st.download_button("Download TorchScript Model", data=f, file_name="model_ts.pt", mime="application/octet-stream")
301
- elif conversion_option == "ONNX":
302
- with st.spinner("Converting to ONNX..."):
303
- onnx_path = convert_to_onnx(model, "model.onnx")
304
- st.success("Converted to ONNX!")
305
- with open(onnx_path, "rb") as f:
306
- st.download_button("Download ONNX Model", data=f, file_name="model.onnx", mime="application/octet-stream")
307
-
308
- # -------------------------------
309
- # Response Generation Section
310
- # -------------------------------
311
- st.header("Generate Responses with Fine-Tuned Model")
312
- st.markdown("Use the fine-tuned model to generate text responses based on your prompts.")
313
-
314
- # Check if the fine-tuned model exists
315
- if os.path.exists("fine_tuned_model.pt"):
316
- # Load the fine-tuned model
317
- model = load_finetuned_model(model, "fine_tuned_model.pt")
318
-
319
- # Input prompt for generating responses
320
- prompt = st.text_area("Enter a prompt:", "Once upon a time...")
321
-
322
- # Max length slider
323
- max_length = st.slider("Max Response Length", min_value=50, max_value=500, value=200, step=10)
324
-
325
- if st.button("Generate Response"):
326
- with st.spinner("Generating response..."):
327
- response = generate_response(prompt, model, tokenizer, max_length)
328
- st.success("Generated Response:")
329
- st.write(response)
330
-
331
- else:
332
- st.warning("Fine-tuned model not found. Please fine-tune the model first.")
333
-
334
-
335
- # -------------------------------
336
- # Optional: Cloud Integration Snippet
337
- # -------------------------------
338
- st.header("Cloud Integration")
339
  st.markdown("""
340
- For large-scale training or model storage, consider integrating with Google Cloud Storage or Vertex AI.
341
- Below is an example snippet for uploading your model to GCS:
 
 
 
342
  """)
343
- st.code("""
344
- from google.cloud import storage
345
-
346
- def upload_to_gcs(bucket_name, source_file_name, destination_blob_name):
347
- storage_client = storage.Client()
348
- bucket = storage_client.bucket(bucket_name)
349
- blob = bucket.blob(destination_blob_name)
350
- blob.upload_from_filename(source_file_name)
351
- print(f"Uploaded {source_file_name} to {destination_blob_name}")
352
 
353
- # Example usage:
354
- # upload_to_gcs("your-bucket-name", "fine_tuned_model.pt", "models/fine_tuned_model.pt")
355
- """, language="python")
 
 
 
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
2
 
3
+ st.set_page_config(page_title="Gemma LLM Fine-Tuning UI", layout="wide")
4
 
5
+ st.title("Gemma LLM Fine-Tuning Suite 🚀")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  st.markdown("""
7
+ ### 🔥 **Multi-page AI Model Trainer**
8
+ - **Chat**: Interact with the model.
9
+ - **Fine-tuning**: Train on `train_data.csv` or upload new datasets.
10
+ - **Conversion**: Export models to TorchScript and ONNX.
11
+ - **Dataset Management**: View and add to your training data.
12
  """)
 
 
 
 
 
 
 
 
 
13
 
14
+ # st.sidebar.title("Navigation")
15
+ # st.sidebar.page_link("pages/Chat.py", label="🔹 Chat")
16
+ # st.sidebar.page_link("pages/Finetune.py", label="🔹 Fine-tuning")
17
+ # st.sidebar.page_link("pages/Conversion.py", label="🔹 Model Conversion")
18
+ # st.sidebar.page_link("pages/Dataset_Management.py", label="🔹 Dataset Management")
datasets/train_data.csv ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ prompt,response
2
+ "Bhai, kal movie dekhne chale?","Haan bhai, kaunsi dekhni hai?"
3
+ Kya haal hai bhai?,"Bas bhai, zindagi chal rahi hai."
4
+ Tu kal gym gaya tha?,"Haan bhai, lekin bas selfie kheechi thi 😎."
5
+ Tere paas charger hai?,"Haan bhai, par battery khatam hai 😬."
6
+ Tu itna late kyun aaya?,"Bhai, traffic ne jaan le li!"
7
+ Tune pizza order kiya?,"Haan bhai, extra cheese bhi maang liya 😎."
8
+ Office ka workload kaisa chal raha hai?,"Bhai, workload nahi, dukh chal raha hai 😩."
9
+ Tune WhatsApp group chhod diya?,"Haan bhai, bas drama zyada ho gaya tha 😂."
10
+ Weekend pe kya plan hai?,"Bhai, full chill maarne ka mood hai 🥂."
11
+ Kya tu aaj free hai?,"Free nahi bhai, bas zindagi ki maar kha raha hoon 😎."
12
+ Exam ka preparation kaisa hai?,"Bhai, bas TikTok scrolling chal raha hai 😂."
13
+ Tere paas notes hain?,"Haan bhai, par padhai se zyada memes hain usme 😂."
14
+ Kal cricket khelne chale?,"Bhai, fielding nahi karunga 😎."
15
+ Shaadi kab kar raha hai bhai?,"Bhai, pehle job toh lagne de 😂."
16
+ Tera phone kitna slow hai!,"Bhai, processor nahi, emotions chala raha hai 😂."
17
+ Tune uska reply dekha?,"Haan bhai, bas seen maar diya 😢."
18
+ Bhai tu kab aayega ghar?,Bhai jab zindagi thoda sudhar jayegi 😂.
19
+ Tera boss kaisa hai?,"Bhai, sach bolu? Zindagi ka villain hai 😂."
20
+ Tera laptop hang ho gaya?,"Bhai, meri zindagi bhi hang ho gayi 😂."
21
+ Khana kha liya?,"Haan bhai, lekin mann nahi bhara 😂."
22
+ Tu kal party mein aaya nahi?,"Bhai, neend zyada zaroori thi 😂."
23
+ Tu kal gym nahi gaya?,"Bhai, motivation so raha tha 😎."
24
+ Tune uska Insta story dekha?,"Haan bhai, dil toot gaya 😂."
25
+ "Bhai, kal salary aayi?","Haan bhai, aur gayi bhi 😂."
26
+ Tune latest web series dekhi?,"Haan bhai, binge-watching expert ban gaya 😂."
27
+ Tu kal late kyun aaya?,"Bhai, nind ki problem hai 😂."
28
+ Tune ghar ka kaam kiya?,"Bhai, homework nahi, gharwork kar raha hoon 😂."
29
+ "Bhai, tu kahan hai?","Bhai, zindagi ke jhamelon mein 😂."
30
+ "Bhai, kal off hai kya?","Bhai, sapno mein hi hai 😂."
31
+ Tere gharwale strict hain?,"Bhai, Hitler ke fan hain 😂."
32
+ Tune match dekha?,"Haan bhai, heart attack ho gaya tha 😂."
33
+ Tu kal late kyun aaya?,"Bhai, sapno mein tha 😂."
34
+ Tune video banayi?,"Bhai, banayi nahi, viral ho gayi 😂."
35
+ Tu gym ja raha hai?,"Bhai, sirf reels dekh raha hoon 😂."
36
+ Koi naya gaana recommend kar?,"Bhai, Atif ka purana sun le 😂."
37
+ Tu kal kahan tha?,"Bhai, so raha tha 😂."
38
+ Tera net slow hai?,"Bhai, 2G se bhi slow hai 😂."
39
+ Tune online shopping ki?,"Bhai, cart bhar diya, budget nahi 😂."
40
+ Bhai tera crush tera bhai ban gaya 😂,"Bhai, bas dua mein yaad rakhna 😂."
41
+ Tu kal kahan tha?,"Bhai, neend ka band baj raha tha 😂."
42
+ Tune late reply diya?,"Bhai, bas zindagi ka load hai 😂."
43
+ "Bhai, exam ka result aaya?","Haan bhai, bas asar nahi dikha 😂."
44
+ Tu cricket dekh raha hai?,"Bhai, dil thod diya unhone 😂."
45
+ "Bhai, kal ka plan cancel?","Bhai, neend ko priority di 😂."
46
+ Tune job apply ki?,"Bhai, apply nahi, try kar raha hoon 😂."
47
+ Tu kal pakda gaya?,"Bhai, meme share karte hue 😂."
48
+ Tera dost tujhse zyada cool hai?,"Bhai, thoda dukh hua 😂."
49
+ Tu zyada coffee peeta hai?,"Bhai, stress ka side effect hai 😂."
50
+ "Bhai, tera birthday aaya?","Haan bhai, par gift nahi aaya 😂."
51
+ Tu ghar pe hai?,"Haan bhai, ghar hi zindagi hai 😂."
52
+ Tu kal gaya tha?,"Bhai, gaya tha, bhool gaya 😂."
53
+ Tera dukh kya hai bhai?,"Bhai, bus Monday kaam pe jana hai 😂."
54
+ Tu kal kahan gaya?,"Bhai, sapno mein ghoom raha tha 😂."
55
+ Tu kal off tha?,"Bhai, bas dil se 😂."
56
+ Tune wo video dekha?,"Bhai, repeat pe chal raha hai 😂."
57
+ Tera dukh kya hai?,"Bhai, salary khatam ho gayi 😂."
58
+ Tu kal kaam pe gaya?,"Bhai, bas sochta hi reh gaya 😂."
59
+ "Bhai, kal milne chale?","Bhai, ghar hi safe hai 😂."
60
+ Tu kal late aaya?,"Bhai, traffic se dosti ho gayi 😂."
61
+ "Bhai, kal gym chal?","Gym? Bhai, humara toh 'jimmedari' kaafi hai! 😂"
62
+ Tune uska Insta story dekha?,"Haan bhai, lagta hai woh bhi filter ki dukaan khol li hai! 😆"
63
+ "Yaar, tu itna busy kyun rehta hai?","Kya karoon bhai, zindagi ne 'busy' button daba diya hai! 😅"
64
+ Aaj kal kya chal raha hai?,"Bas bhai, life ka 'software update' pending hai! 🤖"
65
+ Tera boss kaisa hai?,"Bhai, woh toh 'mood swing' ka live example hai! 😜"
66
+ Tu diet pe hai kya?,"Haan, bas 'momos' aur 'pizza' ko diet plan mein adjust karna hai! 🍕"
67
+ Kal party mein kyun nahi aaya?,"Bhai, mera 'Netflix' aur 'bed' ke saath commitment tha! 📺"
68
+ Tune naya web series dekha?,"Haan, ab toh 'binge-watching' mera naya talent ban gaya hai! 🎬"
69
+ Tera phone itna slow kyun hai?,"Bhai, yeh phone nahi, 'tortoise' hai! 🐢"
70
+ Tu itna late kyun aaya?,Traffic ne aaj phir se 'surprise test' le liya! 🚗
71
+ Kya haal hai bhai?,"Bas bhai, zindagi 'buffering' mode mein hai! ⏳"
72
+ Tu kal gym gaya tha?,"Haan bhai, lekin bas 'selfie' kheechi thi! 📸"
73
+ Tere paas charger hai?,"Haan bhai, par battery khatam hai! 🔋"
74
+ Tune pizza order kiya?,"Haan bhai, extra cheese bhi maang liya! 🧀"
75
+ Office ka workload kaisa chal raha hai?,"Bhai, workload nahi, dukh chal raha hai! 😩"
76
+ Tune WhatsApp group chhod diya?,"Haan bhai, drama zyada ho gaya tha! 🎭"
77
+ Weekend pe kya plan hai?,"Bhai, full chill maarne ka mood hai! 🥂"
78
+ Kya tu aaj free hai?,"Free nahi bhai, bas zindagi ki maar kha raha hoon! 😎"
79
+ Exam ka preparation kaisa hai?,"Bhai, bas TikTok scrolling chal raha hai! 📱"
80
+ Tere paas notes hain?,"Haan bhai, par padhai se zyada memes hain usme! 😂"
81
+ Kal cricket khelne chale?,"Bhai, fielding nahi karunga! 🏏"
82
+ Shaadi kab kar raha hai bhai?,"Bhai, pehle job toh lagne de! 💼"
83
+ Tera phone kitna slow hai!,"Bhai, processor nahi, emotions chala raha hai! 🤖"
84
+ Tune uska reply dekha?,"Haan bhai, bas seen maar diya! 😢"
85
+ Bhai tu kab aayega ghar?,Bhai jab zindagi thoda sudhar jayegi! 🏠
86
+ Tera boss kaisa hai?,"Bhai, sach bolu? Zindagi ka villain hai! 😈"
87
+ Tera laptop hang ho gaya?,"Bhai, meri zindagi bhi hang ho gayi! 💻"
88
+ Khana kha liya?,"Haan bhai, lekin mann nahi bhara! 🍛"
89
+ Tu kal party mein aaya nahi?,"Bhai, neend zyada zaroori thi! 😴"
90
+ Tu kal gym nahi gaya?,"Bhai, motivation so raha tha! 🛌"
91
+ Tune uska Insta story dekha?,"Haan bhai, dil toot gaya! 💔"
92
+ "Bhai, kal salary aayi?","Haan bhai, aur gayi bhi! 💸"
93
+ Tune latest web series dekhi?,"Haan bhai, binge-watching expert ban gaya! 📺"
94
+ Tu kal late kyun aaya?,"Bhai, neend ki problem hai! 😪"
95
+ Tune ghar ka kaam kiya?,"Bhai, homework nahi, gharwork kar raha hoon! 🏡"
96
+ "Bhai, tu kahan hai?","Bhai, zindagi ke jhamelon mein! 🎢"
97
+ "Bhai, kal off hai kya?","Bhai, sapno mein hi hai! 💤"
98
+ Tere gharwale strict hain?,"Bhai, Hitler ke fan hain! 👨‍✈️"
99
+ Tune match dekha?,"Haan bhai, heart attack ho gaya tha! ⚽"
100
+ Tu kal late kyun aaya?,"Bhai, sapno mein tha! 🌌"
101
+ Tune video banayi?,"Bhai, banayi nahi, viral ho gayi! 🎥"
102
+ Tu gym ja raha hai?,"Bhai, sirf reels dekh raha hoon! 📱"
103
+ Koi naya gaana recommend kar?,"Bhai, Atif ka purana sun le! 🎶"
104
+ Tu kal kahan tha?,"Bhai, so raha tha! 🛌"
105
+ Tera net slow hai?,"Bhai, 2G se bhi slow hai! 🐢"
106
+ Tune online shopping ki?,"Bhai, cart bhar diya, budget nahi! 🛒"
107
+ Bhai tera crush tera bhai ban gaya!,"Bhai, bas dua mein yaad rakhna! 🙏"
108
+ Tu kal late kyun aaya?,"Bhai, alarm ne bhi haath utha diya tha! 😴"
109
+ Tera net itna slow kyun hai?,"Bhai, turtle race chal rahi hai! 🐢"
110
+ Tune kal ka match dekha?,"Haan bhai, dil ke saath umeed bhi tut gayi! 💔"
111
+ Tu office mein late kyun pahucha?,"Bhai, traffic nahi, zindagi slow chal rahi thi! 🚶‍♂️"
112
+ Tune naya phone liya?,"Haan bhai, EMI ke saath zindagi bhi le li! 💸"
113
+ Tu diet par hai kya?,"Bhai, sirf naam ka, pet ka nahi! 🍕"
114
+ Tu kal gym gaya tha?,"Haan bhai, bas treadmill dekh ke wapas aa gaya! 😂"
115
+ Tera boss kaisa hai?,"Bhai, uske face par hamesha Monday rehta hai! 😩"
116
+ Tune latest movie dekhi?,"Haan bhai, aur story khatam hone se pehle neend aa gayi! 😴"
117
+ "Bhai, kal ka plan pakka?","Bhai, bas mood ka bharosa nahi! 😎"
118
+ Tu kal party kyun nahi aaya?,"Bhai, ghar ka wifi chhod kar jaana nahi chahta tha! 😂"
119
+ Tune job apply ki?,"Bhai, apply nahi, bas try kar raha hoon! 🤞"
120
+ Tu cricket dekh raha hai?,"Bhai, dil hi tod diya unhone! 💔🏏"
121
+ Tu kal kaha gaya tha?,"Bhai, zindagi se milne gaya tha! 🤯"
122
+ Tera phone hang ho gaya?,"Bhai, phone nahi, patience hang ho gaya! 😫"
123
+ "Bhai, kal gym chale?","Bhai, bas protein shake ka sapna dekh raha hoon! 💪😎"
124
+ Tune new song suna?,"Haan bhai, ab toh playlist repeat pe chal rahi hai! 🎧"
125
+ "Bhai, kal milne chale?","Bhai, ghar ka wifi chod kar jaana nahi chahta! 😂"
126
+ Tu kal late kyun aaya?,"Bhai, neend ka overdose ho gaya tha! 😴"
127
+ Tu kal bike se gaya tha?,"Haan bhai, bas hawa se race laga raha tha! 🏍️💨"
128
+ "Bhai, tera pet kaisa hai?","Bhai, mujhse zyada royal treat mil raha hai usko! 🐾😂"
129
+ Tune latest web series dekhi?,"Bhai, binge-watching ke chakkar mein neend ud gayi! 📺😵"
130
+ Tu kal concert gaya tha?,"Haan bhai, awaaz gayab leke wapas aaya! 🎤😂"
131
+ Tu itna busy kyun hai?,"Bhai, zindagi ne full-time job de di hai! 😎"
132
+ Tune naya laptop liya?,"Haan bhai, EMI wali zindagi shuru ho gayi! 💻💸"
133
+ "Bhai, kal kaun sa movie dekhte hain?","Bhai, pehle budget check kar lete hain! 💰😂"
134
+ Tera data khatam ho gaya?,"Bhai, reels ka talent hi yeh hai! 📱😂"
135
+ Tu kal late kyun aaya?,"Bhai, bed ne chodhne se mana kar diya tha! 😂"
136
+ Tune ghar ka kaam kiya?,"Bhai, ghar ka kaam nahi, dukh pocha tha! 😂"
137
+ "Bhai, exam ka preparation kaisa hai?","Bhai, bas syllabus ke sapne dekh raha hoon! 📚😎"
138
+ Tu kal gym gaya tha?,"Haan bhai, lekin sirf water cooler tak! 🚶‍♂️😂"
139
+ Tune video banayi?,"Bhai, banayi nahi, viral ho gayi! 🎥🔥"
140
+ "Bhai, tu shopping gaya?","Haan bhai, window shopping expert ban gaya! 🛒😂"
141
+ Tune ghar ka kaam kiya?,"Bhai, bas mobile ka safai abhiyan chal raha tha! 📱😂"
142
+ Tu kal kahan tha?,"Bhai, khayalon mein ghoom raha tha! 🤯😂"
143
+ Tera boss kaisa hai?,"Bhai, Monday ke mood mein hi rehta hai! 😩"
144
+ Tu kal party gaya tha?,"Haan bhai, DJ se zyada khana baja raha tha! 🍕🥂😂"
145
+ Tune latest web series dekhi?,"Bhai, dekh ke neend ki yaad aa gayi! 😴📺"
146
+ Tere paas charger hai?,"Haan bhai, lekin khud bhi charging dhund raha hai! 🔋"
147
+ "Bhai, weekend pe kya plan hai?","Bhai, bas sapno mein trip karne ka plan hai! ✈️😂"
148
+ Tu kal gym gaya tha?,"Haan bhai, bas mirror selfies li thi! 📸😎"
149
+ Tune uska Insta dekha?,"Bhai, filter se zyada kuch nahi dikha! 😎😂"
150
+ Tu kal late kyun aaya?,"Bhai, traffic aur zindagi dono slow thi! 🚗😂"
151
+ Tera phone slow hai?,"Bhai, snail bhi sharma jaye is speed se! 🐌😂"
152
+ Tu diet pe hai kya?,"Haan bhai, par pizza diet plan mein fit nahi ho raha! 🍕😎"
153
+ "Bhai, kal milne chale?","Bhai, ghar ka wifi nahi chhod sakta! 😂"
154
+ Tu kal kaam pe gaya?,"Bhai, bas neend ka load leke gaya tha! 😴😂"
155
+ Tune shopping ki?,"Haan bhai, cart bhar diya, budget nahi! 🛒💸😂"
156
+ Tera crush online tha?,"Haan bhai, par reply nahi aaya! 😢😂"
157
+ Tune kal ka match dekha?,"Haan bhai, lagta hai team ne bhi hope chhod di! 😭🏏"
158
+ Tu kal late kyun aaya?,"Bhai, neend se break-up nahi ho raha tha! 😴😂"
159
+ Tune naya phone liya?,"Haan bhai, aur EMI leke zindagi bhi le li! 💸📱"
160
+ "Bhai, tu kitna busy rehta hai?","Bhai, zindagi full-time job ban gayi hai! 😂"
161
+ Tune gym join kiya?,"Haan bhai, par membership card hi exercise kar raha hai! 🏋️‍♂️😂"
162
+ Tera dukh kya hai bhai?,"Bhai, salary aayi thi, chali bhi gayi! 💸😩"
163
+ Tu kal kahan tha?,"Bhai, bed ke saath relationship strong ho raha tha! 🛏️❤️"
164
+ Tune ghar ka kaam kiya?,"Haan bhai, remote dhundhne ka kaam! 😂📺"
165
+ Tu kal movie gaya tha?,"Haan bhai, par ticket se zyada popcorn mehenga tha! 🍿💸"
166
+ Tune online shopping ki?,"Bhai, cart bhar diya, wallet khali hai! 😭🛒"
167
+ Tu kal cricket khelne gaya?,"Haan bhai, par fielding se allergy ho gayi thi! 😂🏏"
168
+ Tera net slow hai?,"Bhai, lagta hai wifi bhi break le raha hai! 🐢😂"
169
+ "Bhai, weekend pe kya plan hai?","Bhai, bas sapno mein Maldives jaane ka plan hai! 🏝️😂"
170
+ Tu gym gaya tha?,"Haan bhai, par sirf dumbbell dekh ke wapas aa gaya! 💪😂"
171
+ Tune uska Insta dekha?,"Bhai, filter se chehra nahi, zindagi badal gayi! 😂📸"
172
+ "Bhai, kal ka plan confirm?","Bhai, bas mood ke upar depend karta hai! 😂"
173
+ Tune cricket dekha?,"Haan bhai, dil bhi toota aur TV bhi! 😂📺"
174
+ Tu kal date pe gaya tha?,"Haan bhai, par sirf bill bharne gaya tha! 😂💸"
175
+ Tera dukh kya hai bhai?,"Bhai, paise khatam aur mahina baaki hai! 😂💸"
176
+ "Bhai, tera boss kaisa hai?","Bhai, Monday ka live version hai! 😭😎"
177
+ Tune naya gaana suna?,"Haan bhai, ab repeat pe chal raha hai! 🎧🔥"
178
+ Tu kal late kyun aaya?,"Bhai, neend aur traffic dono se panga ho gaya! 😎🚗"
179
+ "Bhai, kal salary mili?","Haan bhai, aur khatam bhi ho gayi! 😂💸"
180
+ Tune ghar ka kaam kiya?,"Bhai, bas mobile ka storage saaf kiya! 📱😂"
181
+ Tera crush online tha?,"Haan bhai, par bas status update kiya! 😭😂"
182
+ "Bhai, kal kaam pe gaya?","Bhai, bas attendance dene gaya tha! 😂💼"
183
+ Tune movie dekhi?,"Haan bhai, story se zyada neend achhi thi! 😴🎥"
184
+ Tu diet pe hai kya?,"Haan bhai, par pizza diet plan mein nahi aata! 🍕😂"
185
+ Tera phone slow hai?,"Bhai, lagta hai 2G ka comeback ho gaya! 🐢📱"
186
+ Tu kal shopping gaya tha?,"Haan bhai, lekin sirf mannequins dekhe! 😂🛍️"
187
+ "Bhai, weekend pe kya plan hai?","Bhai, bed aur blanket se relationship strong karna hai! 😂🛏️"
188
+ Tu kal bike se gaya tha?,"Haan bhai, hawa se race laga raha tha! 🏍️💨"
189
+ Tune ghar ka kaam kiya?,"Bhai, bas remote dhundh raha tha! 😂📺"
190
+ Tu cricket khelta hai?,"Bhai, bas fielding avoid karta hoon! 😂🏏"
191
+ Tera boss strict hai?,"Bhai, usko smile bhi paid leave pe milti hai! 😂😎"
192
+ Tu kal late kyun aaya?,"Bhai, neend ka over-time ho gaya tha! 😴😂"
193
+ Tune naya web series dekha?,"Haan bhai, binge-watching expert ban gaya hoon! 📺🔥"
194
+ "Bhai, kal ka plan fix?","Bhai, bas neend se permission leni baaki hai! 😂"
195
+ Tera dukh kya hai bhai?,"Bhai, zindagi ne no refund policy laga di hai! 😂"
196
+ Tu kal kahaan tha?,"Bhai, khayalon mein ghoom raha tha! 🌌😂"
197
+ "Bhai, kal cricket khelne chale?","Haan bhai, par batting hi karunga! 🏏😎"
198
+ Tune naya phone liya?,"Haan bhai, ab data nahi, EMI khatam ho rahi hai! 💸😂"
199
+ Tu kal movie dekhne gaya?,"Haan bhai, lekin ending se pehle neend aa gayi! 😴🎥"
200
+ Tera net slow hai?,"Bhai, turtle race chal rahi hai! 🐢📶"
201
+ Tune ghar ka kaam kiya?,"Haan bhai, bas fridge kholne ka kaam! 😂🍕"
202
+ "Bhai, kal gym chale?","Gym? Bhai, humara toh 'jimmedari' kaafi hai! 😂"
203
+ Tune latest movie dekhi?,"Haan bhai, story se zyada neend interesting thi! 😴🎥"
204
+ Tu kal cricket khelne gaya?,"Haan bhai, par sirf toss jeeta! 😂🏏"
205
+ "Bhai, tera boss strict hai?","Bhai, usko toh chhutti ka spelling bhi nahi aata! 😂😎"
206
+ Tu kal late kyun aaya?,"Bhai, bed se alag hone ka mann nahi tha! 🛏️😂"
207
+ Tune naya song suna?,"Haan bhai, ab repeat pe chal raha hai! 🎧🔥"
208
+ Tera phone slow hai?,"Bhai, lagta hai snail bhi sharma jaye! 🐌📱😂"
209
+ Tu kal shopping gaya tha?,"Haan bhai, lekin sirf mannequins ko dekha! 😂🛍️"
210
+ "Bhai, kal gym gaya tha?","Nahi bhai, bas reels dekh ke calories jala raha hoon 😂📱"
211
+ Tune uska Insta story dekha?,"Haan bhai, full 'main character energy' thi! 🤩📸"
212
+ Tu kal late kyun aaya?,"Bhai, alarm aur meri dosti thodi toxic ho gayi hai 😴⏰"
213
+ Kal office mein kya scene tha?,"Bhai, same drama, different day! 😩💼"
214
+ Tera net slow hai?,"Bhai, lagta hai Airtel ne 2G ka throwback de diya! 🐢📶"
215
+ Tune naya phone liya?,"Haan bhai, EMI dekh ke ro raha hoon 💸😭"
216
+ Tu kal date pe gaya tha?,"Haan bhai, par bill bharne mein hi pyaar khatam ho gaya 😂💀"
217
+ Tune ghar ka kaam kiya?,"Bhai, bas online shopping ke cart saaf kiya 😂🛒"
218
+ Weekend pe kya scene hai?,"Bhai, bas bed aur Netflix ka serious relationship hai 🍿❤️"
219
+ Tune naya gaana suna?,"Haan bhai, ab repeat pe chal raha hai, neighbors pareshaan hai 😂🎧"
220
+ Tera dukh kya hai bhai?,"Bhai, dukh nahi… bas 'low battery' wali zindagi hai 😭🔋"
221
+ Tu kal gym gaya tha?,"Haan bhai, bas cardio ke naam pe water cooler tak chala 😂🚶‍♂️"
222
+ Bhai tera crush tera bhai ban gaya 😂,"Bhai, ab bas rakhi ki tayyari kar raha hoon 😂😭"
223
+ Tune naya web series dekha?,"Haan bhai, binge-watching Olympic level pe hai! 🎯📺"
224
+ Tera phone slow hai?,"Bhai, snail bhi sharma jaye aisi speed hai 🐌📱😂"
225
+ Bhai kal party mein kyun nahi aaya?,"Bhai, mera bed se commitment tha 🛏️❤️"
226
+ Tune ghar ka kaam kiya?,"Bhai, bas fridge open-close wala cardio kiya 😂🍕"
227
+ Tu kal cricket khelne gaya?,"Haan bhai, par ball se dosti nahi ho paayi 🏏😂"
228
+ Tera boss kaisa hai?,"Bhai, pura 'mood swing' ka calendar hai 😂😎"
229
+ Bhai kal kaam pe gaya?,"Haan bhai, attendance dene gaya tha bas 😂💼"
230
+ Tune naya phone liya?,"Haan bhai, EMI dekh ke laga phone nahi, loan liya hai 😂📱"
231
+ Tu kal gym gaya?,"Haan bhai, par bas mirror ke saamne flex kiya 😂💪"
232
+ "Bhai, weekend pe kya scene?","Bhai, bas 'napflix' and chill! 🛏️😎"
233
+ Tu kal shopping gaya tha?,"Haan bhai, lekin mannequins hi dekhe 😂🛍️"
234
+ Tune uska reply dekha?,"Haan bhai, bas 'seen' maar diya 💔😢"
235
+ Bhai kal milne chale?,"Bhai, ghar pe hi 'soft launch' ho raha hoon 😂🛋️"
236
+ Tu itna busy kyun hai?,"Bhai, life 'do not disturb' mode pe hai 😂📵"
237
+ Tu kal cricket khelne gaya?,"Haan bhai, par sirf shadow practice ki 😂🏏"
238
+ Tune ghar ka kaam kiya?,"Bhai, bas remote dhundhne ka kaam 😂📺"
239
+ Tera boss strict hai?,"Bhai, usko chhutti ka spelling bhi nahi aata 😂😎"
240
+ "Bhai, kal ka plan fix?","Bhai, bas neend se permission leni baaki hai 😂😴"
241
+ Tune naya song suna?,"Haan bhai, ab toh ringtone bhi wahi hai 😂🎵"
242
+ Tera phone slow hai?,"Bhai, tortoise ko bhi sharam aa jaye 😂🐢"
243
+ Tu kal movie gaya tha?,"Haan bhai, par ending se pehle neend aa gayi 😂😴"
244
+ Tune naya meme dekha?,"Haan bhai, share karte karte battery khatam ho gayi 😂📱"
245
+ "Bhai, kal salary aayi?","Haan bhai, aur khatam bhi ho gayi 😂💸"
246
+ Tune naya gaana suna?,"Haan bhai, ab toh playlist ka raja ban gaya 😂🎧"
247
+ Tu kal late kyun aaya?,"Bhai, sapno ka 'overtime' ho gaya 😂💤"
248
+ Tune online shopping ki?,"Haan bhai, cart full, wallet empty 😂🛒"
249
+ "Bhai, weekend pe kya scene?","Bhai, chill maarne ka full mood hai 🥂😂"
250
+ Tune cricket dekha?,"Haan bhai, player se zyada umpire dekha 😂🏏"
251
+ "Bhai, tera boss strict hai?","Bhai, Monday ka human version hai 😂💼"
252
+ Tune movie dekhi?,"Haan bhai, par neend zyada interesting thi 😂😴"
253
+ Tu kal late kyun aaya?,"Bhai, bed ne break-up nahi diya 😂🛏️"
254
+ Tu cricket dekh raha hai?,"Haan bhai, TV ka remote nahi dekh raha 😂📺"
255
+ Tu kal gym gaya?,"Haan bhai, bas selfie kheechne 😂📸"
256
+ Tune ghar ka kaam kiya?,"Bhai, bas meme banane ka kaam 😂💻"
257
+ Tu kal shopping gaya?,"Haan bhai, lekin mannequins se hi baat ho gayi 😂🛍️"
258
+ Tera boss strict hai?,"Bhai, usko toh smile bhi paid leave pe milti hai 😂😎"
259
+ "Bhai, weekend pe kya scene?","Bhai, full 'ghar se hi ghar wapsi' 😂🏠"
260
+ Tune naya gaana suna?,"Haan bhai, ab toh lyrics bhi ratti ho gayi 😂🎵"
261
+ "Bhai, kal ka plan?","Bhai, neend ki 'booking' full hai 😂🛌"
262
+ Tere boss ka mood kaisa hai aaj?,"Bhai, pura 'Monday on steroids' lag raha hai 😂💀"
263
+ Tu kal date pe gaya tha?,"Haan bhai, par bill bharte hi break-up soch raha tha 💸😭"
264
+ Bhai kal ka meeting kaisa tha?,"Bhai, bas Zoom ka background enjoy kiya 😂💻"
265
+ Tune uska message dekha?,"Haan bhai, reply nahi, bas 'seen' maar diya 💔👀"
266
+ "Bhai, exam kaisa gaya?","Bhai, bas pen chal raha tha… dimaag nahi 😂🧠"
267
+ Tere boss ne kuch bola?,"Haan bhai, lagta hai unka breakup hua hai 😂💀"
268
+ Tu weekend pe kya kar raha hai?,"Bhai, bas 'Netflix and snore' mode on hai 😂🍿😴"
269
+ Tune salary check ki?,"Haan bhai, but lagta hai HR ne 'prank' kiya hai 😂💸"
270
+ Tera WiFi slow hai?,"Bhai, snail bhi race jeet jayega 😂🐢"
271
+ Kal ka gym scene hai kya?,"Gym nahi bhai, sirf 'gymmedariyan' hai 😂💼"
272
+ "Bhai, kal ka cricket match dekha?","Haan bhai, par umpire zyada entertaining tha 😂🤦‍♂️"
273
+ Tune naya filter try kiya?,"Haan bhai, asli se zyada sundar lag raha tha 😂📸"
274
+ Tera crush online hai kya?,"Haan bhai, par bas 'last seen' ka ehsaas de rahi hai 💔😂"
275
+ "Bhai, kal movie ka plan hai?","Bhai, 'pockets empty' ka plan hai 😂💸"
276
+ Tu gym join karega kya?,"Bhai, bas 'intentions fit' hai, body nahi 😂💪"
277
+ Tere boss ne kuch poocha?,"Haan bhai, bas HR jaisa dikh raha tha 😂💀"
278
+ Tune ghar ka kaam kiya?,"Bhai, bas dishes aur dreams donon dhoye 😂🍽️😴"
279
+ Tere weekend plans kya hai?,"Bhai, bas neend aur napka date hai 😂🛌"
280
+ Tu kal office gaya tha?,"Haan bhai, par attendance dene gaya tha bas 😂💼"
281
+ Tune ghar ka renovation kiya?,"Bhai, bas sofa ka position badla 😂🛋️"
282
+ Tu kal late kyun aaya?,"Bhai, traffic nahi, neend heavy thi 😂😴"
283
+ Tera net slow hai kya?,"Bhai, pigeon se bhi slow chal raha hai 😂🐦"
284
+ "Bhai, tu cricket khelta hai?","Haan bhai, bas fielding se dushmani hai 😂🏏"
285
+ Tu kal party gaya tha?,"Haan bhai, par free snacks pe zyada dhyan tha 😂🍕"
286
+ "Bhai, tere paas charger hai?","Haan bhai, par battery khatam hai 😂🔋"
287
+ Tune weekend pe kya kiya?,"Bhai, bas bed ke saath commitment nibha raha tha 😂🛏️"
288
+ "Bhai, salary gayi?","Haan bhai, udti chidiya bhi nahi thi, bas ud gayi 😂💸"
289
+ Tune naya meme dekha?,"Haan bhai, do baar share bhi kiya 😂📱"
290
+ "Bhai, tera pet bhag gaya?","Haan bhai, EMI dekh ke 😂🐕‍🦺"
291
+ Tera phone slow hai?,"Bhai, turtle bhi inspire ho jaye 😂🐢"
292
+ "Bhai, tu naya laptop le raha hai?","Haan bhai, EMI lene ka plan hai 😂💻"
293
+ Tune uska tweet dekha?,"Haan bhai, lagta hai usne Elon Musk ko hire kar liya 😂🐦"
294
+ "Bhai, kal late kyun aaya?","Bhai, neend ka heavy dose ho gaya tha 😂😴"
295
+ Tu kal gym gaya tha?,"Haan bhai, par bas locker ka lock khol ke aa gaya 😂🔒"
296
+ "Bhai, tera dukh kya hai?","Bhai, 'low balance' aur 'low battery' ek saath hai 😂💸🔋"
297
+ Tere gharwale strict hai?,"Bhai, 'WiFi password' se bhi zyada 😂🔒"
298
+ Tune naya filter try kiya?,"Haan bhai, asli se zyada fake lag raha tha 😂📸"
299
+ Tera dost cool hai?,"Bhai, uski coolness dekh ke AC bhi sharma jaye 😂❄️"
300
+ Tune ghar ka kaam kiya?,"Bhai, bas 'undo' button dhoond raha hoon 😂⏪"
301
+ Tu kal cricket khelne gaya?,"Haan bhai, par bas fielding se dushmani ho gayi 😂🏏"
302
+ "Bhai, kal kaam pe gaya?","Haan bhai, attendance dene gaya tha bas 😂💼"
303
+ "Bhai, kal ka cricket match dekha?","Haan bhai, bas 'last over' mein dil toota 😂💔🏏"
304
+ Tune naya song suna?,"Haan bhai, repeat pe chal raha hai 😂🎧"
305
+ Tera net slow hai?,"Bhai, pigeon se bhi slow chal raha hai 😂🐦"
306
+ "Bhai, tune naya meme dekha?","Haan bhai, battery khatam kar diya share karte 😂📱"
307
+ "Bhai, tu aaj kal busy rehta hai?","Haan bhai, bas 'workload' se zyada 'overthinkload' hai 😂💀"
308
+ Tune kal gym join kiya?,"Haan bhai, par bas 'selfie membership' li hai 📸😂"
309
+ "Bhai, tu diet pe hai?","Haan bhai, bas 'pizza' aur 'biryani' ko healthy maan liya 😂🍕🍗"
310
+ Tere boss ka mood kaisa hai?,"Bhai, lagta hai unke 'data plan' ka bhi expiry ho gaya 😂📉"
311
+ Tu kal kitne baje soya?,"Bhai, bas 'Netflix' ne raat ka 'delete button' daba diya 😂📺"
312
+ "Bhai, tera crush tujhe bhool gaya?","Haan bhai, par Instagram memories nahi 😂💔📱"
313
+ Tune weekend pe kya kiya?,"Bhai, bas 'bed' se zyada serious relationship mein tha 😂🛏️"
314
+ Tu kal late kyun aaya?,"Bhai, traffic nahi, bas 'mood swing' heavy tha 😂🚦"
315
+ Tune naya song suna?,"Haan bhai, ab toh 'repeat' se bhi dosti ho gayi 😂🎧"
316
+ Tera dukh kya hai?,"Bhai, salary bhi 'fast forward' mode mein chali gayi 😂💸"
317
+ Tu kal cricket khel raha tha?,"Haan bhai, par fielding se 'breakup' ho gaya 😂🏏"
318
+ Tere gharwale strict hai?,"Bhai, WiFi password se bhi zyada 😂🔒"
319
+ Tune naya filter try kiya?,"Haan bhai, asli se zyada 'animated' lag raha tha 😂📸"
320
+ Tu kal date pe gaya tha?,"Haan bhai, par bas 'pocket money' ka breakup ho gaya 😂💸"
321
+ "Bhai, weekend pe kya plan hai?","Bhai, full 'bedflix and nap' mode on hai 😂🛌🍿"
322
+ Tu gym nahi gaya?,"Bhai, bas 'motivation' ne leave le liya 😂💪"
323
+ Tera net slow hai?,"Bhai, turtle bhi 'fast and furious' lag raha hai 😂🐢"
324
+ "Bhai, tu cricket dekh raha hai?","Haan bhai, par umpire zyada entertaining hai 😂👀"
325
+ Tune ghar ka kaam kiya?,"Bhai, bas 'undo button' dhoond raha hoon 😂⏪"
326
+ Tera phone slow hai?,"Bhai, snail bhi jeet jayega race mein 😂🐌"
327
+ Tune naya reel banaya?,"Haan bhai, viral nahi hua, bas family ne dekha 😂📱"
328
+ Tere boss ne kuch bola?,"Haan bhai, bas 'mental gym' karwa rahe the 😂💀"
329
+ "Bhai, weekend pe outing?","Bhai, bas 'ghar se terrace' tak travel hoga 😂🏠"
330
+ Tune naya job apply kiya?,"Haan bhai, par HR ne 'seen' maar diya 😂💼"
331
+ Tu aaj kal busy hai?,"Haan bhai, bas 'meme scrolling' mein busy hoon 😂📱"
332
+ "Bhai, tu kal late kyun aaya?","Bhai, bas 'alarm snooze' mode mein chala gaya 😂⏰"
333
+ Tu naya phone le raha hai?,"Haan bhai, par EMI ka dukh zyada hai 😂📱💸"
334
+ "Bhai, tera laptop slow hai?","Bhai, lagta hai 'Windows 95' par chal raha hai 😂💻"
335
+ Tune ghar ka renovation kiya?,"Bhai, bas 'sofa' ka angle change kiya 😂🛋️"
336
+ "Bhai, kal movie dekhi?","Haan bhai, par bas popcorn ka bill yaad hai 😂🍿💸"
337
+ Tera dukh kya hai?,"Bhai, salary aur battery dono low hai 😂💸🔋"
338
+ Tune naya meme dekha?,"Haan bhai, share karte hi battery gayab 😂📱⚡"
339
+ Tu kal party gaya tha?,"Haan bhai, par free snacks hi target tha 😂🍕"
340
+ Tera boss kaisa hai?,"Bhai, 'Monday' se bhi zyada toxic hai 😂💀"
341
+ Tune ghar ka kaam kiya?,"Bhai, bas 'mind cleaning' kar raha tha 😂🧠"
342
+ "Bhai, kal cricket khela?","Haan bhai, bas ball dhoondhne gaya tha 😂🏏"
343
+ Tu kal late kyun aaya?,"Bhai, sapno mein fast forward ho gaya 😂💤"
344
+ "Bhai, tera net slow hai?","Haan bhai, '2G' bhi racer lag raha hai 😂🐢"
345
+ Tune naya game try kiya?,"Haan bhai, par boss ka 'deadline game' jeet gaya 😂🎮"
346
+ Tu aaj kal busy hai?,"Haan bhai, bas 'reality check' mein busy hoon 😂🔍"
347
+ "Bhai, kal kya kiya?","Bhai, bas 'procrastination' ka world record banaya 😂🏅"
348
+ Tu kal gym gaya tha?,"Haan bhai, bas 'reels' dekhne 😂📱"
349
+ "Bhai, tune naya reel dekha?","Haan bhai, copy paste bhi kar diya 😂🎥"
350
+ Tera dukh kya hai?,"Bhai, salary aayi bhi nahi gayi bhi 😂💸"
351
+ Tu kal cricket khel raha tha?,"Haan bhai, par sirf 'sledging' mein expert ho gaya 😂🏏"
352
+ "Bhai, weekend pe kya plan hai?","Bhai, bas 'ghar se sofa' tak travel hai 😂🛋️"
353
+ Tune kal movie dekhi?,"Haan bhai, bas ending hi nahi samjhi 😂🎬"
354
+ Tera net slow hai?,"Bhai, 'loading' se zyada 'buffering' hai 😂📶"
355
+ "Bhai, tu naya phone le raha hai?","Haan bhai, par 'battery life' ka breakup ho gaya 😂📱"
356
+ Tune ghar ka kaam kiya?,"Bhai, bas 'remote' ka workout kiya 😂📺"
357
+ Tu kal late kyun aaya?,"Bhai, bas 'snooze' pe 'snooze' maar raha tha 😂⏰"
358
+ Tune naya song suna?,"Haan bhai, ab toh 'repeat' pe chal raha hai 😂🎧"
359
+ Tera boss strict hai?,"Bhai, usko toh 'deadline' se bhi zyada 'deadline' ka darr hai 😂💼"
360
+ "Bhai, tu aaj kal busy hai?","Haan bhai, bas 'meme scrolling' mein busy hoon 😂📱"
361
+ "Bhai, kal kaam pe gaya?","Haan bhai, attendance dene gaya tha bas"
362
+ "Bhai, kal cricket match dekha?","Haan bhai, bas 'last over' mein dil toota"
363
+ "Bhai, weekend pe outing?","Bhai, bas 'ghar se terrace' tak travel hoga"
364
+ "Bhai, tu cricket khelta hai?","Haan bhai, bas fielding se dushmani hai"
365
+ "Bhai, kal ka plan?","Bhai, neend ki 'booking' full hai"
366
+ "Bhai, tu kal late kyun aaya?","Bhai, bas 'alarm snooze' mode mein chala gaya"
367
+ "Bhai, kal movie dekhi?","Haan bhai, par bas popcorn ka bill yaad hai"
368
+ "Bhai, tu aaj kal busy rehta hai?","Haan bhai, bas 'workload' se zyada 'overthinkload' hai"
369
+ "Bhai, kal ka cricket match dekha?","Haan bhai, par umpire zyada entertaining tha"
370
+ "Bhai, weekend pe kya plan hai?","Bhai, bas sapno mein Maldives jaane ka plan hai"
371
+ "Bhai, weekend pe kya scene?","Bhai, chill maarne ka full mood hai"
372
+ "Bhai, kal kaam pe gaya?","Haan bhai, bas attendance dene gaya tha"
373
+ "Bhai, kal ka cricket match dekha?","Haan bhai, bas 'last over' mein dil toota"
pages/Chat.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from utils import (
3
+ load_model,
4
+ load_finetuned_model,
5
+ generate_response,
6
+ get_hf_token
7
+ )
8
+ import os
9
+ import json
10
+ from datetime import datetime
11
+
12
st.set_page_config(page_title="Gemma Chat", layout="wide")

# -------------------------------
# 💡 Theme Toggle
# -------------------------------
# Sidebar switch; when it is on, a small CSS override is injected into the page.
dark_mode = st.sidebar.toggle("🌙 Dark Mode", value=False)

if dark_mode:
    dark_css = """
<style>
body { background-color: #1e1e1e; color: #ffffff; }
.stTextInput, .stTextArea, .stSelectbox, .stSlider { color: #ffffff !important; }
</style>
"""
    st.markdown(dark_css, unsafe_allow_html=True)

st.title("💬 Chat with Gemma Model")
30
+
31
+ # -------------------------------
32
+ # 📌 Model Source Selection
33
+ # -------------------------------
34
# Where the chat model comes from: a fine-tuned checkpoint on disk or a Hub id.
model_source = st.sidebar.radio("📌 Select Model Source", ["Local (.pt)", "Hugging Face"])

# -------------------------------
# 🔥 Dynamic Model List
# -------------------------------
if model_source != "Local (.pt)":
    # Hub models: fixed list of Gemma 3 checkpoints, no local weights file.
    hf_models = [
        "google/gemma-3-1b-it",
        "google/gemma-3-4b-pt",
        "google/gemma-3-4b-it",
        "google/gemma-3-12b-pt",
        "google/gemma-3-12b-it",
        "google/gemma-3-27b-pt",
        "google/gemma-3-27b-it"
    ]
    selected_model = st.sidebar.selectbox("🛠️ Select Hugging Face Model", hf_models)
    model_path = None
else:
    # Local models: every ``.pt`` file found in the ``models`` directory.
    model_dir = "models"
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    local_models = [name for name in os.listdir(model_dir) if name.endswith(".pt")]

    if not local_models:
        # Nothing to chat with yet; halt the script run here.
        st.warning("⚠️ No fine-tuned models found. Fine-tune a model first.")
        st.stop()

    selected_model = st.sidebar.selectbox("🛠️ Select Local Model", local_models)
    model_path = os.path.join(model_dir, selected_model)
65
+
66
+ # -------------------------------
67
+ # 🔥 Model Loading
68
+ # -------------------------------
69
hf_token = get_hf_token()

if model_source == "Local (.pt)":
    # The base architecture is loaded first; the fine-tuned weights from
    # ``model_path`` are then applied on top of it.
    tokenizer, model = load_model("google/gemma-3-1b-it", hf_token)  # Base model first
    if not model:
        # Stop before handing an unusable base model to load_finetuned_model,
        # mirroring the failure check done for Hub models below.
        st.error("❌ Failed to load base model `google/gemma-3-1b-it`.")
        st.stop()

    model = load_finetuned_model(model, model_path)
    if model:
        st.success(f"✅ Local fine-tuned model loaded: `{selected_model}`")
    else:
        st.error("❌ Failed to load local model.")
        st.stop()

else:
    tokenizer, model = load_model(selected_model, hf_token)
    if model:
        st.success(f"✅ Hugging Face model loaded: `{selected_model}`")
    else:
        st.error("❌ Failed to load Hugging Face model.")
        st.stop()
87
+
88
+ # -------------------------------
89
+ # ⚙️ Model Configuration Panel
90
+ # -------------------------------
91
st.sidebar.header("⚙️ Model Configuration")
# NOTE(review): these three sampling settings are collected here, but the
# generate_response(...) call on this page does not receive them.
temperature = st.sidebar.slider("🔥 Temperature", min_value=0.1, max_value=1.5, value=0.7, step=0.1)
top_p = st.sidebar.slider("🎯 Top-p", min_value=0.1, max_value=1.0, value=0.9, step=0.1)
repetition_penalty = st.sidebar.slider("🔁 Repetition Penalty", min_value=0.5, max_value=2.0, value=1.0, step=0.1)

# -------------------------------
# 💬 Chat Interface
# -------------------------------
# The chat log lives in session state so it survives script reruns.
if "conversation" not in st.session_state:
    st.session_state["conversation"] = []

prompt = st.text_area("💬 Enter your message:", value="Hello, how are you?", key="prompt", height=100)
max_length = st.slider("📏 Max Response Length", min_value=50, max_value=1000, value=300, step=50)
104
+
105
+ # -------------------------------
106
+ # 🚀 Streaming Response Function
107
+ # -------------------------------
108
def stream_response():
    """
    Generate a reply to the current prompt and record the exchange.

    Despite the name, nothing is streamed: ``generate_response`` is called
    once and its complete result is used. The function reads the module-level
    ``prompt``, ``model``, ``tokenizer`` and ``max_length`` values.

    Returns:
        The generated response, or ``None`` when the prompt is blank or
        generation produced nothing (a message is shown in both cases).
    """
    # Do not send an empty message to the model or log it in the history.
    if not prompt or not prompt.strip():
        st.warning("⚠️ Please enter a message before generating.")
        return None

    response = generate_response(prompt, model, tokenizer, max_length)

    if not response:
        st.error("❌ Failed to generate response.")
        return None

    # Both sides of the exchange share one timestamp, taken after generation.
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.session_state.conversation.extend([
        {"sender": "👤 You", "message": prompt, "timestamp": timestamp},
        {"sender": "🤖 AI", "message": response, "timestamp": timestamp},
    ])
    return response
122
+
123
+ # -------------------------------
124
+ # 🎯 Conversation Controls
125
+ # -------------------------------
126
col1, col2, col3 = st.columns([1, 1, 1])

if col1.button("🚀 Generate (CTRL+Enter)", help="Use CTRL + Enter to generate"):
    stream_response()

if col2.button("🗑️ Clear Conversation"):
    st.session_state.conversation = []

# Export & Import
if col3.download_button("📥 Export Chat", json.dumps(st.session_state.conversation, indent=4), "chat_history.json"):
    st.success("✅ Chat exported successfully!")

uploaded_file = st.file_uploader("📤 Import Conversation", type=["json"])

if uploaded_file is not None:
    # The uploader keeps returning the same file on every rerun. Import it
    # only once, otherwise each rerun would overwrite newly generated messages
    # and undo "Clear Conversation".
    upload_id = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("imported_chat_id") != upload_id:
        try:
            imported = json.load(uploaded_file)
        except ValueError as exc:  # covers JSONDecodeError and bad encodings
            st.error(f"❌ Could not read conversation file: {exc}")
        else:
            # The history display indexes these three keys on every entry.
            required_keys = {"sender", "message", "timestamp"}
            is_valid = isinstance(imported, list) and all(
                isinstance(entry, dict) and required_keys <= entry.keys()
                for entry in imported
            )
            if is_valid:
                st.session_state.conversation = imported
                st.session_state["imported_chat_id"] = upload_id
                st.success("✅ Conversation imported successfully!")
            else:
                st.error("❌ Invalid conversation file: expected a list of messages with sender, message and timestamp.")
else:
    # Forget the last import so the same file can be imported again later.
    st.session_state.pop("imported_chat_id", None)
143
+
144
+ # -------------------------------
145
+ # 🛠️ Display Conversation
146
+ # -------------------------------
147
st.subheader("📜 Conversation History")

# Render each stored message: sender and timestamp first, then the text.
for entry in st.session_state.conversation:
    with st.container():
        header = f"**{entry['sender']}** \n🕒 {entry['timestamp']}"
        st.markdown(header)
        st.write(entry['message'])
153
+
pages/Conversion.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from utils import load_model, convert_to_torchscript, convert_to_onnx, get_hf_token
3
+
4
st.title("🔧 Model Conversion")

# Load the HF token from utils
hf_token = get_hf_token()

# Load the model
model_path = "fine_tuned_model.pt"
tokenizer, model = load_model("google/gemma-3-1b-it", hf_token, model_path)

if not model:
    # Without a loaded model there is nothing to convert; halt instead of
    # passing an unusable model to the converters below.
    st.error("❌ Failed to load model for conversion.")
    st.stop()

conversion_option = st.selectbox("Select Conversion Format", ["TorchScript", "ONNX"])

if st.button("Convert Model"):
    if conversion_option == "TorchScript":
        with st.spinner("Converting to TorchScript..."):
            ts_model = convert_to_torchscript(model)
        st.success("Model converted to TorchScript!")

    elif conversion_option == "ONNX":
        with st.spinner("Converting to ONNX..."):
            onnx_path = convert_to_onnx(model)
        st.success("Model converted to ONNX!")
pages/Dataset_Management.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import os
7
+ from utils import (
8
+ load_dataset,
9
+ save_dataset,
10
+ clean_dataset,
11
+ compute_dataset_score,
12
+ detect_outliers,
13
+ apply_transformation,
14
+ list_datasets,
15
+ detect_inconsistent_types
16
+ )
17
+
18
+ # -------------------------------
19
+ # Constants & Setup
20
+ # -------------------------------
21
+ DATASET_DIR = "datasets"
22
+ DEFAULT_DATASET = "train_data.csv"
23
+ os.makedirs(DATASET_DIR, exist_ok=True) # Ensure directory exists
24
+
25
+ # -------------------------------
26
+ # Sidebar: Dataset Selection
27
+ # -------------------------------
28
import json  # required by the JSON upload branch; missing from this page's import header

st.sidebar.header("📊 Dataset Selection")

# List available datasets from the datasets folder
available_datasets = list_datasets(DATASET_DIR)
dataset_choice = st.sidebar.radio("Choose Dataset Source:", ["Select Existing Dataset", "Upload New Dataset"])

dataset_path = None

if dataset_choice == "Select Existing Dataset":
    if available_datasets:
        selected_dataset = st.sidebar.selectbox("Select Dataset:", available_datasets)
        dataset_path = os.path.join(DATASET_DIR, selected_dataset)
        st.sidebar.success(f"Using `{selected_dataset}` dataset.")
    else:
        st.sidebar.warning("No datasets found. Please upload a new dataset.")
elif dataset_choice == "Upload New Dataset":
    uploaded_file = st.sidebar.file_uploader("Upload Dataset (CSV, JSON, or Excel)", type=["csv", "json", "xlsx"])
    if uploaded_file:
        # Keep only the file name so the dataset is always written inside
        # DATASET_DIR, whatever path components the upload name carries.
        safe_name = os.path.basename(uploaded_file.name)
        file_ext = safe_name.split('.')[-1].lower()
        target_path = os.path.join(DATASET_DIR, safe_name)

        # The uploader returns the same file on every rerun. Parse and save it
        # only once, so a cleaned version later written to the same path is
        # not overwritten by the raw upload on the next rerun.
        upload_id = (safe_name, uploaded_file.size)
        if st.session_state.get("saved_upload_id") != upload_id:
            try:
                if file_ext == "csv":
                    new_df = pd.read_csv(uploaded_file)
                elif file_ext == "json":
                    new_df = pd.json_normalize(json.load(uploaded_file))
                elif file_ext == "xlsx":
                    new_df = pd.read_excel(uploaded_file)
                else:
                    st.error("Unsupported file format.")
                    st.stop()
            except Exception as e:
                st.error(f"Error reading file: {e}")
                st.stop()

            # Save the new dataset with its filename
            save_dataset(new_df, target_path)
            st.session_state["saved_upload_id"] = upload_id

        dataset_path = target_path
        st.sidebar.success(f"Dataset `{uploaded_file.name}` uploaded successfully!")
        available_datasets = list_datasets(DATASET_DIR)  # Refresh list
    else:
        st.sidebar.warning("Please upload a dataset.")
68
+
69
+ # -------------------------------
70
+ # Load the Selected Dataset
71
+ # -------------------------------
72
# With no dataset chosen, fall back to an empty frame so every tab below can
# rely on ``df`` existing and test ``df.empty``.
if not dataset_path:
    df = pd.DataFrame()
    st.warning("No dataset selected. Please choose or upload a dataset.")
else:
    df = load_dataset(dataset_path)
    if df.empty:
        st.warning("Dataset is empty or failed to load.")
79
+
80
+ # -------------------------------
81
+ # Main App Title & Description
82
+ # -------------------------------
83
st.title("📊 The Data Hub")

# -------------------------------
# Tabs for Operations
# -------------------------------
# One tab per operation; the sections below index ``tabs`` by position.
tab_labels = [
    "View & Summary",
    "Clean Data",
    "Visualize Data",
    "Data Profiling",
    "Outlier Detection",
    "Custom Transformations",
    "Export",
]
tabs = st.tabs(tab_labels)
94
+
95
+ # -------------------------------
96
+ # Tab 1: View & Summary
97
+ # -------------------------------
98
with tabs[0]:
    st.subheader("📋 Current Dataset Preview")
    if df.empty:
        st.warning("No dataset available. Please choose or upload a dataset.")
    else:
        # First ten rows, then summary statistics over all column types.
        preview = df.head(10)
        st.dataframe(preview)
        st.markdown("#### 🔎 Basic Statistics")
        summary = df.describe(include="all")
        st.write(summary)
106
+
107
+ # -------------------------------
108
+ # Tab 2: Clean Data
109
+ # -------------------------------
110
with tabs[1]:
    st.subheader("🧼 Clean Your Dataset")
    if df.empty:
        st.warning("No dataset available for cleaning.")
    else:
        # Cleaning options passed on to clean_dataset.
        remove_duplicates = st.checkbox("Remove Duplicate Rows", value=True)
        fill_missing = st.checkbox("Fill Missing Values", value=False)
        fill_value = st.text_input("Fill missing values with:", value="0")

        st.markdown("#### Optional: Rename Columns")
        # One text box per column, pre-filled with the current name.
        new_names = {
            column: st.text_input(f"Rename column '{column}'", value=column)
            for column in df.columns
        }

        if st.button("Clean Dataset"):
            cleaned_df = clean_dataset(df, remove_duplicates, fill_missing, fill_value).rename(columns=new_names)
            # The cleaned data is written back to the path it was loaded from.
            save_dataset(cleaned_df, dataset_path)
            st.success("✅ Dataset cleaned successfully!")
            st.dataframe(cleaned_df.head())
            # Later tabs in this same run work on the cleaned frame.
            df = cleaned_df
131
+
132
+ # -------------------------------
133
+ # Tab 3: Visualize Data (Fixed KeyError Issue)
134
+ # -------------------------------
135
with tabs[2]:
    st.subheader("📊 Visualize Your Data")

    if not df.empty:
        viz_type = st.selectbox("Select Visualization Type", ["Histogram", "Scatter", "Box Plot", "Heatmap", "Line Chart"])
        numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()

        if numeric_cols:
            # Validate column selection
            col = st.selectbox("Select Column", numeric_cols)

            # Compare against None rather than truthiness: a column labelled
            # 0 or "" is a valid selection but would be falsy.
            if col is not None:
                fig, ax = plt.subplots()

                if viz_type == "Histogram":
                    ax.hist(df[col].dropna(), bins=20, color="skyblue", edgecolor="black")
                elif viz_type == "Box Plot":
                    sns.boxplot(x=df[col].dropna(), ax=ax)
                elif viz_type == "Scatter":
                    # Scatter uses its own pair of columns instead of ``col``.
                    x_col = st.selectbox("X-axis", numeric_cols)
                    y_col = st.selectbox("Y-axis", numeric_cols)
                    if x_col is not None and y_col is not None:
                        ax.scatter(df[x_col], df[y_col], color="green")
                elif viz_type == "Heatmap":
                    corr = df[numeric_cols].corr()
                    sns.heatmap(corr, annot=True, cmap="coolwarm", ax=ax)
                elif viz_type == "Line Chart":
                    ax.plot(df.index, df[col], marker="o")

                st.pyplot(fig)
                # Close the figure once rendered; pyplot otherwise keeps every
                # figure from every rerun alive.
                plt.close(fig)
            else:
                st.warning("Please select a valid column.")
        else:
            st.warning("No numeric columns available for visualization.")
    else:
        st.warning("No dataset available for visualization.")
171
+
172
+ # -------------------------------
173
+ # Tab 4: Data Profiling
174
+ # -------------------------------
175
with tabs[3]:
    if not df.empty:

        # -------------------------------
        # 1. General Dataset Info
        # -------------------------------
        st.markdown("### 🛠️ General Information")
        st.write(f"✅ **Total Rows:** `{df.shape[0]}`")
        st.write(f"✅ **Total Columns:** `{df.shape[1]}`")
        st.write(f"✅ **Memory Usage:** `{df.memory_usage(deep=True).sum() / (1024 ** 2):.2f} MB`")
        st.write(f"✅ **Dataset Shape:** `{df.shape}`")

        # -------------------------------
        # 2. Dataset Quality Score
        # -------------------------------
        st.markdown("### 📊 Dataset Quality Score")
        score = compute_dataset_score(df)
        st.success(f"💯 Dataset Quality Score: `{score} / 100`")

        # -------------------------------
        # 3. Column Overview with Stats
        # -------------------------------
        st.markdown("### 🔥 Column Overview")

        # Numeric and categorical columns
        numeric_cols = df.select_dtypes(include=["number"]).columns
        categorical_cols = df.select_dtypes(include=["object"]).columns

        # One row per column: dtype, missing count/percentage, distinct values.
        missing_per_column = df.isnull().sum()
        profile = pd.DataFrame({
            "Column": df.columns,
            "Data Type": df.dtypes.values,
            "Missing Values": missing_per_column.values,
            "Missing %": (missing_per_column / len(df) * 100).values,
            "Unique Values": df.nunique().values
        })

        # Add numeric statistics
        if len(numeric_cols) > 0:
            numeric_frame = df[numeric_cols]
            numeric_stats = pd.DataFrame({
                "Column": numeric_cols,
                "Min": numeric_frame.min().values,
                "Max": numeric_frame.max().values,
                "Mean": numeric_frame.mean().values,
                "Std Dev": numeric_frame.std().values,
                "Skewness": numeric_frame.skew().values,
                "Kurtosis": numeric_frame.kurt().values
            })

            # Merge stats with the profile; the left join keeps non-numeric
            # columns, which get NaN in the numeric statistics.
            profile = profile.merge(numeric_stats, on="Column", how="left")

        st.dataframe(profile)

        # -------------------------------
        # 4. Missing Values Visualization
        # -------------------------------
        st.markdown("### 🔎 Missing Values Distribution")
        missing_values = df.isnull().sum()
        missing_values = missing_values[missing_values > 0]

        if not missing_values.empty:
            fig, ax = plt.subplots(figsize=(12, 5))
            sns.barplot(x=missing_values.index, y=missing_values.values, ax=ax, color="skyblue")
            ax.set_title("Missing Values per Column")
            ax.set_ylabel("Missing Count")
            ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
            st.pyplot(fig)
            # Close after rendering so figures do not pile up across reruns.
            plt.close(fig)
        else:
            st.success("No missing values found!")

        # -------------------------------
        # 5. Duplicates Detection
        # -------------------------------
        st.markdown("### 🔥 Duplicates & Constant Columns Detection")

        # Duplicates
        duplicate_count = df.duplicated().sum()
        st.write(f"🔁 **Duplicate Rows:** `{duplicate_count}`")

        # Constant Columns: exactly one distinct non-null value
        constant_cols = [col for col in df.columns if df[col].nunique() == 1]
        if constant_cols:
            st.write(f"🚩 **Constant Columns:** `{constant_cols}`")
        else:
            st.success("No constant columns detected!")

        # -------------------------------
        # 6. Cardinality Analysis
        # -------------------------------
        st.markdown("### 🧬 Cardinality Analysis")

        # A column is high-cardinality when its distinct values exceed 80% of
        # the row count.
        high_cardinality = [col for col in df.columns if df[col].nunique() > len(df) * 0.8]
        if high_cardinality:
            st.write(f"🔢 **High-Cardinality Columns:** `{high_cardinality}`")
        else:
            st.success("No high-cardinality columns detected!")

        # -------------------------------
        # 7. Top Frequent & Rare Values
        # -------------------------------
        st.markdown("### 🎯 Frequent & Rare Values")

        for col in categorical_cols:
            st.write(f"✅ **{col}**")

            # Count once, then take the five most and five least frequent.
            counts = df[col].value_counts()
            top_values = counts.nlargest(5)
            rare_values = counts.nsmallest(5)

            st.write("📊 **Top Frequent Values:**")
            st.dataframe(top_values)

            st.write("🧪 **Rare Values:**")
            st.dataframe(rare_values)

        # -------------------------------
        # 8. Correlation Matrix
        # -------------------------------
        st.markdown("### 📊 Correlation Matrix")

        if len(numeric_cols) > 1:
            corr = df[numeric_cols].corr()

            fig, ax = plt.subplots(figsize=(12, 8))
            sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", square=True, ax=ax)
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.info("Not enough numeric columns for correlation analysis.")

        # -------------------------------
        # 9. Pair Plot (Numerical Relationships)
        # -------------------------------
        st.markdown("### 🔥 Pair Plot (Numerical Relationships)")

        if len(numeric_cols) >= 2:
            pairplot = sns.pairplot(df[numeric_cols], diag_kind='kde')
            st.pyplot(pairplot.fig)
            # The pair plot owns its own figure; release it as well.
            plt.close(pairplot.fig)
        else:
            st.info("Not enough numeric columns for pair plot visualization.")

        # -------------------------------
        # 10. Outlier Detection
        # -------------------------------
        st.markdown("### 🚩 Outlier Detection")

        outliers = detect_outliers(df)
        if outliers:
            st.write("✅ **Outliers Detected:**")
            # ``outliers`` is used as a mapping here (``.items()``); its pairs
            # are materialised into a list before building the table.
            st.dataframe(pd.DataFrame(list(outliers.items()), columns=["Column", "Outlier Count"]))
        else:
            st.success("No significant outliers detected!")

        # -------------------------------
        # 11. Inconsistent Data Types
        # -------------------------------
        st.markdown("### 🚫 Inconsistent Data Types")

        inconsistent_types = detect_inconsistent_types(df)
        if inconsistent_types:
            st.write("⚠️ **Inconsistent Data Types Detected:**")
            st.write(inconsistent_types)
        else:
            st.success("No inconsistent data types detected!")

    else:
        st.warning("No dataset available for profiling.")
340
+
341
+ # -------------------------------
342
+ # Tab 5: Outlier Detection
343
+ # -------------------------------
344
with tabs[4]:
    st.subheader("🚀 Outlier Detection")
    if df.empty:
        st.warning("No dataset available for outlier detection.")
    else:
        # Show whatever detect_outliers reports for the current frame.
        st.write(detect_outliers(df))
351
+
352
+ # -------------------------------
353
+ # Tab 6: Export
354
+ # -------------------------------
355
import io  # in-memory buffer for the Excel export below

# "Export" is the seventh label passed to st.tabs, i.e. index 6; index 5 is
# the "Custom Transformations" tab.
with tabs[6]:
    st.subheader("📤 Export Dataset")
    export_format = st.selectbox("Export Format", ["CSV", "Excel", "JSON"])
    if not df.empty:
        # Serialise in the format that was actually selected and give the
        # download a matching file extension and MIME type.
        if export_format == "CSV":
            export_data = df.to_csv(index=False)
            export_name = "dataset.csv"
            export_mime = "text/csv"
        elif export_format == "JSON":
            export_data = df.to_json(orient="records")
            export_name = "dataset.json"
            export_mime = "application/json"
        else:
            # Excel is binary, so it is written to an in-memory buffer first.
            excel_buffer = io.BytesIO()
            df.to_excel(excel_buffer, index=False)
            export_data = excel_buffer.getvalue()
            export_name = "dataset.xlsx"
            export_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

        st.download_button("Download", export_data, export_name, mime=export_mime)
360
+
pages/Finetune.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import json
4
+ import os
5
+ from datetime import datetime
6
+ from utils import (
7
+ load_model,
8
+ get_hf_token,
9
+ simulate_training,
10
+ plot_training_metrics,
11
+ load_finetuned_model,
12
+ save_model
13
+ )
14
+
15
st.title("🔥 Fine-tune the Gemma Model")

# -------------------------------
# Finetuning Option Selection
# -------------------------------
finetune_option = st.radio("Select Finetuning Option", ["Fine-tune from scratch", "Refinetune existing model"])

# -------------------------------
# Model Selection Logic
# -------------------------------
# Hugging Face checkpoints offered for a fresh fine-tuning run.
model_list = [
    "google/gemma-3-1b-pt",
    "google/gemma-3-1b-it",
    "google/gemma-3-4b-pt",
    "google/gemma-3-4b-it",
    "google/gemma-3-12b-pt",
    "google/gemma-3-12b-it",
    "google/gemma-3-27b-pt",
    "google/gemma-3-27b-it",
]
# Directory scanned for previously saved ``.pt`` checkpoints.
model_dir = "models"

selected_model = None
saved_model_path = None

# The refine branch is evaluated first so that, when it has to fall back to
# "Fine-tune from scratch", the base-model selector below is still rendered.
# Previously the fallback left `selected_model` as None with no way to pick one.
if finetune_option == "Refinetune existing model":
    if os.path.isdir(model_dir):
        # Sorted so the selectbox order is stable across reruns.
        saved_models = sorted(f for f in os.listdir(model_dir) if f.endswith(".pt"))
    else:
        saved_models = []

    if saved_models:
        chosen_checkpoint = st.selectbox("Select a saved model to re-finetune", saved_models)
        saved_model_path = os.path.join(model_dir, chosen_checkpoint)
        st.success(f"✅ Selected model for refinement: `{saved_model_path}`")
    else:
        st.warning("⚠️ No saved models found! Switching to fine-tuning from scratch.")
        finetune_option = "Fine-tune from scratch"

if finetune_option == "Fine-tune from scratch":
    selected_model = st.selectbox("🛠️ Select Gemma Model to Fine-tune", model_list)
58
+
59
+ # -------------------------------
60
+ # Dataset Selection
61
+ # -------------------------------
62
+
63
st.subheader("📚 Dataset Selection")

# Dataset source selection
dataset_option = st.radio("Choose dataset:", ["Upload New Dataset", "Use Existing Dataset (`train_data.csv`)"])

# All training data is accumulated in this single CSV file.
dataset_path = "train_data.csv"

if dataset_option == "Upload New Dataset":
    uploaded_file = st.file_uploader("📤 Upload Dataset (CSV or JSON)", type=["csv", "json"])

    if uploaded_file is None:
        # Uploader cleared: allow the same file to be uploaded again later.
        st.session_state.pop("finetune_last_upload", None)
    else:
        # Streamlit reruns this script on every widget interaction while the
        # uploaded file stays attached to the uploader. Remember which upload
        # was already written so it is not appended again on each rerun.
        upload_key = (uploaded_file.name, uploaded_file.size)

        if st.session_state.get("finetune_last_upload") == upload_key:
            st.info(f"ℹ️ `{uploaded_file.name}` has already been added to `{dataset_path}`.")
        else:
            # Handle CSV or JSON upload
            if uploaded_file.name.endswith(".csv"):
                new_data = pd.read_csv(uploaded_file)
            elif uploaded_file.name.endswith(".json"):
                json_data = json.load(uploaded_file)
                new_data = pd.json_normalize(json_data)
            else:
                st.error("❌ Unsupported file format. Please upload CSV or JSON.")
                st.stop()

            # Append or create new dataset
            # NOTE(review): rows are appended without checking that the new
            # columns match the existing header - confirm this is intended.
            if os.path.exists(dataset_path):
                new_data.to_csv(dataset_path, mode='a', index=False, header=False)
                st.success(f"✅ Data appended to `{dataset_path}`!")
            else:
                new_data.to_csv(dataset_path, index=False)
                st.success(f"✅ Dataset saved as `{dataset_path}`!")

            st.session_state["finetune_last_upload"] = upload_key

elif dataset_option == "Use Existing Dataset (`train_data.csv`)":
    if os.path.exists(dataset_path):
        st.success("✅ Using existing `train_data.csv` for fine-tuning.")
    else:
        st.error("❌ `train_data.csv` not found! Please upload a new dataset.")
        st.stop()
98
+
99
+ # -------------------------------
100
+ # Hyperparameters Configuration
101
+ # -------------------------------
102
# Lower bounds keep the inputs meaningful: the default float step (0.01) let a
# single click push the learning rate negative, and zero/negative epoch or
# batch counts were accepted.
learning_rate = st.number_input(
    "📊 Learning Rate", min_value=1e-5, value=1e-4, step=1e-5, format="%.5f"
)
batch_size = st.number_input("🛠️ Batch Size", min_value=1, value=16, step=1)
epochs = st.number_input("⏱️ Epochs", min_value=1, value=3, step=1)
105
+
106
+ # -------------------------------
107
+ # Fine-tuning Execution
108
+ # -------------------------------
109
if st.button("🚀 Start Fine-tuning"):
    st.info("Fine-tuning process initiated...")

    # Retrieve Hugging Face Token. get_hf_token() reports the problem itself
    # when no token is configured, so just halt this run.
    hf_token = get_hf_token()
    if not hf_token:
        st.stop()

    refine_existing = finetune_option == "Refinetune existing model" and saved_model_path

    if refine_existing:
        # Saved checkpoints only hold weights, so the base architecture has
        # to be loaded first and the checkpoint applied on top of it.
        base_model_id = "google/gemma-3-1b-it"
        tokenizer, model = load_model(base_model_id, hf_token)
        if model is None:
            st.error("❌ Failed to load the base model. Aborting.")
            st.stop()

        # Load the saved model checkpoint for re-finetuning
        model = load_finetuned_model(model, saved_model_path)

        if model is not None:
            st.success(f"✅ Loaded saved model: `{saved_model_path}` for refinement!")
        else:
            st.error("❌ Failed to load the saved model. Aborting.")
            st.stop()

    else:
        # Fine-tune from scratch (load base model)
        if not selected_model:
            st.error("❌ Please select a model to fine-tune.")
            st.stop()

        base_model_id = selected_model
        tokenizer, model = load_model(base_model_id, hf_token)

        if model is not None:
            st.success(f"✅ Base model loaded: `{selected_model}`")
        else:
            st.error("❌ Failed to load the base model. Aborting.")
            st.stop()

    # Simulate fine-tuning loop
    progress_bar = st.progress(0)
    training_placeholder = st.empty()
    total_epochs = int(epochs)

    for epoch, losses, accs in simulate_training(total_epochs):
        fig = plot_training_metrics(epoch, losses, accs)
        training_placeholder.pyplot(fig)
        progress_bar.progress(min(1.0, epoch / total_epochs))

    # Save fine-tuned model with timestamp. The name is derived from the base
    # model id, which is set on both paths; `selected_model` is None when
    # refining and used to raise AttributeError here.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    new_model_name = f"models/fine_tuned_model_{base_model_id.replace('/', '_')}_{timestamp}.pt"

    # Save the fine-tuned model
    saved_model_path = save_model(model, new_model_name)

    if saved_model_path:
        st.success(f"✅ Fine-tuning completed! Model saved as `{saved_model_path}`")

        # Load the fine-tuned model for immediate inference
        model = load_finetuned_model(model, saved_model_path)

        if model is not None:
            st.success("🛠️ Fine-tuned model loaded and ready for inference!")
        else:
            st.error("❌ Failed to load the fine-tuned model for inference.")
    else:
        st.error("❌ Failed to save the fine-tuned model.")
requirements.txt CHANGED
@@ -3,4 +3,10 @@ pandas==2.2.3
3
  numpy==2.2.4
4
  torch==2.6.0
5
  transformers @ git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
6
- matplotlib==3.10.1
 
 
 
 
 
 
 
3
  numpy==2.2.4
4
  torch==2.6.0
5
  transformers @ git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
6
+ matplotlib==3.10.1
7
+ rich>=13.1.0
8
+ FuzzyTM>=0.4.0
9
+ requests>=2.28.0
10
+ xlsxwriter>=3.0.1
11
+ python-dotenv>=0.19.0
12
+ scipy>=1.7.3
utils.py ADDED
@@ -0,0 +1,468 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import torch
5
+ from transformers import AutoTokenizer, AutoModelForCausalLM
6
+ import matplotlib.pyplot as plt
7
+ import time
8
+ import json
9
+ import re
10
+ import os
11
+ import asyncio
12
+ from dotenv import load_dotenv
13
+ from scipy.stats import skew, kurtosis, zscore
14
+
15
+ # -------------------------------
16
+ # Environment and Token Management
17
+ # -------------------------------
18
+
19
+ # Load environment variables from .env file in local development
20
+ load_dotenv()
21
+
22
def get_hf_token():
    """
    Return the Hugging Face token, or None when none is configured.

    The ``HF_TOKEN`` environment variable takes precedence; Streamlit secrets
    are consulted only when the variable is unset or empty. If neither source
    provides a token, an error is shown in the app.
    """
    env_token = os.getenv("HF_TOKEN")
    if env_token:
        return env_token

    # Environment had nothing usable: fall back to Streamlit secrets.
    try:
        return st.secrets["HF_TOKEN"]
    except (FileNotFoundError, KeyError):
        st.error("❌ HF_TOKEN not found. Add it to .env or secrets.toml.")
        return None
37
+
38
+
39
+ # -------------------------------
40
+ # Model Loading and Management
41
+ # -------------------------------
42
+
43
async def async_load(model_id: str):
    """
    Placeholder coroutine used to spin up an asyncio event loop.

    ``model_id`` is accepted but not used; the coroutine only sleeps briefly.
    """
    pause_seconds = 0.1
    await asyncio.sleep(pause_seconds)
48
+
49
@st.cache_resource
def _load_model_cached(model_id: str, token: str, checkpoint_path: str = None):
    """Load tokenizer and model; raises on failure so errors are never cached."""
    asyncio.run(async_load(model_id))

    tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
    model = AutoModelForCausalLM.from_pretrained(model_id, token=token)

    # Load fine-tuned checkpoint if provided
    if checkpoint_path and os.path.exists(checkpoint_path):
        model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cpu')))
        model.eval()
        st.success("✅ Fine-tuned model loaded successfully!")

    return tokenizer, model


def load_model(model_id: str, token: str, checkpoint_path: str = None):
    """
    Loads and caches the Gemma model and tokenizer with the Hugging Face token.

    Successful loads are cached by ``st.cache_resource``. Failures are reported
    in the app and returned as ``(None, None)`` without being cached, so a
    transient error (network, missing token, ...) is retried on the next call
    instead of being served from the cache indefinitely.

    Args:
        model_id (str): The Hugging Face model ID.
        token (str): The authentication token.
        checkpoint_path (str): Optional path to a fine-tuned model checkpoint.
            Ignored when the path does not exist.

    Returns:
        tuple: ``(tokenizer, model)``, or ``(None, None)`` when loading failed.
    """
    try:
        return _load_model_cached(model_id, token, checkpoint_path)
    except Exception as e:
        st.error(f"❌ Model loading failed: {e}")
        return None, None


# Keep the cache-clearing handle that the decorated function used to expose.
load_model.clear = _load_model_cached.clear
79
+
80
+
81
+ # -------------------------------
82
+ # Model Saving Function
83
+ # -------------------------------
84
+
85
def save_model(model, model_name: str):
    """
    Saves the fine-tuned model's state dict to the specified path.

    Args:
        model (torch.nn.Module): The PyTorch model instance.
        model_name (str): The file path to save the model.

    Returns:
        str: The path where the model is saved, or None if saving failed
        (the error is shown in the app).
    """
    try:
        # Ensure the target directory exists. A bare file name has no
        # directory part, and os.makedirs("") would raise.
        target_dir = os.path.dirname(model_name)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Save the model
        torch.save(model.state_dict(), model_name)
        st.success(f"✅ Model saved successfully at `{model_name}`")
        return model_name
    except Exception as e:
        st.error(f"❌ Failed to save model: {e}")
        return None
107
+
108
+
109
+ # -------------------------------
110
+ # File Processing and Cleaning
111
+ # -------------------------------
112
+
113
def preprocess_data(uploaded_file, file_extension):
    """
    Reads the uploaded file and returns a processed version.

    Supports CSV, JSONL, and TXT.

    Args:
        uploaded_file: Binary file-like object with the uploaded content.
        file_extension (str): "csv", "jsonl" or "txt".

    Returns:
        A DataFrame for CSV and JSONL (or the raw list of records when the
        records cannot be tabulated), a list of lines for TXT, and None for
        an unsupported extension or when reading fails.
    """
    try:
        if file_extension == "csv":
            return pd.read_csv(uploaded_file)

        if file_extension == "jsonl":
            # Skip blank lines (e.g. a trailing newline); json.loads would
            # otherwise fail on them and abort the whole file.
            data = [
                json.loads(line)
                for line in uploaded_file.readlines()
                if line.strip()
            ]
            try:
                return pd.DataFrame(data)
            except Exception:
                st.warning("⚠️ Unable to convert JSONL to table. Previewing raw JSON.")
                return data

        if file_extension == "txt":
            text_data = uploaded_file.read().decode("utf-8")
            return text_data.splitlines()

        # Unsupported extension: nothing to return.
        return None

    except Exception as e:
        st.error(f"❌ Error processing file: {e}")
        return None
137
+
138
+
139
def clean_text(text, lowercase=True, remove_punctuation=True):
    """
    Normalise a text string.

    Optionally lower-cases the text and strips every character that is
    neither a word character nor whitespace.
    """
    cleaned = text.lower() if lowercase else text
    if not remove_punctuation:
        return cleaned
    return re.sub(r'[^\w\s]', '', cleaned)
148
+
149
+
150
+ # -------------------------------
151
+ # Model Conversion and Quantization
152
+ # -------------------------------
153
+
154
def quantize_model(model):
    """
    Apply dynamic int8 quantization to the model's ``torch.nn.Linear`` layers.

    Returns the quantized model, or the original model unchanged when
    quantization fails (the error is shown in the app).
    """
    target_layers = {torch.nn.Linear}
    try:
        result = torch.quantization.quantize_dynamic(
            model, target_layers, dtype=torch.qint8
        )
        st.success("✅ Model quantized successfully!")
    except Exception as e:
        st.error(f"❌ Quantization failed: {e}")
        return model
    return result
167
+
168
+
169
def convert_to_torchscript(model, output_path="model_ts.pt"):
    """
    Trace the model with a dummy batch of token ids and save it as TorchScript.

    Returns ``output_path`` on success, or None when tracing or saving fails
    (the error is shown in the app).
    """
    try:
        # One sequence of ten random ids in [0, 100) drives the trace.
        sample_ids = torch.randint(0, 100, (1, 10))
        traced = torch.jit.trace(model, sample_ids)
        traced.save(output_path)
    except Exception as e:
        st.error(f"❌ TorchScript conversion failed: {e}")
        return None
    return output_path
181
+
182
+
183
def convert_to_onnx(model, output_path="model.onnx"):
    """
    Export the model to ONNX using a dummy batch of token ids.

    Returns ``output_path`` on success, or None when the export fails
    (the error is shown in the app).
    """
    try:
        # One sequence of ten random ids in [0, 100) is the example input.
        sample_ids = torch.randint(0, 100, (1, 10))
        torch.onnx.export(
            model,
            sample_ids,
            output_path,
            input_names=["input"],
            output_names=["output"],
        )
    except Exception as e:
        st.error(f"❌ ONNX conversion failed: {e}")
        return None
    return output_path
194
+
195
+
196
+ # -------------------------------
197
+ # Model Inference and Training
198
+ # -------------------------------
199
+
200
def simulate_training(num_epochs):
    """
    Generate fake training metrics, one step per epoch, for demonstration.

    Yields ``(epoch, losses, accuracies)`` where both lists hold every value
    produced so far; the same list objects are yielded (and extended) on each
    step. The generator sleeps one second after each yielded epoch.
    """
    losses, accuracies = [], []
    for epoch in range(1, num_epochs + 1):
        # Decaying loss and rising accuracy, each with a little random noise.
        current_loss = np.exp(-epoch) + np.random.random() * 0.1
        current_acc = 0.5 + (epoch / num_epochs) * 0.5 + np.random.random() * 0.05
        losses.append(current_loss)
        accuracies.append(current_acc)
        yield epoch, losses, accuracies
        time.sleep(1)
214
+
215
+
216
def plot_training_metrics(epochs, loss_values, accuracy_values):
    """
    Draw loss and accuracy curves side by side and return the figure.

    ``epochs`` is the number of epochs to plot; the x axis runs from 1 to
    ``epochs`` inclusive.
    """
    epoch_axis = range(1, epochs + 1)
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

    panels = (
        (loss_ax, loss_values, 'red', "Training Loss", "Loss"),
        (acc_ax, accuracy_values, 'green', "Training Accuracy", "Accuracy"),
    )
    for axis, values, colour, title, y_label in panels:
        axis.plot(epoch_axis, values, marker='o', color=colour)
        axis.set_title(title)
        axis.set_xlabel("Epoch")
        axis.set_ylabel(y_label)

    return fig
232
+
233
+
234
def generate_response(prompt, model, tokenizer, max_length=200):
    """
    Generates a response using the fine-tuned model.

    Args:
        prompt (str): Text to continue.
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer matching the model.
        max_length (int): Maximum total length passed to ``generate``.

    Returns:
        str: The decoded first sequence, or "" when generation fails
        (the error is shown in the app).
    """
    try:
        # Pass the full encoding (ids and attention mask) instead of only the
        # input ids, so generate() does not have to guess the mask.
        encoded = tokenizer(prompt, return_tensors="pt")

        with torch.no_grad():
            outputs = model.generate(
                **encoded,
                max_length=max_length,
                num_return_sequences=1,
                # temperature only takes effect when sampling is enabled;
                # without do_sample=True decoding is greedy and it is ignored.
                do_sample=True,
                temperature=0.7,
            )

        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        st.error(f"❌ Response generation failed: {e}")
        return ""
249
+
250
+
251
+ # -------------------------------
252
+ # Model Loading for Inference
253
+ # -------------------------------
254
+
255
def load_finetuned_model(model, checkpoint_path="fine_tuned_model.pt"):
    """
    Loads fine-tuned weights from a checkpoint into ``model``.

    Args:
        model (torch.nn.Module): Model whose architecture matches the checkpoint.
        checkpoint_path (str): Path to a state dict saved with ``torch.save``.

    Returns:
        The model in eval mode with the checkpoint applied, or None when the
        checkpoint is missing or cannot be loaded (the error is shown in the
        app). Returning None lets callers that test the result detect the
        failure; previously the untouched model was returned and treated as
        a successful load.
    """
    if not os.path.exists(checkpoint_path):
        st.error(f"❌ Checkpoint not found: {checkpoint_path}")
        return None

    try:
        state_dict = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        model.load_state_dict(state_dict)
    except Exception as e:
        # Corrupt file or weights that do not match the architecture.
        st.error(f"❌ Failed to load checkpoint `{checkpoint_path}`: {e}")
        return None

    model.eval()
    st.success("✅ Fine-tuned model loaded successfully!")
    return model
266
+
267
+
268
+
269
+ import pandas as pd
270
+ import os
271
+ import pyarrow as pa
272
+ import numpy as np
273
+ from scipy.stats import zscore, kurtosis, skew
274
+
275
+
276
+ # ======================================
277
+ # Dataset Operations
278
+ # ======================================
279
def load_dataset(path: str) -> pd.DataFrame:
    """Read a CSV file and return it in Arrow-compatible form.

    Any failure is printed and an empty DataFrame is returned instead.
    """
    try:
        return make_arrow_compatible(pd.read_csv(path))
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return pd.DataFrame()
287
+
288
+
289
def save_dataset(df: pd.DataFrame, path: str):
    """Write the DataFrame to ``path`` as CSV without the index.

    Any failure is printed rather than raised; nothing is returned.
    """
    try:
        df.to_csv(path, index=False)
    except Exception as e:
        print(f"Error saving dataset: {e}")
    return None
295
+
296
+
297
def list_datasets(directory: str = "datasets") -> list:
    """Return the names of ``.csv``, ``.json`` and ``.xlsx`` files in a directory.

    If the directory cannot be listed, the error is printed and an empty
    list is returned.
    """
    supported_suffixes = ('.csv', '.json', '.xlsx')
    try:
        entries = os.listdir(directory)
    except Exception as e:
        print(f"Error listing datasets: {e}")
        return []
    return [name for name in entries if name.endswith(supported_suffixes)]
304
+
305
+
306
+ # ======================================
307
+ # Data Cleaning Functions
308
+ # ======================================
309
def clean_dataset(
    df: pd.DataFrame,
    remove_duplicates: bool = True,
    fill_missing: bool = False,
    fill_value: str = "0",
    trim_spaces: bool = True
) -> pd.DataFrame:
    """
    Clean the dataset with multiple operations:
    - Remove duplicates
    - Trim spaces
    - Remove empty columns and rows
    - Fill missing values
    - Auto-cast date columns

    Args:
        df: Input frame; it is not modified.
        remove_duplicates: Drop fully duplicated rows.
        fill_missing: Replace remaining missing values with ``fill_value``.
        fill_value: Replacement used when ``fill_missing`` is set.
        trim_spaces: Strip leading/trailing whitespace from string cells.

    Returns:
        The cleaned frame, passed through ``make_arrow_compatible``.
    """
    # Always work on a new frame so the caller's data is never mutated.
    df = df.drop_duplicates() if remove_duplicates else df.copy()

    # Trim spaces. Only actual strings are stripped: `.str.strip()` on a
    # mixed-type object column would replace every non-string cell with NaN.
    if trim_spaces:
        for col in df.select_dtypes(include=["object"]).columns:
            df[col] = df[col].map(lambda v: v.strip() if isinstance(v, str) else v)

    # Remove empty columns & rows. This must happen before filling, otherwise
    # filled placeholders hide the fact that a row/column was entirely empty.
    df = df.dropna(how="all", axis=1)
    df = df.dropna(how="all", axis=0)

    # Fill missing values
    if fill_missing:
        df = df.fillna(fill_value)

    # Auto-cast date columns. Only all-string columns are candidates:
    # pd.to_datetime happily interprets numbers as epoch offsets, which used
    # to turn every numeric column into 1970 timestamps.
    for col in df.select_dtypes(include=["object"]).columns:
        present = df[col].dropna()
        if present.empty or not present.map(lambda v: isinstance(v, str)).all():
            continue
        try:
            df[col] = pd.to_datetime(df[col])
        except (ValueError, TypeError):
            # Not a date column; leave it untouched.
            pass

    return make_arrow_compatible(df)
348
+
349
+
350
+ # --------------------------------------
351
+ # Dataset Quality Score
352
+ # --------------------------------------
353
def compute_dataset_score(df):
    """
    Compute a dataset quality score between 0 and 100.

    The score multiplies three factors: the share of non-missing cells, the
    share of non-duplicated rows, and a shape factor for numeric columns that
    shrinks as their mean absolute skewness and kurtosis grow.

    Returns:
        float: Score rounded to two decimals; 0.0 for an empty frame.
    """
    if df.empty:
        return 0.0

    # df.size is rows * columns. np.product, used here before, no longer
    # exists in NumPy 2.x.
    missing_ratio = df.isnull().sum().sum() / df.size

    duplicate_ratio = 1 - (df.drop_duplicates().shape[0] / df.shape[0])

    numeric_cols = df.select_dtypes(include=["number"]).columns
    numeric_score = 1.0
    if len(numeric_cols) > 0:
        skew_vals = df[numeric_cols].apply(lambda x: np.abs(skew(x.dropna())), axis=0)
        kurt_vals = df[numeric_cols].apply(lambda x: np.abs(kurtosis(x.dropna())), axis=0)
        # Constant columns have undefined (NaN) skew/kurtosis; nansum treats
        # an undefined mean as "no penalty" instead of poisoning the score.
        penalty = np.nansum([skew_vals.mean(), kurt_vals.mean()]) / 10
        # Clamp so heavy-tailed data yields 0 rather than a negative score.
        numeric_score = min(1.0, max(0.0, 1.0 - penalty))

    score = (1 - missing_ratio) * (1 - duplicate_ratio) * numeric_score * 100
    return round(float(score), 2)
374
+
375
+
376
+ # --------------------------------------
377
+ # Outlier Detection
378
+ # --------------------------------------
379
def detect_outliers(df, threshold=3):
    """
    Detect outliers in numeric columns using Z-score.

    Args:
        df: Frame to inspect.
        threshold: Absolute z-score above which a value counts as an outlier.

    Returns:
        dict: Column name -> number of outliers (plain int), containing only
        columns that have at least one outlier. An empty dict therefore means
        "no outliers", which is what callers testing the result expect.
    """
    outliers = {}
    for col in df.select_dtypes(include=["number"]).columns:
        values = df[col].dropna()
        # The z-score is undefined for empty or constant data.
        if values.nunique() < 2:
            continue
        count = int((np.abs(zscore(values)) > threshold).sum())
        if count:
            outliers[col] = count
    return outliers
387
+
388
+
389
+ # --------------------------------------
390
+ # Detect Inconsistent Types
391
+ # --------------------------------------
392
def detect_inconsistent_types(df):
    """
    Find columns whose cells hold more than one Python type.

    Returns a dict mapping each such column to a ``{type: count}`` dict.
    """
    report = {}
    for name in df.columns:
        cell_types = df[name].apply(type)
        if cell_types.nunique() > 1:
            report[name] = cell_types.value_counts().to_dict()
    return report
399
+
400
+
401
+ # ======================================
402
+ # Data Transformations
403
+ # ======================================
404
def apply_transformation(df: pd.DataFrame, col: str, transform: str) -> pd.DataFrame:
    """
    Apply a transformation to a specified column, in place:
    - "Log": log(1 + x), with undefined results replaced by 0
    - "Normalize": Min-Max scaling to [0, 1]
    - "Standardize": Z-score standardization

    Any other ``transform`` value leaves the column untouched.

    A constant column has zero range and zero standard deviation; it is
    mapped to zeros instead of dividing by zero.

    Raises:
        KeyError: If ``col`` is not a column of ``df``.

    Returns:
        The frame passed through ``make_arrow_compatible``.
    """
    if col not in df.columns:
        raise KeyError(f"Column '{col}' not found in dataset")

    if transform == "Log":
        df[col] = np.log1p(df[col].replace(0, np.nan)).fillna(0)

    elif transform == "Normalize":
        shifted = df[col] - df[col].min()
        value_range = df[col].max() - df[col].min()
        # `shifted * 0.0` keeps missing cells missing while zeroing the rest.
        df[col] = shifted * 0.0 if value_range == 0 else shifted / value_range

    elif transform == "Standardize":
        centered = df[col] - df[col].mean()
        spread = df[col].std()
        df[col] = centered * 0.0 if spread == 0 else centered / spread

    return make_arrow_compatible(df)
424
+
425
+
426
+ # ======================================
427
+ # Normalization & Standardization
428
+ # ======================================
429
def normalize_column(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Normalize column (Min-Max Scaling) in place and return the frame.

    A constant column has zero range; it is mapped to zeros instead of
    dividing by zero (which produced NaN for every row).
    """
    shifted = df[col] - df[col].min()
    value_range = df[col].max() - df[col].min()
    # `shifted * 0.0` keeps missing cells missing while zeroing the rest.
    df[col] = shifted * 0.0 if value_range == 0 else shifted / value_range
    return df
433
+
434
+
435
def standardize_column(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Standardize column (Z-score) in place and return the frame.

    A constant column has zero standard deviation; it is mapped to zeros
    instead of dividing by zero (which produced NaN for every row).
    """
    centered = df[col] - df[col].mean()
    spread = df[col].std()
    # `centered * 0.0` keeps missing cells missing while zeroing the rest.
    df[col] = centered * 0.0 if spread == 0 else centered / spread
    return df
439
+
440
+
441
+ # ======================================
442
+ # Arrow Compatibility & Fixes
443
+ # ======================================
444
def make_arrow_compatible(df: pd.DataFrame) -> pd.DataFrame:
    """
    Ensure dataset compatibility with Streamlit Arrow serialization.

    Every object-dtype column is converted, in place, so that all present
    values are strings. Missing values are kept as missing: a plain
    ``astype(str)`` would turn them into the literal text "nan" / "None",
    hiding them from later missing-value checks.

    Returns:
        The same frame, for chaining.
    """
    for col in df.columns:
        if df[col].dtype == object:
            try:
                column = df[col]
                # Keep cells that are missing; stringify everything else.
                df[col] = column.where(column.isna(), column.astype(str))
            except Exception as e:
                print(f"Could not convert column {col}: {e}")
    return df
455
+
456
+
457
def fix_arrow_incompatibility(df: pd.DataFrame) -> pd.DataFrame:
    """
    Fix Arrow incompatibility by converting mixed types to `str`.

    Each column is test-converted to an Arrow table on its own; columns that
    Arrow rejects are converted to strings in place.

    Returns:
        The same frame, for chaining.
    """
    # Mixed-type object columns are rejected with ArrowTypeError as well as
    # ArrowInvalid, and unsupported types raise ArrowNotImplementedError.
    conversion_errors = (
        pa.lib.ArrowInvalid,
        pa.lib.ArrowTypeError,
        pa.lib.ArrowNotImplementedError,
    )
    for col in df.columns:
        try:
            pa.Table.from_pandas(df[[col]])
        except conversion_errors:
            print(f"Arrow compatibility issue in column: {col}")
            df[col] = df[col].astype(str)
    return df
468
+