Spaces: Build error

Commit 2bdd84f
Parent(s): a997aba

New Framework Change

Files changed:
- __pycache__/utils.cpython-313.pyc  +0 -0
- app.py  +12 -349
- datasets/train_data.csv  +373 -0
- pages/Chat.py  +153 -0
- pages/Conversion.py  +24 -0
- pages/Dataset_Management.py  +360 -0
- requirements.txt  +7 -1
- utils.py  +468 -0
__pycache__/utils.cpython-313.pyc
ADDED
Binary file (20.8 kB)
app.py
CHANGED
@@ -1,355 +1,18 @@

Before (355 lines, single-page implementation, removed):

import streamlit as st
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import matplotlib.pyplot as plt
import time
import json
import re
import os
import asyncio

# Utility Functions
# -------------------------------

token = st.secrets["HF_TOKEN"]
os.environ['CURL_CA_BUNDLE'] = ''

@st.cache_resource
def load_model(model_id: str, token: str):
    """
    Loads and caches the Gemma model and tokenizer with authentication token.
    """
    try:
        # Create and run an event loop explicitly
        asyncio.run(async_load(model_id, token))

        # Ensure torch classes path is valid (optional)
        if not hasattr(torch, "classes") or not torch.classes:
            torch.classes = torch._C._get_python_module("torch.classes")

        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        model = AutoModelForCausalLM.from_pretrained(model_id, token=token)

        return tokenizer, model

    except Exception as e:
        print(f"An error occurred: {e}")
        st.error(f"Model loading failed: {e}")
        return None, None

async def async_load(model_id, token):
    """
    Dummy async function to initialize the event loop.
    """
    await asyncio.sleep(0.1)  # Dummy async operation

def preprocess_data(uploaded_file, file_extension):
    """
    Reads the uploaded file and returns a processed version.
    Supports CSV, JSONL, and TXT.
    """
    data = None
    try:
        if file_extension == "csv":
            data = pd.read_csv(uploaded_file)
        elif file_extension == "jsonl":
            # Each line is a JSON object.
            data = [json.loads(line) for line in uploaded_file.readlines()]
            try:
                data = pd.DataFrame(data)
            except Exception:
                st.warning("Unable to convert JSONL to a table. Previewing raw JSON objects.")
        elif file_extension == "txt":
            text_data = uploaded_file.read().decode("utf-8")
            data = text_data.splitlines()
    except Exception as e:
        st.error(f"Error processing file: {e}")
    return data

def clean_text(text, lowercase=True, remove_punctuation=True):
    """
    Cleans text data by applying basic normalization.
    """
    if lowercase:
        text = text.lower()
    if remove_punctuation:
        text = re.sub(r'[^\w\s]', '', text)
    return text

def plot_training_metrics(epochs, loss_values, accuracy_values):
    """
    Returns a matplotlib figure plotting training loss and accuracy.
    """
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    ax[0].plot(range(1, epochs+1), loss_values, marker='o', color='red')
    ax[0].set_title("Training Loss")
    ax[0].set_xlabel("Epoch")
    ax[0].set_ylabel("Loss")

    ax[1].plot(range(1, epochs+1), accuracy_values, marker='o', color='green')
    ax[1].set_title("Training Accuracy")
    ax[1].set_xlabel("Epoch")
    ax[1].set_ylabel("Accuracy")

    return fig

def simulate_training(num_epochs):
    """
    Simulates a training loop for demonstration.
    Yields current epoch, loss values, and accuracy values.
    Replace this with your actual fine-tuning loop.
    """
    loss_values = []
    accuracy_values = []
    for epoch in range(1, num_epochs + 1):
        loss = np.exp(-epoch) + np.random.random() * 0.1
        acc = 0.5 + (epoch / num_epochs) * 0.5 + np.random.random() * 0.05
        loss_values.append(loss)
        accuracy_values.append(acc)
        yield epoch, loss_values, accuracy_values
        time.sleep(1)  # Simulate computation time

def quantize_model(model):
    """
    Applies dynamic quantization for demonstration.
    In practice, adjust this based on your model and target hardware.
    """
    quantized_model = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8
    )
    return quantized_model

def convert_to_torchscript(model):
    """
    Converts the model to TorchScript format.
    """
    example_input = torch.randint(0, 100, (1, 10))
    traced_model = torch.jit.trace(model, example_input)
    return traced_model

def convert_to_onnx(model, output_path="model.onnx"):
    """
    Converts the model to ONNX format.
    """
    dummy_input = torch.randint(0, 100, (1, 10))
    torch.onnx.export(model, dummy_input, output_path, input_names=["input"], output_names=["output"])
    return output_path

def load_finetuned_model(model, checkpoint_path="fine_tuned_model.pt"):
    """
    Loads the fine-tuned model from the checkpoint.
    """
    if os.path.exists(checkpoint_path):
        model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cpu')))
        model.eval()
        st.success("Fine-tuned model loaded successfully!")
    else:
        st.error(f"Checkpoint not found: {checkpoint_path}")
    return model


def generate_response(prompt, model, tokenizer, max_length=200):
    """
    Generates a response using the fine-tuned model.
    """
    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt").input_ids

    # Generate text
    with torch.no_grad():
        outputs = model.generate(inputs, max_length=max_length, num_return_sequences=1, temperature=0.7)

    # Decode the output
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


# -------------------------------
# Application Layout
# -------------------------------

st.title("One-Stop Gemma Model Fine-tuning, Quantization & Conversion UI")
st.markdown("""
This application is designed for beginners in generative AI.
It allows you to fine-tune, quantize, and convert Gemma models with an intuitive UI.
You can upload your dataset, clean and preview your data, configure training parameters, and export your model in different formats.
""")

# Sidebar: Model selection and data upload
st.sidebar.header("Configuration")

# Model Selection
selected_model = st.sidebar.selectbox("Select Gemma Model", options=["Gemma-Small", "Gemma-Medium", "Gemma-Large"])
if selected_model == "google/gemma-3-1b-it":
    model_id = "google/gemma-3-1b-it"
elif selected_model == "google/gemma-3-4b-it":
    model_id = "google/gemma-3-4b-it"
else:
    model_id = "google/gemma-3-1b-it"

loading_placeholder = st.sidebar.empty()
loading_placeholder.info("Loading model...")
tokenizer, model = load_model(model_id, token)
loading_placeholder.success("Model loaded.")


# Dataset Upload
uploaded_file = st.sidebar.file_uploader("Upload Dataset (CSV, JSONL, TXT)", type=["csv", "jsonl", "txt"])
data = None
if uploaded_file is not None:
    file_ext = uploaded_file.name.split('.')[-1].lower()
    data = preprocess_data(uploaded_file, file_ext)
    st.sidebar.subheader("Dataset Preview:")
    if isinstance(data, pd.DataFrame):
        st.sidebar.dataframe(data.head())
    elif isinstance(data, list):
        st.sidebar.write(data[:5])
    else:
        st.sidebar.write(data)
else:
    st.sidebar.info("Awaiting dataset upload.")

# Data Cleaning Options (for TXT files)
if uploaded_file is not None and file_ext == "txt":
    st.sidebar.subheader("Data Cleaning Options")
    lowercase_option = st.sidebar.checkbox("Convert to lowercase", value=True)
    remove_punct = st.sidebar.checkbox("Remove punctuation", value=True)
    cleaned_data = [clean_text(line, lowercase=lowercase_option, remove_punctuation=remove_punct) for line in data]
    st.sidebar.text_area("Cleaned Data Preview", value="\n".join(cleaned_data[:5]), height=150)

# Main Tabs for Different Operations
tabs = st.tabs(["Fine-tuning", "Quantization", "Model Conversion"])

# -------------------------------
# Fine-tuning Tab
# -------------------------------
with tabs[0]:
    st.header("Fine-tuning")
    st.markdown("Configure hyperparameters and start fine-tuning your Gemma model.")

    col1, col2, col3 = st.columns(3)
    with col1:
        learning_rate = st.number_input("Learning Rate", value=1e-4, format="%.5f")
    with col2:
        batch_size = st.number_input("Batch Size", value=16, step=1)
    with col3:
        epochs = st.number_input("Epochs", value=3, step=1)

    if st.button("Start Fine-tuning"):
        if data is None:
            st.error("Please upload a dataset first!")
        else:
            st.info("Starting fine-tuning...")
            progress_bar = st.progress(0)
            training_placeholder = st.empty()
            loss_values = []
            accuracy_values = []

            # Simulate training loop (replace with your actual training code)
            for epoch, losses, accs in simulate_training(epochs):
                fig = plot_training_metrics(epoch, losses, accs)
                training_placeholder.pyplot(fig)
                progress_bar.progress(epoch/epochs)
            st.success("Fine-tuning completed!")

            # Save the fine-tuned model (for demonstration, saving state_dict)
            if model:
                torch.save(model.state_dict(), "fine_tuned_model.pt")
                with open("fine_tuned_model.pt", "rb") as f:
                    st.download_button("Download Fine-tuned Model", data=f, file_name="fine_tuned_model.pt", mime="application/octet-stream")
            else:
                st.error("Model not loaded. Cannot save.")


# -------------------------------
# Quantization Tab
# -------------------------------
with tabs[1]:
    st.header("Model Quantization")
    st.markdown("Quantize your model to optimize for inference performance.")
    quantize_choice = st.radio("Select Quantization Type", options=["Dynamic Quantization"], index=0)

    if st.button("Apply Quantization"):
        with st.spinner("Applying quantization..."):
            quantized_model = quantize_model(model)
            st.success("Model quantized successfully!")
            torch.save(quantized_model.state_dict(), "quantized_model.pt")
            with open("quantized_model.pt", "rb") as f:
                st.download_button("Download Quantized Model", data=f, file_name="quantized_model.pt", mime="application/octet-stream")

# -------------------------------
# Model Conversion Tab
# -------------------------------
with tabs[2]:
    st.header("Model Conversion")
    st.markdown("Convert your model to a different format for deployment or optimization.")
    conversion_option = st.selectbox("Select Conversion Format", options=["TorchScript", "ONNX"])

    if st.button("Convert Model"):
        if conversion_option == "TorchScript":
            with st.spinner("Converting to TorchScript..."):
                ts_model = convert_to_torchscript(model)
                ts_model.save("model_ts.pt")
                st.success("Converted to TorchScript!")
                with open("model_ts.pt", "rb") as f:
                    st.download_button("Download TorchScript Model", data=f, file_name="model_ts.pt", mime="application/octet-stream")
        elif conversion_option == "ONNX":
            with st.spinner("Converting to ONNX..."):
                onnx_path = convert_to_onnx(model, "model.onnx")
                st.success("Converted to ONNX!")
                with open(onnx_path, "rb") as f:
                    st.download_button("Download ONNX Model", data=f, file_name="model.onnx", mime="application/octet-stream")

# -------------------------------
# Response Generation Section
# -------------------------------
st.header("Generate Responses with Fine-Tuned Model")
st.markdown("Use the fine-tuned model to generate text responses based on your prompts.")

# Check if the fine-tuned model exists
if os.path.exists("fine_tuned_model.pt"):
    # Load the fine-tuned model
    model = load_finetuned_model(model, "fine_tuned_model.pt")

    # Input prompt for generating responses
    prompt = st.text_area("Enter a prompt:", "Once upon a time...")

    # Max length slider
    max_length = st.slider("Max Response Length", min_value=50, max_value=500, value=200, step=10)

    if st.button("Generate Response"):
        with st.spinner("Generating response..."):
            response = generate_response(prompt, model, tokenizer, max_length)
            st.success("Generated Response:")
            st.write(response)

else:
    st.warning("Fine-tuned model not found. Please fine-tune the model first.")


# -------------------------------
# Optional: Cloud Integration Snippet
# -------------------------------
st.header("Cloud Integration")
st.markdown("""
""")
st.code("""
from google.cloud import storage

def upload_to_gcs(bucket_name, source_file_name, destination_blob_name):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print(f"Uploaded {source_file_name} to {destination_blob_name}")

#
#
""

After (18 lines, new multi-page entry point):

import streamlit as st

st.set_page_config(page_title="Gemma LLM Fine-Tuning UI", layout="wide")

st.title("Gemma LLM Fine-Tuning Suite 🚀")
st.markdown("""
### 🔥 **Multi-page AI Model Trainer**
- **Chat**: Interact with the model.
- **Fine-tuning**: Train on `train_data.csv` or upload new datasets.
- **Conversion**: Export models to TorchScript and ONNX.
- **Dataset Management**: View and add to your training data.
""")

# st.sidebar.title("Navigation")
# st.sidebar.page_link("pages/Chat.py", label="🔹 Chat")
# st.sidebar.page_link("pages/Finetune.py", label="🔹 Fine-tuning")
# st.sidebar.page_link("pages/Conversion.py", label="🔹 Model Conversion")
# st.sidebar.page_link("pages/Dataset_Management.py", label="🔹 Dataset Management")
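The new app.py relies on Streamlit's multi-page convention: every script placed under pages/ is discovered automatically, so the commented-out st.sidebar.page_link calls are only needed for custom navigation. A minimal sketch of the layout this commit implies and of enabling that navigation follows; it is not part of the commit, and it assumes a Streamlit version (>= 1.31) where st.page_link is available.

# Minimal sketch (assumption, not in this commit): repo layout after the change.
#
# .
# ├── app.py                      # entry point: streamlit run app.py
# ├── utils.py                    # shared helpers (load_model, generate_response, ...)
# ├── datasets/train_data.csv
# └── pages/
#     ├── Chat.py
#     ├── Conversion.py
#     ├── Dataset_Management.py
#     └── Finetune.py
import streamlit as st

st.sidebar.title("Navigation")
st.sidebar.page_link("pages/Chat.py", label="Chat")
st.sidebar.page_link("pages/Finetune.py", label="Fine-tuning")
st.sidebar.page_link("pages/Conversion.py", label="Model Conversion")
st.sidebar.page_link("pages/Dataset_Management.py", label="Dataset Management")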
datasets/train_data.csv
ADDED
@@ -0,0 +1,373 @@
prompt,response
"Bhai, kal movie dekhne chale?","Haan bhai, kaunsi dekhni hai?"
Kya haal hai bhai?,"Bas bhai, zindagi chal rahi hai."
Tu kal gym gaya tha?,"Haan bhai, lekin bas selfie kheechi thi 😎."
Tere paas charger hai?,"Haan bhai, par battery khatam hai 😬."
Tu itna late kyun aaya?,"Bhai, traffic ne jaan le li!"
Tune pizza order kiya?,"Haan bhai, extra cheese bhi maang liya 😎."
Office ka workload kaisa chal raha hai?,"Bhai, workload nahi, dukh chal raha hai 😩."
Tune WhatsApp group chhod diya?,"Haan bhai, bas drama zyada ho gaya tha 😂."
Weekend pe kya plan hai?,"Bhai, full chill maarne ka mood hai 🥂."
Kya tu aaj free hai?,"Free nahi bhai, bas zindagi ki maar kha raha hoon 😎."
Exam ka preparation kaisa hai?,"Bhai, bas TikTok scrolling chal raha hai 😂."
Tere paas notes hain?,"Haan bhai, par padhai se zyada memes hain usme 😂."
Kal cricket khelne chale?,"Bhai, fielding nahi karunga 😎."
Shaadi kab kar raha hai bhai?,"Bhai, pehle job toh lagne de 😂."
Tera phone kitna slow hai!,"Bhai, processor nahi, emotions chala raha hai 😂."
Tune uska reply dekha?,"Haan bhai, bas seen maar diya 😢."
[... 356 further prompt/response rows in the same casual Hinglish chat style, through row 373 ...]
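The dataset is a plain two-column prompt/response CSV, so before fine-tuning each row usually has to be rendered into a single training string. A minimal, hedged sketch follows (not part of this commit): the chat-turn markers shown are an assumption for illustration; in practice tokenizer.apply_chat_template on the loaded Gemma tokenizer is the safer way to get the exact format.

# Minimal sketch (assumption, not in this commit): load datasets/train_data.csv
# and render each row as one training string.
import pandas as pd

df = pd.read_csv("datasets/train_data.csv")  # columns: prompt, response

def to_training_text(row) -> str:
    # One chat exchange per row, rendered as a single string.
    # The turn markers below are illustrative; prefer tokenizer.apply_chat_template.
    return (
        f"<start_of_turn>user\n{row['prompt']}<end_of_turn>\n"
        f"<start_of_turn>model\n{row['response']}<end_of_turn>"
    )

texts = df.apply(to_training_text, axis=1).tolist()
print(texts[0])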
pages/Chat.py
ADDED
@@ -0,0 +1,153 @@

import streamlit as st
from utils import (
    load_model,
    load_finetuned_model,
    generate_response,
    get_hf_token
)
import os
import json
from datetime import datetime

st.set_page_config(page_title="Gemma Chat", layout="wide")

# -------------------------------
# 💡 Theme Toggle
# -------------------------------
dark_mode = st.sidebar.toggle("🌙 Dark Mode", value=False)

if dark_mode:
    st.markdown(
        """
        <style>
        body { background-color: #1e1e1e; color: #ffffff; }
        .stTextInput, .stTextArea, .stSelectbox, .stSlider { color: #ffffff !important; }
        </style>
        """, unsafe_allow_html=True
    )

st.title("💬 Chat with Gemma Model")

# -------------------------------
# 📌 Model Source Selection
# -------------------------------
model_source = st.sidebar.radio("📌 Select Model Source", ["Local (.pt)", "Hugging Face"])

# -------------------------------
# 🔥 Dynamic Model List
# -------------------------------
if model_source == "Local (.pt)":
    model_dir = "models"
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    local_models = [f for f in os.listdir(model_dir) if f.endswith(".pt")]

    if local_models:
        selected_model = st.sidebar.selectbox("🛠️ Select Local Model", local_models)
        model_path = os.path.join(model_dir, selected_model)
    else:
        st.warning("⚠️ No fine-tuned models found. Fine-tune a model first.")
        st.stop()

else:
    hf_models = [
        "google/gemma-3-1b-it",
        "google/gemma-3-4b-pt",
        "google/gemma-3-4b-it",
        "google/gemma-3-12b-pt",
        "google/gemma-3-12b-it",
        "google/gemma-3-27b-pt",
        "google/gemma-3-27b-it"
    ]
    selected_model = st.sidebar.selectbox("🛠️ Select Hugging Face Model", hf_models)
    model_path = None

# -------------------------------
# 🔥 Model Loading
# -------------------------------
hf_token = get_hf_token()

if model_source == "Local (.pt)":
    tokenizer, model = load_model("google/gemma-3-1b-it", hf_token)  # Base model first
    model = load_finetuned_model(model, model_path)
    if model:
        st.success(f"✅ Local fine-tuned model loaded: `{selected_model}`")
    else:
        st.error("❌ Failed to load local model.")
        st.stop()

else:
    tokenizer, model = load_model(selected_model, hf_token)
    if model:
        st.success(f"✅ Hugging Face model loaded: `{selected_model}`")
    else:
        st.error("❌ Failed to load Hugging Face model.")
        st.stop()

# -------------------------------
# ⚙️ Model Configuration Panel
# -------------------------------
st.sidebar.header("⚙️ Model Configuration")
temperature = st.sidebar.slider("🔥 Temperature", 0.1, 1.5, 0.7, 0.1)
top_p = st.sidebar.slider("🎯 Top-p", 0.1, 1.0, 0.9, 0.1)
repetition_penalty = st.sidebar.slider("🔁 Repetition Penalty", 0.5, 2.0, 1.0, 0.1)

# -------------------------------
# 💬 Chat Interface
# -------------------------------
if "conversation" not in st.session_state:
    st.session_state.conversation = []

prompt = st.text_area("💬 Enter your message:", "Hello, how are you?", key="prompt", height=100)
max_length = st.slider("📏 Max Response Length", min_value=50, max_value=1000, value=300, step=50)

# -------------------------------
# 🚀 Streaming Response Function
# -------------------------------
def stream_response():
    """
    Streams the response token by token.
    """
    response = generate_response(prompt, model, tokenizer, max_length)

    if response:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        st.session_state.conversation.append({"sender": "👤 You", "message": prompt, "timestamp": timestamp})
        st.session_state.conversation.append({"sender": "🤖 AI", "message": response, "timestamp": timestamp})
        return response
    else:
        st.error("❌ Failed to generate response.")
        return None

# -------------------------------
# 🎯 Conversation Controls
# -------------------------------
col1, col2, col3 = st.columns([1, 1, 1])

if col1.button("🚀 Generate (CTRL+Enter)", help="Use CTRL + Enter to generate"):
    stream_response()

if col2.button("🗑️ Clear Conversation"):
    st.session_state.conversation = []

# Export & Import
if col3.download_button("📥 Export Chat", json.dumps(st.session_state.conversation, indent=4), "chat_history.json"):
    st.success("✅ Chat exported successfully!")

uploaded_file = st.file_uploader("📤 Import Conversation", type=["json"])

if uploaded_file is not None:
    st.session_state.conversation = json.load(uploaded_file)
    st.success("✅ Conversation imported successfully!")

# -------------------------------
# 🛠️ Display Conversation
# -------------------------------
st.subheader("📜 Conversation History")

for msg in st.session_state.conversation:
    with st.container():
        st.markdown(f"**{msg['sender']}** \n🕒 {msg['timestamp']}")
        st.write(msg['message'])
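Note that stream_response in this page calls generate_response once and appends the complete reply, so despite its docstring it does not actually stream tokens. A minimal sketch of true token-by-token streaming follows; it is not part of this commit and assumes the `model` and `tokenizer` loaded above plus a Streamlit version that provides st.write_stream.

# Minimal sketch (assumption, not in this commit): real token streaming with
# transformers.TextIteratorStreamer; `model` and `tokenizer` as loaded in pages/Chat.py.
from threading import Thread
import streamlit as st
from transformers import TextIteratorStreamer

def stream_generate(prompt: str, model, tokenizer, max_new_tokens: int = 300):
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Run generation in a background thread; the streamer yields decoded text chunks.
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens),
    )
    thread.start()
    return streamer  # iterable of text chunks

# Usage inside the page: st.write_stream renders chunks as they arrive.
# full_text = st.write_stream(stream_generate(prompt, model, tokenizer))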
pages/Conversion.py
ADDED
@@ -0,0 +1,24 @@

import streamlit as st
from utils import load_model, convert_to_torchscript, convert_to_onnx, get_hf_token

st.title("🔧 Model Conversion")

# Load the HF token from utils
hf_token = get_hf_token()

# Load the model
model_path = "fine_tuned_model.pt"
tokenizer, model = load_model("google/gemma-3-1b-it", hf_token, model_path)

conversion_option = st.selectbox("Select Conversion Format", ["TorchScript", "ONNX"])

if st.button("Convert Model"):
    if conversion_option == "TorchScript":
        with st.spinner("Converting to TorchScript..."):
            ts_model = convert_to_torchscript(model)
            st.success("Model converted to TorchScript!")

    elif conversion_option == "ONNX":
        with st.spinner("Converting to ONNX..."):
            onnx_path = convert_to_onnx(model)
            st.success("Model converted to ONNX!")
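Unlike the old single-page app.py, this page reports success but neither verifies nor offers the converted artifact for download. A minimal sketch of sanity-checking the exported ONNX file follows; it is not part of this commit and assumes onnxruntime is installed and that convert_to_onnx wrote "model.onnx" with an int64 "input" of token IDs, as in the original helper.

# Minimal sketch (assumption, not in this commit): verify the exported ONNX model.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
dummy = np.random.randint(0, 100, size=(1, 10), dtype=np.int64)  # fake token ids
outputs = session.run(None, {"input": dummy})
print("ONNX output shapes:", [o.shape for o in outputs])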
pages/Dataset_Management.py
ADDED
@@ -0,0 +1,360 @@
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import io
import json
from utils import (
    load_dataset,
    save_dataset,
    clean_dataset,
    compute_dataset_score,
    detect_outliers,
    apply_transformation,
    list_datasets,
    detect_inconsistent_types
)

# -------------------------------
# Constants & Setup
# -------------------------------
DATASET_DIR = "datasets"
DEFAULT_DATASET = "train_data.csv"
os.makedirs(DATASET_DIR, exist_ok=True)  # Ensure directory exists

# -------------------------------
# Sidebar: Dataset Selection
# -------------------------------
st.sidebar.header("📊 Dataset Selection")

# List available datasets from the datasets folder
available_datasets = list_datasets(DATASET_DIR)
dataset_choice = st.sidebar.radio("Choose Dataset Source:", ["Select Existing Dataset", "Upload New Dataset"])

dataset_path = None

if dataset_choice == "Select Existing Dataset":
    if available_datasets:
        selected_dataset = st.sidebar.selectbox("Select Dataset:", available_datasets)
        dataset_path = os.path.join(DATASET_DIR, selected_dataset)
        st.sidebar.success(f"Using `{selected_dataset}` dataset.")
    else:
        st.sidebar.warning("No datasets found. Please upload a new dataset.")
elif dataset_choice == "Upload New Dataset":
    uploaded_file = st.sidebar.file_uploader("Upload Dataset (CSV, JSON, or Excel)", type=["csv", "json", "xlsx"])
    if uploaded_file:
        file_ext = uploaded_file.name.split('.')[-1].lower()
        try:
            if file_ext == "csv":
                new_df = pd.read_csv(uploaded_file)
            elif file_ext == "json":
                new_df = pd.json_normalize(json.load(uploaded_file))
            elif file_ext == "xlsx":
                new_df = pd.read_excel(uploaded_file)
            else:
                st.error("Unsupported file format.")
                st.stop()
        except Exception as e:
            st.error(f"Error reading file: {e}")
            st.stop()

        # Save the new dataset with its filename
        dataset_path = os.path.join(DATASET_DIR, uploaded_file.name)
        save_dataset(new_df, dataset_path)
        st.sidebar.success(f"Dataset `{uploaded_file.name}` uploaded successfully!")
        available_datasets = list_datasets(DATASET_DIR)  # Refresh list
    else:
        st.sidebar.warning("Please upload a dataset.")

# -------------------------------
# Load the Selected Dataset
# -------------------------------
if dataset_path:
    df = load_dataset(dataset_path)
    if df.empty:
        st.warning("Dataset is empty or failed to load.")
else:
    df = pd.DataFrame()
    st.warning("No dataset selected. Please choose or upload a dataset.")

# -------------------------------
# Main App Title & Description
# -------------------------------
st.title("📊 The Data Hub")

# -------------------------------
# Tabs for Operations
# -------------------------------
tabs = st.tabs([
    "View & Summary", "Clean Data",
    "Visualize Data", "Data Profiling",
    "Outlier Detection", "Custom Transformations",
    "Export"
])

# -------------------------------
# Tab 1: View & Summary
# -------------------------------
with tabs[0]:
    st.subheader("📋 Current Dataset Preview")
    if not df.empty:
        st.dataframe(df.head(10))
        st.markdown("#### 🔎 Basic Statistics")
        st.write(df.describe(include="all"))
    else:
        st.warning("No dataset available. Please choose or upload a dataset.")

# -------------------------------
# Tab 2: Clean Data
# -------------------------------
with tabs[1]:
    st.subheader("🧼 Clean Your Dataset")
    if not df.empty:
        remove_duplicates = st.checkbox("Remove Duplicate Rows", value=True)
        fill_missing = st.checkbox("Fill Missing Values", value=False)
        fill_value = st.text_input("Fill missing values with:", value="0")

        st.markdown("#### Optional: Rename Columns")
        new_names = {}
        for col in df.columns:
            new_names[col] = st.text_input(f"Rename column '{col}'", value=col)

        if st.button("Clean Dataset"):
            cleaned_df = clean_dataset(df, remove_duplicates, fill_missing, fill_value)
            cleaned_df = cleaned_df.rename(columns=new_names)
            save_dataset(cleaned_df, dataset_path)
            st.success("✅ Dataset cleaned successfully!")
            st.dataframe(cleaned_df.head())
            df = cleaned_df
    else:
        st.warning("No dataset available for cleaning.")

# -------------------------------
# Tab 3: Visualize Data (Fixed KeyError Issue)
# -------------------------------
with tabs[2]:
    st.subheader("📊 Visualize Your Data")

    if not df.empty:
        viz_type = st.selectbox("Select Visualization Type", ["Histogram", "Scatter", "Box Plot", "Heatmap", "Line Chart"])
        numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()

        if numeric_cols:
            # Validate column selection
            col = st.selectbox("Select Column", numeric_cols)

            if col:  # Ensure valid column selection
                fig, ax = plt.subplots()

                if viz_type == "Histogram":
                    ax.hist(df[col].dropna(), bins=20, color="skyblue", edgecolor="black")
                elif viz_type == "Box Plot":
                    sns.boxplot(x=df[col].dropna(), ax=ax)
                elif viz_type == "Scatter":
                    x_col = st.selectbox("X-axis", numeric_cols)
                    y_col = st.selectbox("Y-axis", numeric_cols)
                    if x_col and y_col:
                        ax.scatter(df[x_col], df[y_col], color="green")
                elif viz_type == "Heatmap":
                    corr = df[numeric_cols].corr()
                    sns.heatmap(corr, annot=True, cmap="coolwarm", ax=ax)
                elif viz_type == "Line Chart":
                    ax.plot(df.index, df[col], marker="o")

                st.pyplot(fig)
            else:
                st.warning("Please select a valid column.")
        else:
            st.warning("No numeric columns available for visualization.")
    else:
        st.warning("No dataset available for visualization.")

# -------------------------------
# Tab 4: Data Profiling
# -------------------------------
with tabs[3]:
    if not df.empty:

        # -------------------------------
        # 1. General Dataset Info
        # -------------------------------
        st.markdown("### 🛠️ General Information")
        st.write(f"✅ **Total Rows:** `{df.shape[0]}`")
        st.write(f"✅ **Total Columns:** `{df.shape[1]}`")
        st.write(f"✅ **Memory Usage:** `{df.memory_usage(deep=True).sum() / (1024 ** 2):.2f} MB`")
        st.write(f"✅ **Dataset Shape:** `{df.shape}`")

        # -------------------------------
        # 2. Dataset Quality Score
        # -------------------------------
        st.markdown("### 📊 Dataset Quality Score")
        score = compute_dataset_score(df)
        st.success(f"💯 Dataset Quality Score: `{score} / 100`")

        # -------------------------------
        # 3. Column Overview with Stats
        # -------------------------------
        st.markdown("### 🔥 Column Overview")

        # Numeric and categorical columns
        numeric_cols = df.select_dtypes(include=["number"]).columns
        categorical_cols = df.select_dtypes(include=["object"]).columns

        profile = pd.DataFrame({
            "Column": df.columns,
            "Data Type": df.dtypes.values,
            "Missing Values": df.isnull().sum().values,
            "Missing %": (df.isnull().sum() / len(df) * 100).values,
            "Unique Values": df.nunique().values
        })

        # Add numeric statistics
        if len(numeric_cols) > 0:
            numeric_stats = pd.DataFrame({
                "Column": numeric_cols,
                "Min": df[numeric_cols].min().values,
                "Max": df[numeric_cols].max().values,
                "Mean": df[numeric_cols].mean().values,
                "Std Dev": df[numeric_cols].std().values,
                "Skewness": df[numeric_cols].skew().values,
                "Kurtosis": df[numeric_cols].kurt().values
            })

            # Merge stats with the profile
            profile = profile.merge(numeric_stats, on="Column", how="left")

        st.dataframe(profile)

        # -------------------------------
        # 4. Missing Values Visualization
        # -------------------------------
        st.markdown("### 🔎 Missing Values Distribution")
        missing_values = df.isnull().sum()
        missing_values = missing_values[missing_values > 0]

        if not missing_values.empty:
            fig, ax = plt.subplots(figsize=(12, 5))
            sns.barplot(x=missing_values.index, y=missing_values.values, ax=ax, color="skyblue")
            ax.set_title("Missing Values per Column")
            ax.set_ylabel("Missing Count")
            ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
            st.pyplot(fig)
        else:
            st.success("No missing values found!")

        # -------------------------------
        # 5. Duplicates Detection
        # -------------------------------
        st.markdown("### 🔥 Duplicates & Constant Columns Detection")

        # Duplicates
        duplicate_count = df.duplicated().sum()
        st.write(f"🔁 **Duplicate Rows:** `{duplicate_count}`")

        # Constant Columns
        constant_cols = [col for col in df.columns if df[col].nunique() == 1]
        if constant_cols:
            st.write(f"🚩 **Constant Columns:** `{constant_cols}`")
        else:
            st.success("No constant columns detected!")

        # -------------------------------
        # 6. Cardinality Analysis
        # -------------------------------
        st.markdown("### 🧬 Cardinality Analysis")

        high_cardinality = [col for col in df.columns if df[col].nunique() > len(df) * 0.8]
        if high_cardinality:
            st.write(f"🔢 **High-Cardinality Columns:** `{high_cardinality}`")
        else:
            st.success("No high-cardinality columns detected!")

        # -------------------------------
        # 7. Top Frequent & Rare Values
        # -------------------------------
        st.markdown("### 🎯 Frequent & Rare Values")

        for col in categorical_cols:
            st.write(f"✅ **{col}**")

            top_values = df[col].value_counts().nlargest(5)
            rare_values = df[col].value_counts().nsmallest(5)

            st.write("📊 **Top Frequent Values:**")
            st.dataframe(top_values)

            st.write("🧪 **Rare Values:**")
            st.dataframe(rare_values)

        # -------------------------------
        # 8. Correlation Matrix
        # -------------------------------
        st.markdown("### 📊 Correlation Matrix")

        if len(numeric_cols) > 1:
            corr = df[numeric_cols].corr()

            fig, ax = plt.subplots(figsize=(12, 8))
            sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", square=True, ax=ax)
            st.pyplot(fig)
        else:
            st.info("Not enough numeric columns for correlation analysis.")

        # -------------------------------
        # 9. Pair Plot (Numerical Relationships)
        # -------------------------------
        st.markdown("### 🔥 Pair Plot (Numerical Relationships)")

        if len(numeric_cols) >= 2:
            pairplot = sns.pairplot(df[numeric_cols], diag_kind='kde')
            st.pyplot(pairplot.fig)
        else:
            st.info("Not enough numeric columns for pair plot visualization.")

        # -------------------------------
        # 10. Outlier Detection
        # -------------------------------
        st.markdown("### 🚩 Outlier Detection")

        outliers = detect_outliers(df)
        if outliers:
            st.write("✅ **Outliers Detected:**")
            st.dataframe(pd.DataFrame(outliers.items(), columns=["Column", "Outlier Count"]))
        else:
            st.success("No significant outliers detected!")

        # -------------------------------
        # 11. Inconsistent Data Types
        # -------------------------------
        st.markdown("### 🚫 Inconsistent Data Types")

        inconsistent_types = detect_inconsistent_types(df)
        if inconsistent_types:
            st.write("⚠️ **Inconsistent Data Types Detected:**")
            st.write(inconsistent_types)
        else:
            st.success("No inconsistent data types detected!")

    else:
        st.warning("No dataset available for profiling.")

# -------------------------------
# Tab 5: Outlier Detection
# -------------------------------
with tabs[4]:
    st.subheader("🚀 Outlier Detection")
    if not df.empty:
        outliers = detect_outliers(df)
        st.write(outliers)
    else:
        st.warning("No dataset available for outlier detection.")

# -------------------------------
# Tab 7: Export
# -------------------------------
with tabs[6]:
    st.subheader("📤 Export Dataset")
    export_format = st.selectbox("Export Format", ["CSV", "Excel", "JSON"])
    if not df.empty:
        # Build the download payload in the selected format
        if export_format == "CSV":
            st.download_button("Download", df.to_csv(index=False), "dataset.csv")
        elif export_format == "JSON":
            st.download_button("Download", df.to_json(orient="records"), "dataset.json")
        else:
            buffer = io.BytesIO()
            df.to_excel(buffer, index=False, engine="xlsxwriter")
            st.download_button("Download", buffer.getvalue(), "dataset.xlsx")
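The same pipeline this page drives interactively can be exercised head-lessly with the helpers utils.py adds later in this commit; a rough sketch (the output path is illustrative):

from utils import load_dataset, clean_dataset, compute_dataset_score, save_dataset

# Load, clean, score, and re-save the bundled training dataset.
df = load_dataset("datasets/train_data.csv")
cleaned = clean_dataset(df, remove_duplicates=True, fill_missing=True, fill_value="0")
print("Quality score:", compute_dataset_score(cleaned))
save_dataset(cleaned, "datasets/train_data_clean.csv")  # illustrative output path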
pages/Finetune.py
ADDED
@@ -0,0 +1,170 @@
import streamlit as st
import pandas as pd
import json
import os
from datetime import datetime
from utils import (
    load_model,
    get_hf_token,
    simulate_training,
    plot_training_metrics,
    load_finetuned_model,
    save_model
)

st.title("🔥 Fine-tune the Gemma Model")

# -------------------------------
# Finetuning Option Selection
# -------------------------------
finetune_option = st.radio("Select Finetuning Option", ["Fine-tune from scratch", "Refinetune existing model"])

# -------------------------------
# Model Selection Logic
# -------------------------------
selected_model = None
saved_model_path = None

if finetune_option == "Fine-tune from scratch":
    # Display Hugging Face model list
    model_list = [
        "google/gemma-3-1b-pt",
        "google/gemma-3-1b-it",
        "google/gemma-3-4b-pt",
        "google/gemma-3-4b-it",
        "google/gemma-3-12b-pt",
        "google/gemma-3-12b-it",
        "google/gemma-3-27b-pt",
        "google/gemma-3-27b-it"
    ]
    selected_model = st.selectbox("🛠️ Select Gemma Model to Fine-tune", model_list)

elif finetune_option == "Refinetune existing model":
    # Dynamically list all saved models from the /models folder
    model_dir = "models"

    if os.path.exists(model_dir):
        saved_models = [f for f in os.listdir(model_dir) if f.endswith(".pt")]
    else:
        saved_models = []

    if saved_models:
        saved_model_path = st.selectbox("Select a saved model to re-finetune", saved_models)
        saved_model_path = os.path.join(model_dir, saved_model_path)
        st.success(f"✅ Selected model for refinement: `{saved_model_path}`")
    else:
        st.warning("⚠️ No saved models found! Switching to fine-tuning from scratch.")
        finetune_option = "Fine-tune from scratch"

# -------------------------------
# Dataset Selection
# -------------------------------

st.subheader("📚 Dataset Selection")

# Dataset source selection
dataset_option = st.radio("Choose dataset:", ["Upload New Dataset", "Use Existing Dataset (`train_data.csv`)"])

dataset_path = "train_data.csv"

if dataset_option == "Upload New Dataset":
    uploaded_file = st.file_uploader("📤 Upload Dataset (CSV or JSON)", type=["csv", "json"])

    if uploaded_file is not None:
        # Handle CSV or JSON upload
        if uploaded_file.name.endswith(".csv"):
            new_data = pd.read_csv(uploaded_file)
        elif uploaded_file.name.endswith(".json"):
            json_data = json.load(uploaded_file)
            new_data = pd.json_normalize(json_data)
        else:
            st.error("❌ Unsupported file format. Please upload CSV or JSON.")
            st.stop()

        # Append or create new dataset
        if os.path.exists(dataset_path):
            new_data.to_csv(dataset_path, mode='a', index=False, header=False)
            st.success(f"✅ Data appended to `{dataset_path}`!")
        else:
            new_data.to_csv(dataset_path, index=False)
            st.success(f"✅ Dataset saved as `{dataset_path}`!")

elif dataset_option == "Use Existing Dataset (`train_data.csv`)":
    if os.path.exists(dataset_path):
        st.success("✅ Using existing `train_data.csv` for fine-tuning.")
    else:
        st.error("❌ `train_data.csv` not found! Please upload a new dataset.")
        st.stop()

# -------------------------------
# Hyperparameters Configuration
# -------------------------------
learning_rate = st.number_input("📊 Learning Rate", value=1e-4, format="%.5f")
batch_size = st.number_input("🛠️ Batch Size", value=16, step=1)
epochs = st.number_input("⏱️ Epochs", value=3, step=1)

# -------------------------------
# Fine-tuning Execution
# -------------------------------
if st.button("🚀 Start Fine-tuning"):
    st.info("Fine-tuning process initiated...")

    # Retrieve Hugging Face Token
    hf_token = get_hf_token()

    # Model loading logic
    if finetune_option == "Refinetune existing model" and saved_model_path:
        # Load the base model first
        tokenizer, model = load_model("google/gemma-3-1b-it", hf_token)

        # Load the saved model checkpoint for re-finetuning
        model = load_finetuned_model(model, saved_model_path)

        if model:
            st.success(f"✅ Loaded saved model: `{saved_model_path}` for refinement!")
        else:
            st.error("❌ Failed to load the saved model. Aborting.")
            st.stop()

    else:
        # Fine-tune from scratch (load base model)
        if not selected_model:
            st.error("❌ Please select a model to fine-tune.")
            st.stop()

        tokenizer, model = load_model(selected_model, hf_token)

        if model:
            st.success(f"✅ Base model loaded: `{selected_model}`")
        else:
            st.error("❌ Failed to load the base model. Aborting.")
            st.stop()

    # Simulate fine-tuning loop
    progress_bar = st.progress(0)
    training_placeholder = st.empty()

    for epoch, losses, accs in simulate_training(epochs):
        fig = plot_training_metrics(epoch, losses, accs)
        training_placeholder.pyplot(fig)
        progress_bar.progress(epoch / epochs)

    # Save fine-tuned model with timestamp (fall back to the saved checkpoint name when refinetuning)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_name = selected_model.replace('/', '_') if selected_model else os.path.basename(saved_model_path).replace('.pt', '')
    new_model_name = f"models/fine_tuned_model_{base_name}_{timestamp}.pt"

    # Save the fine-tuned model
    saved_model_path = save_model(model, new_model_name)

    if saved_model_path:
        st.success(f"✅ Fine-tuning completed! Model saved as `{saved_model_path}`")

        # Load the fine-tuned model for immediate inference
        model = load_finetuned_model(model, saved_model_path)

        if model:
            st.success("🛠️ Fine-tuned model loaded and ready for inference!")
        else:
            st.error("❌ Failed to load the fine-tuned model for inference.")
    else:
        st.error("❌ Failed to save the fine-tuned model.")
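A checkpoint saved by this page can also be reloaded outside the Streamlit run for standalone inference; a sketch assuming the utils.py helpers from this commit (the checkpoint file name below is illustrative):

from utils import load_model, load_finetuned_model, generate_response, get_hf_token

# Rebuild the base model, then overlay the saved state_dict and generate.
tokenizer, model = load_model("google/gemma-3-1b-it", get_hf_token())
model = load_finetuned_model(model, "models/fine_tuned_model_google_gemma-3-1b-it_20250101_000000.pt")  # illustrative name
print(generate_response("Hello!", model, tokenizer, max_length=64))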
requirements.txt
CHANGED
@@ -3,4 +3,10 @@ pandas==2.2.3
numpy==2.2.4
torch==2.6.0
transformers @ git+https://github.com/huggingface/[email protected]
matplotlib==3.10.1
rich>=13.1.0
FuzzyTM>=0.4.0
requests>=2.28.0
xlsxwriter>=3.0.1
python-dotenv>=0.19.0
scipy>=1.7.3
utils.py
ADDED
@@ -0,0 +1,468 @@
import streamlit as st
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import matplotlib.pyplot as plt
import time
import json
import re
import os
import asyncio
from dotenv import load_dotenv
from scipy.stats import skew, kurtosis, zscore

# -------------------------------
# Environment and Token Management
# -------------------------------

# Load environment variables from .env file in local development
load_dotenv()

def get_hf_token():
    """
    Retrieves HF token from secrets or .env file.
    """
    token = os.getenv("HF_TOKEN")  # Prioritize environment variable

    # If not found, fall back to Streamlit secrets
    if not token:
        try:
            token = st.secrets["HF_TOKEN"]
        except (FileNotFoundError, KeyError):
            st.error("❌ HF_TOKEN not found. Add it to .env or secrets.toml.")
            return None

    return token


# -------------------------------
# Model Loading and Management
# -------------------------------

async def async_load(model_id: str):
    """
    Dummy async function to initialize the event loop.
    """
    await asyncio.sleep(0.1)

@st.cache_resource
def load_model(model_id: str, token: str, checkpoint_path: str = None):
    """
    Loads and caches the Gemma model and tokenizer with the Hugging Face token.

    Args:
        model_id (str): The Hugging Face model ID.
        token (str): The authentication token.
        checkpoint_path (str): Optional path to a fine-tuned model checkpoint.

    Returns:
        tuple: tokenizer, model
    """
    try:
        asyncio.run(async_load(model_id))

        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        model = AutoModelForCausalLM.from_pretrained(model_id, token=token)

        # Load fine-tuned checkpoint if provided
        if checkpoint_path and os.path.exists(checkpoint_path):
            model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cpu')))
            model.eval()
            st.success("✅ Fine-tuned model loaded successfully!")

        return tokenizer, model

    except Exception as e:
        st.error(f"❌ Model loading failed: {e}")
        return None, None


# -------------------------------
# Model Saving Function
# -------------------------------

def save_model(model, model_name: str):
    """
    Saves the fine-tuned model to the specified path.

    Args:
        model (torch.nn.Module): The PyTorch model instance.
        model_name (str): The file path to save the model.

    Returns:
        str: The path where the model is saved.
    """
    try:
        # Ensure the models directory exists
        os.makedirs(os.path.dirname(model_name), exist_ok=True)

        # Save the model
        torch.save(model.state_dict(), model_name)
        st.success(f"✅ Model saved successfully at `{model_name}`")
        return model_name
    except Exception as e:
        st.error(f"❌ Failed to save model: {e}")
        return None


# -------------------------------
# File Processing and Cleaning
# -------------------------------

def preprocess_data(uploaded_file, file_extension):
    """
    Reads the uploaded file and returns a processed version.
    Supports CSV, JSONL, and TXT.
    """
    try:
        if file_extension == "csv":
            return pd.read_csv(uploaded_file)

        elif file_extension == "jsonl":
            data = [json.loads(line) for line in uploaded_file.readlines()]
            try:
                return pd.DataFrame(data)
            except Exception:
                st.warning("⚠️ Unable to convert JSONL to table. Previewing raw JSON.")
                return data

        elif file_extension == "txt":
            text_data = uploaded_file.read().decode("utf-8")
            return text_data.splitlines()

    except Exception as e:
        st.error(f"❌ Error processing file: {e}")
        return None


def clean_text(text, lowercase=True, remove_punctuation=True):
    """
    Cleans text data by applying basic normalization.
    """
    if lowercase:
        text = text.lower()
    if remove_punctuation:
        text = re.sub(r'[^\w\s]', '', text)
    return text


# -------------------------------
# Model Conversion and Quantization
# -------------------------------

def quantize_model(model):
    """
    Applies dynamic quantization.
    """
    try:
        quantized_model = torch.quantization.quantize_dynamic(
            model, {torch.nn.Linear}, dtype=torch.qint8
        )
        st.success("✅ Model quantized successfully!")
        return quantized_model
    except Exception as e:
        st.error(f"❌ Quantization failed: {e}")
        return model


def convert_to_torchscript(model, output_path="model_ts.pt"):
    """
    Converts the model to TorchScript format.
    """
    try:
        example_input = torch.randint(0, 100, (1, 10))
        traced_model = torch.jit.trace(model, example_input)
        traced_model.save(output_path)
        return output_path
    except Exception as e:
        st.error(f"❌ TorchScript conversion failed: {e}")
        return None


def convert_to_onnx(model, output_path="model.onnx"):
    """
    Converts the model to ONNX format.
    """
    try:
        dummy_input = torch.randint(0, 100, (1, 10))
        torch.onnx.export(model, dummy_input, output_path, input_names=["input"], output_names=["output"])
        return output_path
    except Exception as e:
        st.error(f"❌ ONNX conversion failed: {e}")
        return None


# -------------------------------
# Model Inference and Training
# -------------------------------

def simulate_training(num_epochs):
    """
    Simulates a training loop for demonstration.
    Yields current epoch, loss values, and accuracy values.
    """
    loss_values = []
    accuracy_values = []
    for epoch in range(1, num_epochs + 1):
        loss = np.exp(-epoch) + np.random.random() * 0.1
        acc = 0.5 + (epoch / num_epochs) * 0.5 + np.random.random() * 0.05
        loss_values.append(loss)
        accuracy_values.append(acc)
        yield epoch, loss_values, accuracy_values
        time.sleep(1)


def plot_training_metrics(epochs, loss_values, accuracy_values):
    """
    Plots training loss and accuracy.
    """
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    ax[0].plot(range(1, epochs+1), loss_values, marker='o', color='red')
    ax[0].set_title("Training Loss")
    ax[0].set_xlabel("Epoch")
    ax[0].set_ylabel("Loss")

    ax[1].plot(range(1, epochs+1), accuracy_values, marker='o', color='green')
    ax[1].set_title("Training Accuracy")
    ax[1].set_xlabel("Epoch")
    ax[1].set_ylabel("Accuracy")

    return fig


def generate_response(prompt, model, tokenizer, max_length=200):
    """
    Generates a response using the fine-tuned model.
    """
    try:
        inputs = tokenizer(prompt, return_tensors="pt").input_ids

        with torch.no_grad():
            outputs = model.generate(inputs, max_length=max_length, num_return_sequences=1, temperature=0.7)

        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        st.error(f"❌ Response generation failed: {e}")
        return ""


# -------------------------------
# Model Loading for Inference
# -------------------------------

def load_finetuned_model(model, checkpoint_path="fine_tuned_model.pt"):
    """
    Loads a fine-tuned model from a checkpoint.
    """
    if os.path.exists(checkpoint_path):
        model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cpu')))
        model.eval()
        st.success("✅ Fine-tuned model loaded successfully!")
    else:
        st.error(f"❌ Checkpoint not found: {checkpoint_path}")
    return model


import pyarrow as pa  # used by the Arrow-compatibility helpers below


# ======================================
# Dataset Operations
# ======================================
def load_dataset(path: str) -> pd.DataFrame:
    """Load dataset from CSV with error handling."""
    try:
        df = pd.read_csv(path)
        return make_arrow_compatible(df)
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return pd.DataFrame()


def save_dataset(df: pd.DataFrame, path: str):
    """Save dataset to CSV with error handling."""
    try:
        df.to_csv(path, index=False)
    except Exception as e:
        print(f"Error saving dataset: {e}")


def list_datasets(directory: str = "datasets") -> list:
    """List all available datasets in the directory."""
    try:
        return [f for f in os.listdir(directory) if f.endswith(('.csv', '.json', '.xlsx'))]
    except Exception as e:
        print(f"Error listing datasets: {e}")
        return []


# ======================================
# Data Cleaning Functions
# ======================================
def clean_dataset(
    df: pd.DataFrame,
    remove_duplicates: bool = True,
    fill_missing: bool = False,
    fill_value: str = "0",
    trim_spaces: bool = True
) -> pd.DataFrame:
    """
    Clean the dataset with multiple operations:
    - Remove duplicates
    - Fill missing values
    - Trim spaces
    - Remove empty columns and rows
    - Auto-cast date columns
    """
    # Remove duplicates
    if remove_duplicates:
        df = df.drop_duplicates()

    # Fill missing values
    if fill_missing:
        df = df.fillna(fill_value)

    # Trim spaces
    if trim_spaces:
        df = df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

    # Remove empty columns & rows
    df = df.dropna(how="all", axis=1)
    df = df.dropna(how="all", axis=0)

    # Auto-cast date columns
    for col in df.columns:
        try:
            df[col] = pd.to_datetime(df[col])
        except (ValueError, TypeError):
            pass

    return make_arrow_compatible(df)


# --------------------------------------
# Dataset Quality Score
# --------------------------------------
def compute_dataset_score(df):
    """Compute dataset quality score."""
    if df.empty:
        return 0.0

    total_cells = np.prod(df.shape)
    missing_cells = df.isnull().sum().sum()
    missing_ratio = missing_cells / total_cells

    duplicate_ratio = 1 - (df.drop_duplicates().shape[0] / df.shape[0])

    numeric_cols = df.select_dtypes(include=["number"]).columns
    if len(numeric_cols) > 0:
        skew_vals = df[numeric_cols].apply(lambda x: np.abs(skew(x.dropna())), axis=0)
        kurt_vals = df[numeric_cols].apply(lambda x: np.abs(kurtosis(x.dropna())), axis=0)
        numeric_score = 1 - (skew_vals.mean() + kurt_vals.mean()) / 10
    else:
        numeric_score = 1

    score = (1 - missing_ratio) * (1 - duplicate_ratio) * numeric_score * 100
    return round(score, 2)


# --------------------------------------
# Outlier Detection
# --------------------------------------
def detect_outliers(df, threshold=3):
    """Detect outliers in numeric columns using Z-score."""
    numeric_cols = df.select_dtypes(include=["number"]).columns
    outliers = {}
    for col in numeric_cols:
        z_scores = np.abs(zscore(df[col].dropna()))
        outliers[col] = np.sum(z_scores > threshold)
    return outliers


# --------------------------------------
# Detect Inconsistent Types
# --------------------------------------
def detect_inconsistent_types(df):
    """Detect inconsistent data types across columns."""
    inconsistent_cols = {}
    for col in df.columns:
        if df[col].apply(type).nunique() > 1:
            inconsistent_cols[col] = df[col].apply(type).value_counts().to_dict()
    return inconsistent_cols


# ======================================
# Data Transformations
# ======================================
def apply_transformation(df: pd.DataFrame, col: str, transform: str) -> pd.DataFrame:
    """
    Apply transformations to a specified column:
    - Log Transformation
    - Min-Max Normalization
    - Z-score Standardization
    """
    if col not in df.columns:
        raise KeyError(f"Column '{col}' not found in dataset")

    if transform == "Log":
        df[col] = np.log1p(df[col].replace(0, np.nan)).fillna(0)

    elif transform == "Normalize":
        df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())

    elif transform == "Standardize":
        df[col] = (df[col] - df[col].mean()) / df[col].std()

    return make_arrow_compatible(df)


# ======================================
# Normalization & Standardization
# ======================================
def normalize_column(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Normalize column (Min-Max Scaling)."""
    df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())
    return df


def standardize_column(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Standardize column (Z-score)."""
    df[col] = (df[col] - df[col].mean()) / df[col].std()
    return df


# ======================================
# Arrow Compatibility & Fixes
# ======================================
def make_arrow_compatible(df: pd.DataFrame) -> pd.DataFrame:
    """
    Ensure dataset compatibility with Streamlit Arrow serialization.
    """
    for col in df.columns:
        if df[col].dtype == object:
            try:
                df[col] = df[col].astype(str)
            except Exception as e:
                print(f"Could not convert column {col}: {e}")
    return df


def fix_arrow_incompatibility(df: pd.DataFrame) -> pd.DataFrame:
    """
    Fix Arrow incompatibility by converting mixed types to `str`.
    """
    for col in df.columns:
        try:
            pa.Table.from_pandas(df[[col]])
        except pa.lib.ArrowInvalid:
            print(f"Arrow compatibility issue in column: {col}")
            df[col] = df[col].astype(str)
    return df
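For reference, a small worked example of the formula implemented by compute_dataset_score (the input statistics below are made up):

# Illustrative numbers only: 5% missing cells, 10% duplicate rows,
# mean |skew| = 0.8 and mean |kurtosis| = 1.2 across numeric columns.
missing_ratio = 0.05
duplicate_ratio = 0.10
numeric_score = 1 - (0.8 + 1.2) / 10           # = 0.8

score = (1 - missing_ratio) * (1 - duplicate_ratio) * numeric_score * 100
print(round(score, 2))                         # ≈ 68.4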