# Maaz Uddin
# allfilesupload
# e0a433a
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import pickle # Import pickle for saving models
import os # Import os for directory operations
class ModelBuilder:
    """Train, evaluate and persist a linear-regression model for a dataset.

    The instance keeps the dataset passed to the constructor in ``self.data``
    and the fitted (or loaded) estimator in ``self.model``; ``self.model`` is
    ``None`` until ``train_model`` or ``load_model_from_pickle`` has run.
    """

    def __init__(self, data):
        """Initialize with the dataset.

        Args:
            data: Dataset to model. ``split_data`` reads ``data.columns`` and
                calls ``data.drop(columns=...)`` on it.
        """
        self.data = data
        self.model = None

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the parent directory of ``path`` if the path has one."""
        parent = os.path.dirname(path)
        # A bare filename has an empty dirname, and os.makedirs('') raises,
        # so only create the directory when there is one to create.
        if parent:
            os.makedirs(parent, exist_ok=True)

    def split_data(self, target_column, test_size=0.2, random_state=42):
        """Split the data into training and testing sets.

        Args:
            target_column: Name of the column to predict; every other column
                is used as a feature.
            test_size: Forwarded to ``train_test_split``.
            random_state: Forwarded to ``train_test_split``.

        Returns:
            The tuple ``(X_train, X_test, y_train, y_test)``.

        Raises:
            ValueError: If ``target_column`` is not a column of the dataset.
        """
        if target_column not in self.data.columns:
            raise ValueError(f"Target column '{target_column}' not found in the dataset.")

        X = self.data.drop(columns=[target_column])
        y = self.data[target_column]
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )
        print(f"Data split complete: Train size = {len(X_train)}, Test size = {len(X_test)}")
        return X_train, X_test, y_train, y_test

    def train_model(self, X_train, y_train):
        """Fit a new ``LinearRegression`` model and store it on ``self.model``.

        Args:
            X_train: Training features.
            y_train: Training target values.
        """
        self.model = LinearRegression()
        self.model.fit(X_train, y_train)
        print("Model training complete.")

    def evaluate_model(self, X_test, y_test):
        """Evaluate the current model on the test set.

        Args:
            X_test: Test features.
            y_test: True target values for ``X_test``.

        Returns:
            The tuple ``(mse, r2)``.

        Raises:
            ValueError: If no model has been trained or loaded yet.
        """
        if self.model is None:
            raise ValueError("Model has not been trained yet.")

        y_pred = self.model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        # For a scikit-learn regressor, model.score() is the same R^2 value,
        # so a separate score() call would only repeat the prediction pass.
        r2 = r2_score(y_test, y_pred)
        print(f"Model Evaluation:\nMean Squared Error: {mse}\nR2 Score(accuracy): {r2}")
        return mse, r2

    def save_model_as_pickle(self, model_path='models/lr_regg.pkl'):
        """Save the trained model as a pickle file.

        Args:
            model_path: Destination file. Its parent directory is created
                when it does not exist yet.

        Returns:
            The path the model was written to.

        Raises:
            ValueError: If no model has been trained or loaded yet.
        """
        if self.model is None:
            raise ValueError("Model has not been trained yet.")

        # Without this, the default 'models/...' path fails on a fresh checkout.
        self._ensure_parent_dir(model_path)
        with open(model_path, 'wb') as file:
            pickle.dump(self.model, file)
        print(f"Model saved as pickle at {model_path}")
        return model_path

    def save_features_as_pickle(self, data, target_column='price', file_path='models/feature_names.pkl'):
        """Extract feature names from the data and save them as a pickle file.

        Args:
            data (pd.DataFrame): Input dataset.
            target_column (str): Name of the target column to exclude from features.
            file_path (str): Path to save the pickle file. Its parent directory
                is created when it does not exist yet.

        Raises:
            ValueError: If ``target_column`` is not a column of ``data``.
        """
        if target_column not in data.columns:
            raise ValueError(f"Target column '{target_column}' not found in the dataset.")

        # Everything except the target column counts as a feature.
        feature_names = data.drop(columns=[target_column]).columns.tolist()

        self._ensure_parent_dir(file_path)
        with open(file_path, "wb") as file:
            pickle.dump(feature_names, file)
        print(f"Feature names saved to {file_path}")

    def load_model_from_pickle(self, model_path):
        """Load a model from a pickle file and store it on ``self.model``.

        Args:
            model_path: Path of the pickle file to read.

        Returns:
            The unpickled object, which is also assigned to ``self.model``.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"No model found at {model_path}")

        # SECURITY: unpickling can execute arbitrary code. Only load files
        # that this application wrote itself or that come from a trusted source.
        with open(model_path, 'rb') as file:
            self.model = pickle.load(file)
        print(f"Model loaded from {model_path}")
        return self.model