Spaces:

Maaz1
/

Banglore_RealEstate_forecast-using-CICD-piplines

Sleeping

Banglore_RealEstate_forecast-using-CICD-piplines / src /model.py

Maaz Uddin

allfilesupload

e0a433a about 2 months ago

3.89 kB

	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LinearRegression
	from sklearn.metrics import mean_squared_error, r2_score
	import pickle # Import pickle for saving models
	import os # Import os for directory operations

	class ModelBuilder:
	    """Train, evaluate and persist a linear-regression model for one dataset.

	    Typical flow: ``split_data`` -> ``train_model`` -> ``evaluate_model`` ->
	    ``save_model_as_pickle`` / ``save_features_as_pickle``. A previously saved
	    model can be restored with ``load_model_from_pickle``.
	    """

	    def __init__(self, data):
	        """Store the dataset and start with no trained model.

	        Args:
	            data: Dataset containing the feature columns and the target
	                column. ``split_data`` reads its ``.columns`` and calls
	                ``.drop(columns=...)`` on it.
	        """
	        self.data = data
	        # Populated by train_model() or load_model_from_pickle().
	        self.model = None

	    @staticmethod
	    def _ensure_parent_dir(path):
	        """Create the parent directory of ``path`` if the path names one."""
	        parent = os.path.dirname(path)
	        # dirname() is '' for a bare filename, and os.makedirs('') raises,
	        # so only create a directory when there actually is one to create.
	        if parent:
	            os.makedirs(parent, exist_ok=True)

	    def split_data(self, target_column, test_size=0.2, random_state=42):
	        """Split the stored dataset into training and testing sets.

	        Args:
	            target_column: Name of the column to predict; every other column
	                is used as a feature.
	            test_size: Forwarded to ``train_test_split``.
	            random_state: Forwarded to ``train_test_split``.

	        Returns:
	            The tuple ``(X_train, X_test, y_train, y_test)``.

	        Raises:
	            ValueError: If ``target_column`` is not a column of the dataset.
	        """
	        if target_column not in self.data.columns:
	            raise ValueError(f"Target column '{target_column}' not found in the dataset.")

	        X = self.data.drop(columns=[target_column])
	        y = self.data[target_column]

	        X_train, X_test, y_train, y_test = train_test_split(
	            X, y, test_size=test_size, random_state=random_state
	        )

	        print(f"Data split complete: Train size = {len(X_train)}, Test size = {len(X_test)}")
	        return X_train, X_test, y_train, y_test

	    def train_model(self, X_train, y_train):
	        """Fit a ``LinearRegression`` on the training data and keep it on ``self.model``.

	        Args:
	            X_train: Training features.
	            y_train: Training target values.
	        """
	        self.model = LinearRegression()
	        self.model.fit(X_train, y_train)
	        print("Model training complete.")

	    def evaluate_model(self, X_test, y_test):
	        """Evaluate the current model on a held-out test set.

	        Args:
	            X_test: Test features.
	            y_test: True target values for ``X_test``.

	        Returns:
	            The tuple ``(mse, r2)``: mean squared error and R2 score of the
	            predictions on ``X_test``.

	        Raises:
	            ValueError: If no model has been trained or loaded yet.
	        """
	        if self.model is None:
	            raise ValueError("Model has not been trained yet.")

	        y_pred = self.model.predict(X_test)
	        mse = mean_squared_error(y_test, y_pred)
	        r2 = r2_score(y_test, y_pred)

	        print(f"Model Evaluation:\nMean Squared Error: {mse}\nR2 Score(accuracy): {r2}")
	        return mse, r2

	    def save_model_as_pickle(self, model_path='models/lr_regg.pkl'):
	        """Save the current model as a pickle file.

	        The parent directory of ``model_path`` is created when it does not
	        exist yet.

	        Args:
	            model_path: Destination file path.

	        Returns:
	            ``model_path``, unchanged.

	        Raises:
	            ValueError: If no model has been trained or loaded yet.
	        """
	        if self.model is None:
	            raise ValueError("Model has not been trained yet.")

	        self._ensure_parent_dir(model_path)

	        with open(model_path, 'wb') as file:
	            pickle.dump(self.model, file)

	        print(f"Model saved as pickle at {model_path}")
	        return model_path

	    def save_features_as_pickle(self, data, target_column='price', file_path='models/feature_names.pkl'):
	        """Save the feature column names of ``data`` as a pickle file.

	        The names are written as a list, in column order, with
	        ``target_column`` excluded. The parent directory of ``file_path`` is
	        created when it does not exist yet.

	        Args:
	            data (pd.DataFrame): Input dataset.
	            target_column (str): Name of the target column to exclude from features.
	            file_path (str): Path to save the pickle file.

	        Returns:
	            ``file_path``, unchanged.

	        Raises:
	            ValueError: If ``target_column`` is not a column of ``data``.
	        """
	        if target_column not in data.columns:
	            raise ValueError(f"Target column '{target_column}' not found in the dataset.")

	        feature_names = data.drop(columns=[target_column]).columns.tolist()

	        self._ensure_parent_dir(file_path)

	        with open(file_path, "wb") as file:
	            pickle.dump(feature_names, file)

	        print(f"Feature names saved to {file_path}")
	        return file_path

	    def load_model_from_pickle(self, model_path):
	        """Load a model from a pickle file and keep it on ``self.model``.

	        Only load files from a trusted source: unpickling can execute
	        arbitrary code embedded in the file.

	        Args:
	            model_path: Path of the pickle file to read.

	        Returns:
	            The loaded model (also stored on ``self.model``).

	        Raises:
	            FileNotFoundError: If ``model_path`` does not exist.
	        """
	        if not os.path.exists(model_path):
	            raise FileNotFoundError(f"No model found at {model_path}")

	        # SECURITY: pickle.load runs code from the file; never point this at
	        # a path that untrusted users can write to.
	        with open(model_path, 'rb') as file:
	            self.model = pickle.load(file)

	        print(f"Model loaded from {model_path}")
	        return self.model