# Linear Regressor Technique
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 16 13:25:06 2023
@author: Syed Kamran Bukhari
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the CSV file and separate it into features and target:
# every column except the last goes to X, the last column goes to Y.
dataset = pd.read_csv('50_Startups.csv')
feature_frame = dataset.iloc[:, :-1]
target_series = dataset.iloc[:, -1]
X = feature_frame.values
Y = target_series.values
# One-hot encode the feature at column index 3; every other column is
# passed through untouched.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
encoder_step = ('encoder', OneHotEncoder(), [3])
ct = ColumnTransformer(
    transformers=[encoder_step],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))
# Split the data into training and test sets: 20% of the rows are held out
# for testing, and the fixed random_state makes the split repeatable.
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)
# Fit an ordinary least-squares model on the training split, then predict
# the held-out test targets.
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X=X_train, y=Y_train)
Y_pred = regressor.predict(X_test)
# RMSE score
# Root-mean-square error on the test set: square the errors, average them,
# then take the square root. Without the square root the value printed is
# the mean squared error, not the RMSE named in the label.
# `residual` is kept as the absolute error because later code reads it.
residual = abs(Y_test - Y_pred)
ss = np.sqrt((residual ** 2).sum() / len(Y_test))
print('The RMSE score is =', ss)
# Coefficient of determination (R^2) of the test-set predictions.
from sklearn.metrics import r2_score
R2 = r2_score(y_true=Y_test, y_pred=Y_pred)
print('The R2 score is =', R2)
# Durbin Watson Statistics
from statsmodels.stats.stattools import durbin_watson
# The statistic is sum((e[t] - e[t-1])**2) / sum(e[t]**2) over the *signed*
# residuals. Feeding it absolute values alters the successive differences
# and yields a wrong result, so the signed test-set errors are used here.
DW = durbin_watson(Y_test - Y_pred)
print('Durbin Watson Statistics =', DW)
# Scatter plot of original vs. predicted target values for the training set.
train_pred = regressor.predict(X_train)
plt.scatter(Y_train, train_pred, color='red')
# Title previously had an unbalanced opening parenthesis.
plt.title('Original vs Predicted Values (training Set)')
plt.xlabel('Original Values')
plt.ylabel('Predicted Values')
plt.show()
# Comments
# Post a Comment