# One Hot Encoder and Linear Regression
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 9 23:39:19 2023
@author: Syed Kamran Bukhari
"""
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures
from statsmodels.stats.stattools import durbin_watson

# Script: one-hot encode the categorical predictors in possum.csv, fit an
# ordinary linear regression for "belly", and report RMSE, R2 and the
# Durbin-Watson statistic computed on the training data.

# --- Load data and select predictors / target -----------------------------
dataset = pd.read_csv("possum.csv")
selected_features_x = ["Pop", "sex", "age", "chest"]
selected_features_y = ["belly"]
X = dataset[selected_features_x]
Y = dataset[selected_features_y]

# --- Encode categorical columns --------------------------------------------
# "Pop" and "sex" are one-hot encoded; the remaining columns ("age",
# "chest") are passed through unchanged.
ct = ColumnTransformer(
    transformers=[("encoders", OneHotEncoder(), ["Pop", "sex"])],
    remainder="passthrough",
)
X = np.array(ct.fit_transform(X))

# --- Fill missing values with the column mean ------------------------------
# fit_transform() refits the imputer on every call, so a separate fit()
# beforehand is unnecessary. The same object is reused for X and then Y, so
# after these two lines `imputer` holds the statistics learned from Y.
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
X = imputer.fit_transform(X)
Y = imputer.fit_transform(Y)

# --- Fit the model ----------------------------------------------------------
# NOTE: the model is a plain linear regression on the encoded features. A
# degree-4 PolynomialFeatures expansion was previously computed at this point
# but its output was never used, so that dead call has been dropped.
linear_regression = LinearRegression()
linear_regression.fit(X, Y)

# Prediction for one hand-written sample in encoded-column order.
# NOTE(review): presumably two "Pop" indicators, two "sex" indicators, then
# age and chest -- confirm the order with ct.get_feature_names_out().
Y_pred = linear_regression.predict([[1, 0, 0, 1, 8, 28]])
print("\n")

# --- Goodness-of-fit metrics (all on the training data) --------------------
fitted = linear_regression.predict(X)

# RMSE score
# Residuals are the signed differences observed - fitted; taking abs() of
# each side first would give wrong values whenever either one is negative.
residual = Y - fitted
ss = (residual ** 2).sum()
# Root *mean* squared error: divide the sum of squares by the number of
# observations before taking the square root.
RMSE = math.sqrt(ss / residual.size)
print('The RMSE score is =', RMSE)

# R2 score
R2 = r2_score(Y, fitted)
print('The R2 score is =', R2)

# Durbin Watson Statistics
# Flatten the (n, 1) residual column so the statistic is a single number
# rather than a one-element array.
DW = durbin_watson(residual.ravel())
print('Durbin Watson Statistics =', DW)
Comments
Post a Comment