# Random Forest Tree with test train split
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 14 13:20:09 2023
@author: Syed Kamran Bukhari
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Load the possum measurements and replace the "sex" column with the
# integer codes produced by a label encoder.
dataset = pd.read_csv("possum.csv")
label_encoder = LabelEncoder()
dataset["sex"] = label_encoder.fit_transform(dataset["sex"])

# Predictor columns and the target column. Both are selected with a list,
# so X and Y are DataFrames (Y is a single-column, two-dimensional frame).
selected_feature_X = ["Pop", "sex", "age", "chest"]
selected_feature_Y = ["belly"]
X = dataset[selected_feature_X]
Y = dataset[selected_feature_Y]
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical "Pop" column and pass every other selected
# column through unchanged.
# sparse_threshold=0 forces a dense result: otherwise the transformer may
# hand back a SciPy sparse matrix (depending on the data's density), and
# np.array() would wrap that in a 0-d object array instead of producing a
# 2-D numeric array.
ct = ColumnTransformer(
    transformers=[("encoders", OneHotEncoder(), ["Pop"])],
    remainder="passthrough",
    sparse_threshold=0,
)
X = np.array(ct.fit_transform(X))
from sklearn.impute import SimpleImputer

# Fill missing entries with the most frequent value of each column.
# fit_transform() already fits the imputer, so no separate fit() call is
# needed beforehand.
# NOTE(review): imputation is fitted on the full data before the train/test
# split that follows, so held-out rows influence the fill values; consider
# fitting on the training portion only.
imputer = SimpleImputer(missing_values=np.nan, strategy="most_frequent")
X = imputer.fit_transform(X)

# A dedicated imputer handles the target so the statistics learned for the
# features are not overwritten and `imputer` can still transform new X data.
target_imputer = SimpleImputer(missing_values=np.nan, strategy="most_frequent")
Y = target_imputer.fit_transform(Y)
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)
# Random forest
from sklearn.ensemble import RandomForestRegressor

forest_regressor = RandomForestRegressor(n_estimators=90, random_state=0)
# Y_train is a single-column 2-D array; flatten it to the 1-D target the
# regressor expects, otherwise scikit-learn emits a DataConversionWarning.
forest_regressor.fit(X_train, np.ravel(Y_train))
Y_pred = forest_regressor.predict(X_test)
# Column-vector form of the predictions so they line up element-wise with
# the 2-D Y_test instead of broadcasting against it.
Y_pred_abs = Y_pred.reshape(len(Y_pred), 1)
# RMSE
from math import sqrt

# Absolute prediction error for every test row.
residual = np.abs(Y_test - Y_pred_abs)
# Root of the mean squared error over the test set.
ss = sqrt(np.square(residual).sum() / len(Y_test))
print("RMSE score is = ", ss)
# R2 score
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions on the held-out rows.
R2 = r2_score(y_true=Y_test, y_pred=Y_pred)
print("R2 Score is = ", R2)
# Durbin-Watson
from statsmodels.stats.stattools import durbin_watson

# The statistic is defined on signed residuals: absolute errors discard the
# sign changes it is meant to measure, so the signed errors are computed
# here rather than reusing the absolute ones from the RMSE step.
# Flattening to 1-D makes the result a scalar instead of a one-element array.
signed_residual = np.ravel(Y_test - Y_pred_abs)
DB = durbin_watson(signed_residual)
print("Durbin Watson Score is = ", DB)
# Comments
# Post a Comment