# task2.py — Multi-class classification with a Random Forest.
#
# Reads TrainingDataMulti.csv (features, with the class label in the last
# column) and TestingDataMulti.csv (features only, no labels), trains a
# RandomForestClassifier, reports hold-out accuracy on a 20% validation
# split, then writes the test-set predictions to TestingResultsMulti.csv.

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


def main() -> None:
    """Train, evaluate, and export predictions for the multi-class task."""
    # The CSV files have no header row: every row is data, every column numeric.
    train_df = pd.read_csv('TrainingDataMulti.csv', header=None)
    test_df = pd.read_csv('TestingDataMulti.csv', header=None)

    # Last column is the class label; all preceding columns are features.
    X = train_df.iloc[:, :-1]
    y = train_df.iloc[:, -1]

    # 80% train / 20% validation split. Fixing random_state on BOTH the
    # split and the forest makes the whole run reproducible (the original
    # seeded only the split, so accuracy varied between runs).
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=21
    )
    model = RandomForestClassifier(random_state=21)
    model.fit(X_train, y_train)

    # Evaluate on the held-out validation split.
    y_pred = model.predict(X_val)
    accuracy = accuracy_score(y_val, y_pred)
    print("Prediction Error:", 1 - accuracy)
    print("Accuracy:", accuracy)

    # Predict labels for the unlabeled test set and save them alongside the
    # original feature columns (direct column assignment replaces the old
    # DataFrame -> tolist() -> column round-trip; same resulting CSV).
    test_df['Prediction'] = model.predict(test_df)
    test_df.to_csv("TestingResultsMulti.csv", index=False)


if __name__ == "__main__":
    main()