From 60d0255c65320183464ed594df06955a685f47f8 Mon Sep 17 00:00:00 2001 From: rw1u22 <rw1u22@soton.ac.uk> Date: Thu, 8 Jun 2023 10:32:14 +0000 Subject: [PATCH] Update PartB.py --- PartB.py | 142 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 85 insertions(+), 57 deletions(-) diff --git a/PartB.py b/PartB.py index c97be40..f62f44a 100644 --- a/PartB.py +++ b/PartB.py @@ -1,7 +1,7 @@ import pandas as pd from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression -from sklearn.metrics import accuracy_score +from sklearn.metrics import accuracy_score, f1_score # Load the training data and testing data into a Pandas DataFrame data = pd.read_csv('TrainingDataMulti.csv', header=None) @@ -14,59 +14,87 @@ y = data.iloc[:, -1] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0) -# Fit data using Logistic Regression model training -LR = LogisticRegression(C=1e5, max_iter=5000) -LR.fit(X_train, y_train) - -# Make predictions on the testing data -y_pred = LR.predict(X_test) - -# Evaluate the accuracy of the model -accuracy = accuracy_score(y_test, y_pred) -print(f"LogisticRegression Accuracy: {accuracy}") - - -# Fit data using Decision Tree model training -from sklearn.tree import DecisionTreeClassifier -DT = DecisionTreeClassifier(random_state=0) -DT.fit(X_train, y_train) - -# Make predictions on the testing data -y_pred = DT.predict(X_test) - -# Evaluate the accuracy of the model -accuracy = accuracy_score(y_test, y_pred) -print(f"DecisionTree Accuracy: {accuracy}") - - -# Fit data using Random Forest model training -from sklearn.ensemble import RandomForestClassifier -RF = RandomForestClassifier(random_state=0) -RF.fit(X_train, y_train) -y_pred = RF.predict(X_test) -accuracy = accuracy_score(y_test, y_pred) -print(f"RandomForest Accuracy: {accuracy}") - - -# Fit data using SVM model training -from sklearn.svm import SVC -SVM = SVC(random_state=0) -SVM.fit(X_train, y_train) -y_pred = SVM.predict(X_test) -accuracy = accuracy_score(y_test, y_pred) -print(f"SVM Accuracy: {accuracy}") - - -# Fit data using KNN model training -from sklearn.neighbors import KNeighborsClassifier -KNN = KNeighborsClassifier() -KNN.fit(X_train, y_train) -y_pred = KNN.predict(X_test) -accuracy = accuracy_score(y_test, y_pred) -print(f"KNN Accuracy: {accuracy}") - - -# Use Random Forest model to predict the test data and save the results to a CSV file -pridictions = RF.predict(test_data) -test_data['Prediction'] = pridictions -test_data.to_csv('TestingResultsMulti.csv', index=False, header=False) +def Logistic_Regression(): + # Fit data using Logistic Regression model training + LR = LogisticRegression(C=1e5, max_iter=5000) + LR.fit(X_train, y_train) + + # Make predictions on the testing data + y_pred = LR.predict(X_test) + + # Testing the accuracy of the model + accuracy = accuracy_score(y_test, y_pred) + # Testing the F1 score of the model + f1 = f1_score(y_test, y_pred, average='weighted') + print("+-------------------------------------------+") + print(f"LogisticRegression Accuracy: {accuracy}") + print(f"LogisticRegression F1 Score: {f1}") + + +def Decision_Tree(): + # Fit data using Decision Tree model training + from sklearn.tree import DecisionTreeClassifier + DT = DecisionTreeClassifier(random_state=0) + DT.fit(X_train, y_train) + + # Make predictions on the testing data + y_pred = DT.predict(X_test) + + # Evaluate the accuracy of the model + accuracy = accuracy_score(y_test, y_pred) + # Testing the F1 score of the model + f1 = f1_score(y_test, y_pred, average='weighted') + print("+-------------------------------------------+") + print(f"DecisionTree Accuracy: {accuracy}") + print(f"DecisionTree F1 Score: {f1}") + + +def Random_Forest(): + # Fit data using Random Forest model training + from sklearn.ensemble import RandomForestClassifier + RF = RandomForestClassifier(random_state=0) + RF.fit(X_train, y_train) + y_pred = RF.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred, average='weighted') + print("+-------------------------------------------+") + print(f"RandomForest Accuracy: {accuracy}") + print(f"RandomForest F1 Score: {f1}") + return RF + + +def svm(): + # Fit data using SVM model training + from sklearn.svm import SVC + SVM = SVC(random_state=0) + SVM.fit(X_train, y_train) + y_pred = SVM.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred, average='weighted') + print("+-------------------------------------------+") + print(f"SVM Accuracy: {accuracy}") + print(f"SVM F1 Score: {f1}") + + +def knn(): + # Fit data using KNN model training + from sklearn.neighbors import KNeighborsClassifier + KNN = KNeighborsClassifier() + KNN.fit(X_train, y_train) + y_pred = KNN.predict(X_test) + accuracy = accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred, average='weighted') + print("+-------------------------------------------+") + print(f"KNN Accuracy: {accuracy}") + print(f"KNN F1 Score: {f1}") + +if __name__ == "__main__": + Logistic_Regression() + Decision_Tree() + RF = Random_Forest() + svm() + knn() + # Use Random Forest model to predict the test data and save the results to a CSV file + pridictions = RF.predict(test_data) + test_data['Prediction'] = pridictions + test_data.to_csv('TestingResultsMulti.csv', index=False, header=False) -- GitLab