From 60d0255c65320183464ed594df06955a685f47f8 Mon Sep 17 00:00:00 2001
From: rw1u22 <rw1u22@soton.ac.uk>
Date: Thu, 8 Jun 2023 10:32:14 +0000
Subject: [PATCH] Update PartB.py: wrap each model in a function and report F1 score

---
 PartB.py | 142 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 85 insertions(+), 57 deletions(-)

diff --git a/PartB.py b/PartB.py
index c97be40..f62f44a 100644
--- a/PartB.py
+++ b/PartB.py
@@ -1,7 +1,7 @@
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import accuracy_score
+from sklearn.metrics import accuracy_score, f1_score
 
 # Load the training data and testing data into a Pandas DataFrame
 data = pd.read_csv('TrainingDataMulti.csv', header=None)
@@ -14,59 +14,87 @@ y = data.iloc[:, -1]
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
 
 
-# Fit data using Logistic Regression model training
-LR = LogisticRegression(C=1e5, max_iter=5000)
-LR.fit(X_train, y_train)
-
-# Make predictions on the testing data
-y_pred = LR.predict(X_test)
-
-# Evaluate the accuracy of the model
-accuracy = accuracy_score(y_test, y_pred)
-print(f"LogisticRegression Accuracy: {accuracy}")
-
-
-# Fit data using Decision Tree model training
-from sklearn.tree import DecisionTreeClassifier
-DT = DecisionTreeClassifier(random_state=0)
-DT.fit(X_train, y_train)
-
-# Make predictions on the testing data
-y_pred = DT.predict(X_test)
-
-# Evaluate the accuracy of the model
-accuracy = accuracy_score(y_test, y_pred)
-print(f"DecisionTree Accuracy: {accuracy}")
-
-
-# Fit data using Random Forest model training
-from sklearn.ensemble import RandomForestClassifier
-RF = RandomForestClassifier(random_state=0)
-RF.fit(X_train, y_train)
-y_pred = RF.predict(X_test)
-accuracy = accuracy_score(y_test, y_pred)
-print(f"RandomForest Accuracy: {accuracy}")
-
-
-# Fit data using SVM model training
-from sklearn.svm import SVC
-SVM = SVC(random_state=0)
-SVM.fit(X_train, y_train)
-y_pred = SVM.predict(X_test)
-accuracy = accuracy_score(y_test, y_pred)
-print(f"SVM Accuracy: {accuracy}")
-
-
-# Fit data using KNN model training
-from sklearn.neighbors import KNeighborsClassifier
-KNN = KNeighborsClassifier()
-KNN.fit(X_train, y_train)
-y_pred = KNN.predict(X_test)
-accuracy = accuracy_score(y_test, y_pred)
-print(f"KNN Accuracy: {accuracy}")
-
-
-# Use Random Forest model to predict the test data and save the results to a CSV file
-pridictions = RF.predict(test_data)
-test_data['Prediction'] = pridictions
-test_data.to_csv('TestingResultsMulti.csv', index=False, header=False)
+def Logistic_Regression():
+    # Train LogisticRegression on the module-level X_train/y_train split and print its scores
+    LR = LogisticRegression(C=1e5, max_iter=5000)  # C is inverse regularization strength, so 1e5 is very weak
+    LR.fit(X_train, y_train)
+
+    # Predict labels for the held-out X_test split
+    y_pred = LR.predict(X_test)
+
+    # Fraction of held-out samples whose predicted label matches y_test
+    accuracy = accuracy_score(y_test, y_pred)
+    # Per-class F1 averaged with weights proportional to each class's support
+    f1 = f1_score(y_test, y_pred, average='weighted')
+    print("+-------------------------------------------+")
+    print(f"LogisticRegression Accuracy: {accuracy}")
+    print(f"LogisticRegression F1 Score: {f1}")
+
+
+def Decision_Tree():
+    # Train a DecisionTreeClassifier on the module-level X_train/y_train split and print its scores
+    from sklearn.tree import DecisionTreeClassifier  # imported locally, only when this model is run
+    DT = DecisionTreeClassifier(random_state=0)  # fixed seed so repeated runs build the same tree
+    DT.fit(X_train, y_train)
+
+    # Predict labels for the held-out X_test split
+    y_pred = DT.predict(X_test)
+
+    # Fraction of held-out samples whose predicted label matches y_test
+    accuracy = accuracy_score(y_test, y_pred)
+    # Per-class F1 averaged with weights proportional to each class's support
+    f1 = f1_score(y_test, y_pred, average='weighted')
+    print("+-------------------------------------------+")
+    print(f"DecisionTree Accuracy: {accuracy}")
+    print(f"DecisionTree F1 Score: {f1}")
+
+
+def Random_Forest():
+    # Train a RandomForestClassifier, print its held-out scores, and return the fitted model
+    from sklearn.ensemble import RandomForestClassifier  # imported locally, only when this model is run
+    RF = RandomForestClassifier(random_state=0)  # fixed seed so repeated runs build the same forest
+    RF.fit(X_train, y_train)
+    y_pred = RF.predict(X_test)  # predicted labels for the held-out split
+    accuracy = accuracy_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred, average='weighted')  # support-weighted mean of per-class F1
+    print("+-------------------------------------------+")
+    print(f"RandomForest Accuracy: {accuracy}")
+    print(f"RandomForest F1 Score: {f1}")
+    return RF  # only this function returns its model; the __main__ block reuses it for the final predictions
+
+
+def svm():
+    # Train an SVC on the module-level X_train/y_train split and print its held-out scores
+    from sklearn.svm import SVC  # imported locally, only when this model is run
+    SVM = SVC(random_state=0)  # NOTE(review): sklearn docs say random_state is ignored unless probability=True - confirm
+    SVM.fit(X_train, y_train)
+    y_pred = SVM.predict(X_test)  # predicted labels for the held-out split
+    accuracy = accuracy_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred, average='weighted')  # support-weighted mean of per-class F1
+    print("+-------------------------------------------+")
+    print(f"SVM Accuracy: {accuracy}")
+    print(f"SVM F1 Score: {f1}")
+
+
+def knn():
+    # Train a KNeighborsClassifier on the module-level X_train/y_train split and print its held-out scores
+    from sklearn.neighbors import KNeighborsClassifier  # imported locally, only when this model is run
+    KNN = KNeighborsClassifier()  # no arguments given, so the library defaults are used
+    KNN.fit(X_train, y_train)
+    y_pred = KNN.predict(X_test)  # predicted labels for the held-out split
+    accuracy = accuracy_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred, average='weighted')  # support-weighted mean of per-class F1
+    print("+-------------------------------------------+")
+    print(f"KNN Accuracy: {accuracy}")
+    print(f"KNN F1 Score: {f1}")
+
+if __name__ == "__main__":  # run the model comparison only when executed as a script
+    Logistic_Regression()
+    Decision_Tree()
+    RF = Random_Forest()  # keep the fitted forest; it is the model used for the final output
+    svm()
+    knn()
+    # Use Random Forest model to predict the test data and save the results to a CSV file
+    predictions = RF.predict(test_data)  # predict before the label column is appended below
+    test_data['Prediction'] = predictions
+    test_data.to_csv('TestingResultsMulti.csv', index=False, header=False)
-- 
GitLab