Upload New File

a372f872 · xz36n22 · 5928bf44 · a372f872
Commit a372f872 authored 2 years ago by xz36n22
--- a/PartB_Xinyi_Zhang.py
+++ b/PartB_Xinyi_Zhang.py
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import confusion_matrix, f1_score
+
+# Load the labelled training data.
+# NOTE(review): read_csv treats the first row as a header by default - confirm
+# that TrainingDataMulti.csv really starts with a header row, otherwise the
+# first sample is silently consumed as column names.
+data = pd.read_csv('TrainingDataMulti.csv')
+
+# Separate features (X) and labels (y): the label is taken from the last column.
+X = data.iloc[:, :-1]  # All columns except the last one
+y = data.iloc[:, -1]   # Last column (marker)
+
+# Hold out 20% of the rows for validation; the split is seeded so it is repeatable.
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Create a Random Forest classifier.
+# The forest is seeded as well: with only the split seeded, the trees would
+# still be built from different random draws on every run, so the reported
+# metrics and the saved predictions could not be reproduced.
+rf_classifier = RandomForestClassifier(
+    n_estimators=100,
+    max_depth=70,
+    min_samples_split=2,
+    random_state=42,
+)
+
+# Train the classifier on the training portion only
+rf_classifier.fit(X_train, y_train)
+
+# Make predictions on the held-out validation set
+y_test_pred = rf_classifier.predict(X_test)
+
+# Evaluate the accuracy of the model on the validation set
+accuracy = accuracy_score(y_test, y_test_pred)
+print("Validation Accuracy:", accuracy)
+
+# Calculate the confusion matrix.
+# The class list comes from the fitted model and is passed explicitly, so the
+# matrix always has one row/column per trained class - even when a class is
+# missing from the validation split - and the axis names below always match
+# its shape instead of assuming exactly three classes called 0, 1 and 2.
+class_labels = rf_classifier.classes_
+cm = confusion_matrix(y_test, y_test_pred, labels=class_labels)
+
+# Convert the confusion matrix to a DataFrame so the heatmap axes are labelled
+cm_df = pd.DataFrame(
+    cm,
+    index=['True Label {}'.format(label) for label in class_labels],
+    columns=['Predicted Label {}'.format(label) for label in class_labels],
+)
+
+# Create a heatmap of the confusion matrix (fmt='d' prints the integer counts)
+plt.figure(figsize=(8, 6))
+sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
+plt.title('Confusion Matrix')
+plt.xlabel('Predicted Labels')
+plt.ylabel('True Labels')
+plt.show()
+
+# Report the macro-averaged F1 score on the validation set: the per-class F1
+# values are averaged with equal weight, which suits multi-class labels.
+f1 = f1_score(y_true=y_test, y_pred=y_test_pred, average='macro')
+print("F1 Score:", f1)
+
+
+# Load the unlabelled testing data.
+# NOTE(review): the frame is passed to predict() as-is, so its columns must
+# line up with the training features - confirm against TrainingDataMulti.csv.
+test_data = pd.read_csv('TestingDataMulti.csv')
+
+# Make predictions on the testing data.
+# They are kept in their own variable so the validation predictions in
+# y_test_pred (which pair with y_test) are not overwritten.
+test_predictions = rf_classifier.predict(test_data)
+
+# Add the predicted labels as a new column named "marker"
+test_data['marker'] = test_predictions
+
+# Save the testing data plus predictions to a new CSV file in the working directory
+test_data.to_csv('TestingResultsMulti.csv', index=False)
+
+# Print the predicted labels for the testing data
+print("Predicted labels for testing data:")
+print(test_predictions)