# Select Git revision
# development.ini
# PartA_Xinyi_Zhang.py 2.23 KiB
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, f1_score
# Single seed shared by the train/validation split and the forest itself, so
# the fitted model, the reported metrics and the exported predictions are
# reproducible from run to run.
RANDOM_STATE = 42

# Load the labelled training dataset.
# NOTE(review): read_csv treats the first row as a header by default -- confirm
# the file really has a header row; otherwise pass header=None so that the
# first sample is not silently consumed as column names.
data = pd.read_csv('TrainingDataBinary.csv')

# Separate features (X) and labels (y): the label ("marker") is the last
# column, every other column is used as a feature.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Hold out 20% of the rows as a validation set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Create the Random Forest classifier.  random_state is set explicitly:
# bootstrap sampling and per-split feature selection are randomised, so an
# unseeded forest yields different trees (and scores) on every execution even
# though the data split above is fixed.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    max_depth=50,
    min_samples_split=2,
    random_state=RANDOM_STATE,
)

# Train the classifier on the training portion only.
rf_classifier.fit(X_train, y_train)

# Make predictions on the held-out validation set.
y_test_pred = rf_classifier.predict(X_test)
# --- Validation metrics ------------------------------------------------------
# Fraction of validation rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print("Validation Accuracy:", accuracy)

# Confusion matrix of true labels against predicted labels, wrapped in a
# labelled DataFrame so the heatmap axes show readable tick names.
cm = confusion_matrix(y_true=y_test, y_pred=y_test_pred)
cm_df = pd.DataFrame(
    data=cm,
    index=['True Label 0', 'True Label 1'],
    columns=['Predicted Label 0', 'Predicted Label 1'],
)

# Draw the matrix as an annotated heatmap with integer cell counts.
plt.figure(figsize=(8, 6))
ax = sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
plt.show()

# F1 score of the validation predictions.
f1 = f1_score(y_true=y_test, y_pred=y_test_pred)
print("F1 Score:", f1)
# --- Predictions for the testing file ----------------------------------------
# Read the testing set; every column of this frame is fed to the model as a
# feature.
test_data = pd.read_csv('TestingDataBinary.csv')

# Classify each testing row with the trained forest.
# NOTE: this rebinds y_test_pred, which held the validation predictions until
# this point.
y_test_pred = rf_classifier.predict(test_data)

# Append the predicted labels as a new "marker" column and write the combined
# table to a CSV file in the working directory (row index omitted).
test_data = test_data.assign(marker=y_test_pred)
test_data.to_csv('TestingResultsBinary.csv', index=False)

# Echo the predicted labels for the testing data to the console.
print("Predicted labels for testing data:", y_test_pred, sep="\n")