Skip to content
Snippets Groups Projects
Commit a372f872 authored by xz36n22's avatar xz36n22
Browse files

Upload New File

parent 5928bf44
No related branches found
No related tags found
No related merge requests found
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, f1_score
# Load the labelled training set. The slicing below treats every column but
# the last as a feature and the last column as the class label.
data = pd.read_csv('TrainingDataMulti.csv')

# Separate features (X) and labels (y)
X = data.iloc[:, :-1]  # All columns except the last one
y = data.iloc[:, -1]  # Last column (marker)

# One seed shared by the split and the forest, so repeated runs produce the
# same validation metrics and the same predictions.
RANDOM_SEED = 42

# Hold out 20% of the rows as a validation set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_SEED
)

# Create a Random Forest classifier. random_state is set explicitly: without
# it the forest's internal randomness is re-drawn on every run, which makes
# the scores printed below impossible to compare between runs.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    max_depth=70,
    min_samples_split=2,
    random_state=RANDOM_SEED,
)

# Train the classifier
rf_classifier.fit(X_train, y_train)

# Make predictions on the validation set
y_test_pred = rf_classifier.predict(X_test)

# Evaluate the accuracy of the model on the validation set
accuracy = accuracy_score(y_test, y_test_pred)
print("Validation Accuracy:", accuracy)
# Collect every class that can appear on either axis: the classes the forest
# was fitted on (the only values it can predict) plus any label present in the
# validation split. Passing this list to confusion_matrix fixes the row/column
# order and keeps the matrix in step with the axis names built below.
class_labels = sorted(set(rf_classifier.classes_) | set(y_test))

# Calculate the confusion matrix
cm = confusion_matrix(y_test, y_test_pred, labels=class_labels)

# Convert the confusion matrix to a DataFrame for better visualization.
# Axis names are derived from the actual classes instead of a hard-coded
# three-entry list, which raised a shape error whenever the number of classes
# was not exactly three and mislabelled the axes when they were not 0, 1, 2.
cm_df = pd.DataFrame(
    cm,
    index=[f'True Label {label}' for label in class_labels],
    columns=[f'Predicted Label {label}' for label in class_labels],
)

# Create a heatmap of the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

# Calculate the F1 score; 'macro' is the unweighted mean of the per-class
# scores, so every class counts equally regardless of its frequency.
f1 = f1_score(y_test, y_test_pred, average='macro')
print("F1 Score:", f1)
# Read the testing set and label it with the trained forest.
# NOTE(review): every column of this file is handed to predict(), so it is
# presumably laid out like the training features without the label column;
# confirm against the data files.
test_data = pd.read_csv('TestingDataMulti.csv')
y_test_pred = rf_classifier.predict(test_data)

# Store the predictions in a "marker" column and write features plus
# predictions to a CSV in the current working directory (no index column).
test_data['marker'] = y_test_pred
test_data.to_csv('TestingResultsMulti.csv', index=False)

# Echo the predicted labels: heading on one line, labels on the next
print("Predicted labels for testing data:", y_test_pred, sep="\n")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment