Skip to content
Snippets Groups Projects
Commit da78c344 authored by acj1g19's avatar acj1g19
Browse files

Added source code with required datasets

parent 0f4829a3
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:a115ed80 tags:
``` python
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score, classification_report
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA, FastICA
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge, Lasso
#Read the four headerless csv files (binary then multi-class, training then testing)
#into pandas dataframes, in the same order as the names on the left
binTrain, binTest, mulTrain, mulTest = (
    pd.read_csv(csv_name, header=None)
    for csv_name in (
        'TrainingDataBinary.csv',
        'TestingDataBinary.csv',
        'TrainingDataMulti.csv',
        'TestingDataMulti.csv',
    )
)
#Labels live in column index 128 (the 129th column) of each training set
binTrainY = binTrain.iloc[:, 128]
mulTrainY = mulTrain.iloc[:, 128]
#Features are the 128 columns that precede the label column
binTrainX = binTrain.iloc[:, 0:128]
mulTrainX = mulTrain.iloc[:, 0:128]
#Hold out 20% of each training set for evaluation
#Fixed random_state seeds keep the splits (and reported scores) reproducible
binX_train, binX_test, binY_train, binY_test = train_test_split(
    binTrainX,
    binTrainY,
    test_size=0.2,
    random_state=2,
)
mulX_train, mulX_test, mulY_train, mulY_test = train_test_split(
    mulTrainX,
    mulTrainY,
    test_size=0.2,
    random_state=3,
)
```
%% Cell type:code id:14ef5b96 tags:
``` python
#Random Forest for the binary task; hyperparameters are the ones the author
#reports as optimal from a grid search (the search itself is not in this cell)
rf = RandomForestClassifier(
    n_estimators=50,
    criterion='gini',
    max_features='sqrt',
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=1,
)
#Fit on the 80% training split
rf.fit(binX_train, binY_train)
#Predict labels for the held-out 20% split
preds = rf.predict(binX_test)
#Macro-averaged F1 of the predictions against the true held-out labels
macro_f1 = f1_score(binY_test, preds, average='macro')
print(f"F1 Score: {macro_f1}")
#Per-class precision, recall and F1 plus overall accuracy
print(classification_report(binY_test, preds))
#Confusion matrix with rows/columns ordered by the classes the model learned
cm = confusion_matrix(binY_test, preds, labels=rf.classes_)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=rf.classes_,
)
disp.plot()
plt.show()
```
%% Output
F1 Score: 0.9916638647990019
precision recall f1-score support
0 1.00 0.99 0.99 614
1 0.99 1.00 0.99 586
accuracy 0.99 1200
macro avg 0.99 0.99 0.99 1200
weighted avg 0.99 0.99 0.99 1200
%% Cell type:code id:e25a4b1b tags:
``` python
#Predict a label for every row of the binary testing data
test_preds = rf.predict(binTest)
#Copy the testing data and append the predictions as a trailing 'Label' column
#(the 129th column, assuming the testing file holds only the 128 features)
binTestLabels = binTest.copy()
binTestLabels['Label'] = test_preds
#Write the labelled rows to csv with neither an index column nor a header row
binTestLabels.to_csv(
    'TestingResultsBinary.csv',
    header=False,
    index=False,
)
```
%% Cell type:code id:d0fe268b tags:
``` python
#Random Forest for the multi-class task; hyperparameters are the ones the author
#reports as optimal from a grid search (the search itself is not in this cell)
rf = RandomForestClassifier(
    n_estimators=150,
    criterion='gini',
    max_features='sqrt',
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=1,
)
#Fit on the 80% training split
rf.fit(mulX_train, mulY_train)
#Predict labels for the held-out 20% split
preds = rf.predict(mulX_test)
#Macro-averaged F1 of the predictions against the true held-out labels
macro_f1 = f1_score(mulY_test, preds, average='macro')
print(f"F1 Score: {macro_f1}")
#Per-class precision, recall and F1 plus overall accuracy
print(classification_report(mulY_test, preds))
#Confusion matrix with rows/columns ordered by the classes the model learned
cm = confusion_matrix(mulY_test, preds, labels=rf.classes_)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=rf.classes_,
)
disp.plot()
plt.show()
```
%% Output
F1 Score: 0.9535275147686754
precision recall f1-score support
0 0.99 0.99 0.99 589
1 0.96 0.92 0.94 321
2 0.92 0.94 0.93 290
accuracy 0.96 1200
macro avg 0.95 0.95 0.95 1200
weighted avg 0.96 0.96 0.96 1200
%% Cell type:code id:249be46a tags:
``` python
#Predict a label for every row of the multi-class testing data
test_preds = rf.predict(mulTest)
#Copy the testing data and append the predictions as a trailing 'Label' column
#(the 129th column, assuming the testing file holds only the 128 features)
mulTestLabels = mulTest.copy()
mulTestLabels['Label'] = test_preds
#Write the labelled rows to csv with neither an index column nor a header row
mulTestLabels.to_csv(
    'TestingResultsMulti.csv',
    header=False,
    index=False,
)
```
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment