Commit f272e1de authored by Jaralus

update Part_A.py

parent 0e392093
@@ -22,6 +22,7 @@ def cross_validation(classifier, features, labels, scoring_metrics):
     return scores.mean()
 
 def main():
+    best_classifier_name = ""
     best_classifier_average = 0
 
     # Read the training data
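Only the tail of the cross_validation helper is visible in this hunk. A minimal sketch of the full helper, assuming sklearn's cross_val_score underneath; the fold count of 5 is a guess (only the signature and the return line appear above):

from sklearn.model_selection import cross_val_score

def cross_validation(classifier, features, labels, scoring_metrics):
    # Score the classifier across folds and report the mean;
    # cv = 5 is an assumption, the hunk only shows the return line
    scores = cross_val_score(classifier, features, labels, scoring = scoring_metrics, cv = 5)
    return scores.mean()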
@@ -38,6 +39,8 @@ def main():
     scaler = StandardScaler()
     train_features_scaled = scaler.fit_transform(train_features)
 
+    # The following commented-out code was used to find optimal hyperparameters
+    '''
     # Define the hyperparameters for the Bayes search
     parameters = {
         "solver" : ["lbfgs", "sgd", "adam"],
@@ -49,36 +52,42 @@ def main():
     search = BayesSearchCV(MLPClassifier(max_iter = 10000), parameters, n_iter = 50, n_jobs = -1, cv = 5, scoring = "accuracy").fit(train_features_scaled, train_labels)
     print(f"Best Score: {search.best_score_}")
     print(f"Best Hyperparameters: {search.best_params_}")
+    '''
 
     # Train the classifiers
     classifiers = [
-        LogisticRegression(max_iter = 10000, solver = "newton-cg", C = 9.416).fit(train_features_scaled, train_labels),
-        MLPClassifier(max_iter = 10000, solver = "adam", activation = "tanh", learning_rate = "constant").fit(train_features_scaled, train_labels),
-        SVC(C = 7.989999999999979, kernel = "linear").fit(train_features_scaled, train_labels),
-        #RandomForestClassifier(n_estimators = 418, max_depth = 5).fit(train_features_scaled, train_labels),
-        #DecisionTreeClassifier(max_features = "sqrt", criterion = "gini", max_depth = 19).fit(train_features_scaled, train_labels),
-        #KNeighborsClassifier(n_neighbors = 4, n_jobs = -1, leaf_size = 68, metric = "manhattan", weights = "distance", algorithm = "kd_tree").fit(train_features_scaled, train_labels)
+        ("Logistic Regression" , LogisticRegression(max_iter = 10000, solver = "newton-cg", C = 9.416).fit(train_features_scaled, train_labels)),
+        ("Multi-layer Perceptron" , MLPClassifier(max_iter = 10000, solver = "adam", activation = "tanh", learning_rate = "constant").fit(train_features_scaled, train_labels)),
+        ("C-Support Vector" , SVC(C = 7.989999999999979, kernel = "linear").fit(train_features_scaled, train_labels)),
+        # These algorithms were optimised and trained but were found to be overfitting
+        #("Random Forest" , RandomForestClassifier(n_estimators = 418, max_depth = 5).fit(train_features_scaled, train_labels)),
+        #("Decision Tree" , DecisionTreeClassifier(max_features = "sqrt", criterion = "gini", max_depth = 19).fit(train_features_scaled, train_labels)),
+        #("K-Nearest Neighbours" , KNeighborsClassifier(n_neighbors = 4, n_jobs = -1, leaf_size = 68, metric = "manhattan", weights = "distance", algorithm = "kd_tree").fit(train_features_scaled, train_labels))
     ]
 
     # Evaluate the performance of the trained classifiers
-    for classifier in classifiers:
+    for classifier_name, classifier in classifiers:
+        print(f"Evaluating {classifier_name}...")
         train_predicted_labels = classifier.predict(train_features_scaled)
         train_accuracy = accuracy_score(train_labels, train_predicted_labels)
         print(f"Training Accuracy: {train_accuracy}")
 
         cv_accuracy = cross_validation(classifier, train_features_scaled, train_labels, "accuracy")
         print(f"Cross-Validated Accuracy: {cv_accuracy}")
 
-        #cv_f1 = cross_validation(classifier, train_features_scaled, train_labels, "f1")
-        cv_f1 = cv_accuracy
-        #print(f"Cross-Validated F1-Score: {cv_f1}")
+        cv_f1 = cross_validation(classifier, train_features_scaled, train_labels, "f1")
+        print(f"Cross-Validated F1-Score: {cv_f1}")
 
         cv_average = ((cv_accuracy + cv_f1) / 2)
-        print(f"Average Score: {cv_average}")
+        print(f"Average Score: {cv_average}\n")
+
+        # Update the best classifier if the current classifier has a better average score
+        if (best_classifier_average < cv_average):
+            best_classifier_name = classifier_name
+            best_classifier = classifier
+            best_classifier_average = cv_average
+
+    print(f"The best classifier is {best_classifier_name}.\n")
 
     # Read the test data
     test_features = pd.read_csv("TestingDataBinary.csv", header = None)
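The loop above pairs each classifier with a display name and keeps whichever one scores best on the average of cross-validated accuracy and F1. A self-contained sketch of the same selection pattern on a toy dataset (the toy data and the untuned estimators are stand-ins, not the project's configuration):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Toy stand-in for the real training data
features, labels = make_classification(n_samples = 200, random_state = 0)
features = StandardScaler().fit_transform(features)

classifiers = [
    ("Logistic Regression", LogisticRegression(max_iter = 10000).fit(features, labels)),
    ("C-Support Vector", SVC(kernel = "linear").fit(features, labels)),
]

best_classifier_name, best_classifier_average = "", 0
for classifier_name, classifier in classifiers:
    cv_accuracy = cross_val_score(classifier, features, labels, scoring = "accuracy", cv = 5).mean()
    cv_f1 = cross_val_score(classifier, features, labels, scoring = "f1", cv = 5).mean()
    cv_average = (cv_accuracy + cv_f1) / 2
    # Keep the classifier with the best average of the two cross-validated scores
    if best_classifier_average < cv_average:
        best_classifier_name, best_classifier_average = classifier_name, cv_average

print(f"The best classifier is {best_classifier_name}.")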
@@ -93,5 +102,7 @@ def main():
     test_results["Predicted Labels"] = test_predicted_labels
     test_results.to_csv("TestingResultsBinary.csv", header = False, index = False)
 
     print("New labels have been predicted for the test data.")
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
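The final hunk only shows the tail of the prediction step. A sketch of the glue the diff does not show, assuming the test features are transformed with the scaler fitted on the training data and predicted with the selected best_classifier (both assumptions):

# Assumed glue between the last two hunks; scaler and best_classifier
# carry over from the training steps above
test_features_scaled = scaler.transform(test_features)
test_predicted_labels = best_classifier.predict(test_features_scaled)
test_results = test_features.copy()   # assumption: results start as a copy of the raw features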