Commit 31be7c66 authored by Jaralus

update Part_B.py

parent d495cffc
@@ -14,46 +14,54 @@ from sklearn.ensemble import RandomForestClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.neighbors import KNeighborsClassifier
 
+# This function performs cross-validation on a given classifier
 def cross_validation(classifier, features, labels, scoring_metrics):
+    # Perform cross-validation using the given classifier, features, labels, and scoring metrics
     scores = cross_val_score(classifier, features, labels, cv=5, scoring=scoring_metrics)
+    # Return the mean score of the cross-validation
     return scores.mean()
 
 def main():
     best_classifier_average = 0
 
+    # Read the training data
     train_data = pd.read_csv("TrainingDataBinary.csv", header = None)
 
+    # Shuffle the training data
     train_data_shuffled = shuffle(train_data)
 
+    # Split the training data into features and labels
     train_features = train_data_shuffled.iloc[:, :-1] # Select everything apart from the last column
     train_labels = train_data_shuffled.iloc[:, -1] # Select the last column
 
+    # Scale the training features
     scaler = StandardScaler()
     train_features_scaled = scaler.fit_transform(train_features)
 
+    # Define the hyperparameters for the Bayes search
     parameters = {
         "solver" : ["lbfgs", "sgd", "adam"],
         "activation" : ["relu", "logistic", "tanh"],
         "learning_rate" : ["constant", "invscaling", "adaptive"]
     }
 
+    # Perform Bayesian optimization to find the best hyperparameters
     search = BayesSearchCV(MLPClassifier(max_iter = 10000), parameters, n_iter = 50, n_jobs = -1, cv = 5, scoring = "accuracy").fit(train_features_scaled, train_labels)
 
     print(f"Best Score: {search.best_score_}")
     print(f"Best Hyperparameters: {search.best_params_}")
 
-    """
+    # Train the classifiers
     classifiers = [
-        #LogisticRegression(max_iter = 10000, solver = "newton-cg", C = 9.416).fit(train_features_scaled, train_labels),
-        MLPClassifier(max_iter = 10000, solver = "adam", activation = "tanh", learning_rate = "constant").fit(train_features_scaled, train_labels)
-        #SVC(C = 7.989999999999979, kernel = "linear").fit(train_features_scaled, train_labels),
+        LogisticRegression(max_iter = 10000, solver = "newton-cg", C = 9.416).fit(train_features_scaled, train_labels),
+        MLPClassifier(max_iter = 10000, solver = "adam", activation = "tanh", learning_rate = "constant").fit(train_features_scaled, train_labels),
+        SVC(C = 7.989999999999979, kernel = "linear").fit(train_features_scaled, train_labels),
         #RandomForestClassifier(n_estimators = 418, max_depth = 5).fit(train_features_scaled, train_labels),
         #DecisionTreeClassifier(max_features = "sqrt", criterion = "gini", max_depth = 19).fit(train_features_scaled, train_labels),
         #KNeighborsClassifier(n_neighbors = 4, n_jobs = -1, leaf_size = 68, metric = "manhattan", weights = "distance", algorithm = "kd_tree").fit(train_features_scaled, train_labels)
     ]
 
+    # Evaluate the performance of the trained classifiers
     for classifier in classifiers:
         train_predicted_labels = classifier.predict(train_features_scaled)
         train_accuracy = accuracy_score(train_labels, train_predicted_labels)
@@ -66,21 +74,24 @@ def main():
         cv_average = ((cv_accuracy + cv_f1) / 2)
         print(f"Average Score: {cv_average}")
 
+        # Update the best classifier if the current classifier has a better average score
         if (best_classifier_average < cv_average):
             best_classifier = classifier
             best_classifier_average = cv_average
 
+    # Read the test data
     test_features = pd.read_csv("TestingDataBinary.csv", header = None)
 
+    # Scale the test features
     test_features_scaled = scaler.transform(test_features)
 
+    # Predict the labels for the test data
     test_predicted_labels = best_classifier.predict(test_features_scaled)
 
+    # Save the test results
     test_results = test_features.copy()
     test_results["Predicted Labels"] = test_predicted_labels
     test_results.to_csv("TestingResultsBinary.csv", header = False, index = False)
-    """
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
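
For context, the cross_validation helper added in this commit is a thin wrapper around scikit-learn's cross_val_score, returning the mean of the five per-fold scores. Below is a minimal, self-contained sketch of how it behaves, using synthetic data from make_classification in place of TrainingDataBinary.csv (the stand-in data and the LogisticRegression settings here are illustrative assumptions, not part of the commit):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

def cross_validation(classifier, features, labels, scoring_metrics):
    # 5-fold cross-validation; returns the mean of the per-fold scores
    scores = cross_val_score(classifier, features, labels, cv=5, scoring=scoring_metrics)
    return scores.mean()

# Synthetic binary-classification data standing in for the real CSV input
X, y = make_classification(n_samples=200, n_features=20, random_state=0)

# Scoring one metric per call mirrors how cv_accuracy and cv_f1 are averaged
# into cv_average in the evaluation loop (the elided calls are not shown in the diff)
cv_accuracy = cross_validation(LogisticRegression(max_iter=1000), X, y, "accuracy")
cv_f1 = cross_validation(LogisticRegression(max_iter=1000), X, y, "f1")
print(f"Average Score: {(cv_accuracy + cv_f1) / 2}")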
@@ -53,6 +53,7 @@ def main():
         #KNeighborsClassifier(n_neighbors = 4, n_jobs = -1, leaf_size = 68, metric = "manhattan", weights = "distance", algorithm = "kd_tree").fit(train_features_scaled, train_labels)
     ]
 
+    #Heya
     for classifier in classifiers:
         train_predicted_labels = classifier.predict(train_features_scaled)
...
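
One pattern worth noting in the diff: the StandardScaler is fitted only on the training features (fit_transform), and the same fitted scaler is reused to transform the test features, so the test set never leaks into the scaling statistics. A minimal sketch of that pattern, with made-up arrays in place of the CSV files (the array values are illustrative assumptions):

import numpy as np
from sklearn.preprocessing import StandardScaler

# Made-up feature matrices standing in for the training and testing CSVs
train_features = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
test_features = np.array([[1.5, 15.0], [2.5, 25.0]])

scaler = StandardScaler()
# Learn the per-column mean and standard deviation from the training data only
train_features_scaled = scaler.fit_transform(train_features)
# Reuse the training-derived statistics on the test data
test_features_scaled = scaler.transform(test_features)

print(test_features_scaled)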