Commit 31be7c66 authored by Jaralus

update Part_B.py

parent d495cffc
@@ -14,46 +14,54 @@ from sklearn.ensemble import RandomForestClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.neighbors import KNeighborsClassifier
 
+# This function performs cross-validation on a given classifier
 def cross_validation(classifier, features, labels, scoring_metrics):
+    # Perform cross-validation using the given classifier, features, labels, and scoring metrics
     scores = cross_val_score(classifier, features, labels, cv=5, scoring=scoring_metrics)
+    # Return the mean score of the cross-validation
     return scores.mean()
 
 def main():
     best_classifier_average = 0
 
+    # Read the training data
     train_data = pd.read_csv("TrainingDataBinary.csv", header = None)
 
+    # Shuffle the training data
     train_data_shuffled = shuffle(train_data)
 
+    # Split the training data into features and labels
     train_features = train_data_shuffled.iloc[:, :-1] # Select everything apart from the last column
     train_labels = train_data_shuffled.iloc[:, -1] # Select the last column
 
+    # Scale the training features
     scaler = StandardScaler()
     train_features_scaled = scaler.fit_transform(train_features)
 
+    # Define the hyperparameters for the Bayes search
     parameters = {
         "solver" : ["lbfgs", "sgd", "adam"],
         "activation" : ["relu", "logistic", "tanh"],
         "learning_rate" : ["constant", "invscaling", "adaptive"]
     }
 
+    # Perform Bayesian optimization to find the best hyperparameters
     search = BayesSearchCV(MLPClassifier(max_iter = 10000), parameters, n_iter = 50, n_jobs = -1, cv = 5, scoring = "accuracy").fit(train_features_scaled, train_labels)
 
     print(f"Best Score: {search.best_score_}")
     print(f"Best Hyperparameters: {search.best_params_}")
 
-    """
+    # Train the classifiers
     classifiers = [
-        #LogisticRegression(max_iter = 10000, solver = "newton-cg", C = 9.416).fit(train_features_scaled, train_labels),
-        MLPClassifier(max_iter = 10000, solver = "adam", activation = "tanh", learning_rate = "constant").fit(train_features_scaled, train_labels)
-        #SVC(C = 7.989999999999979, kernel = "linear").fit(train_features_scaled, train_labels),
+        LogisticRegression(max_iter = 10000, solver = "newton-cg", C = 9.416).fit(train_features_scaled, train_labels),
+        MLPClassifier(max_iter = 10000, solver = "adam", activation = "tanh", learning_rate = "constant").fit(train_features_scaled, train_labels),
+        SVC(C = 7.989999999999979, kernel = "linear").fit(train_features_scaled, train_labels),
         #RandomForestClassifier(n_estimators = 418, max_depth = 5).fit(train_features_scaled, train_labels),
         #DecisionTreeClassifier(max_features = "sqrt", criterion = "gini", max_depth = 19).fit(train_features_scaled, train_labels),
         #KNeighborsClassifier(n_neighbors = 4, n_jobs = -1, leaf_size = 68, metric = "manhattan", weights = "distance", algorithm = "kd_tree").fit(train_features_scaled, train_labels)
     ]
 
+    # Evaluate the performance of the trained classifiers
     for classifier in classifiers:
         train_predicted_labels = classifier.predict(train_features_scaled)
         train_accuracy = accuracy_score(train_labels, train_predicted_labels)
@@ -66,21 +74,24 @@ def main():
         cv_average = ((cv_accuracy + cv_f1) / 2)
         print(f"Average Score: {cv_average}")
 
+        # Update the best classifier if the current classifier has a better average score
         if (best_classifier_average < cv_average):
             best_classifier = classifier
             best_classifier_average = cv_average
 
+    # Read the test data
     test_features = pd.read_csv("TestingDataBinary.csv", header = None)
 
+    # Scale the test features
     test_features_scaled = scaler.transform(test_features)
 
+    # Predict the labels for the test data
     test_predicted_labels = best_classifier.predict(test_features_scaled)
 
+    # Save the test results
     test_results = test_features.copy()
     test_results["Predicted Labels"] = test_predicted_labels
     test_results.to_csv("TestingResultsBinary.csv", header = False, index = False)
-    """
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
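
For context, the cross_validation helper added in this commit is a thin wrapper around scikit-learn's cross_val_score, returning the mean of the five per-fold scores. Below is a minimal, self-contained sketch of how it behaves, using synthetic data from make_classification in place of TrainingDataBinary.csv (the stand-in data and the LogisticRegression settings here are illustrative assumptions, not part of the commit):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

def cross_validation(classifier, features, labels, scoring_metrics):
    # 5-fold cross-validation; returns the mean of the per-fold scores
    scores = cross_val_score(classifier, features, labels, cv=5, scoring=scoring_metrics)
    return scores.mean()

# Synthetic binary-classification data standing in for the real CSV input
X, y = make_classification(n_samples=200, n_features=20, random_state=0)

# Scoring one metric per call mirrors how cv_accuracy and cv_f1 are averaged
# into cv_average in the evaluation loop (the elided calls are not shown in the diff)
cv_accuracy = cross_validation(LogisticRegression(max_iter=1000), X, y, "accuracy")
cv_f1 = cross_validation(LogisticRegression(max_iter=1000), X, y, "f1")
print(f"Average Score: {(cv_accuracy + cv_f1) / 2}")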
@@ -53,6 +53,7 @@ def main():
         #KNeighborsClassifier(n_neighbors = 4, n_jobs = -1, leaf_size = 68, metric = "manhattan", weights = "distance", algorithm = "kd_tree").fit(train_features_scaled, train_labels)
     ]
 
+    #Heya
     for classifier in classifiers:
         train_predicted_labels = classifier.predict(train_features_scaled)
...
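
One pattern worth noting in the diff: the StandardScaler is fitted only on the training features (fit_transform), and the same fitted scaler is reused to transform the test features, so the test set never leaks into the scaling statistics. A minimal sketch of that pattern, with made-up arrays in place of the CSV files (the array values are illustrative assumptions):

import numpy as np
from sklearn.preprocessing import StandardScaler

# Made-up feature matrices standing in for the training and testing CSVs
train_features = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
test_features = np.array([[1.5, 15.0], [2.5, 25.0]])

scaler = StandardScaler()
# Learn the per-column mean and standard deviation from the training data only
train_features_scaled = scaler.fit_transform(train_features)
# Reuse the training-derived statistics on the test data
test_features_scaled = scaler.transform(test_features)

print(test_features_scaled)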