Commit 8716a0cf authored by ejc1g20

Replace exercise1.py with updated version after further work.

parent 66c6cc45
exercise1.py
@@ -13,40 +13,28 @@ y = training_data["129"]
 # Get a list of the input variables
 X = training_data.drop(["129"], axis=1)
-# Split data into test and train
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
-# Create and fit a Random Forest Classifier model to the training data
-random_forest = RandomForestClassifier()
-random_forest.fit(X_train, y_train)
-# Calculate training and testing accuracies
-training_accuracy = random_forest.score(X_train, y_train)
-testing_accuracy = random_forest.score(X_test, y_test)
-print("Training accuracy = " + str(training_accuracy))
-print("Testing accuracy = " + str(testing_accuracy))
 # # define hyperparameter values for grid search
 # param_grid = {
 #     'bootstrap': [True],
-#     'max_depth': [10, 20, 40, 80, 160],
-#     'max_features': [20, 40, 80, 160],
+#     'max_depth': [5, 10, 15, 20],
+#     'max_features': [5, 10, 15],
 #     'n_estimators': [128, 256, 512, 1024]
 # }
-#
 # # Create a model to use grid search
 # grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid, cv=3, n_jobs=10, verbose=1)
 # grid_search.fit(X_train, y_train)
 #
-# print(grid_search.best_params_)  # {'bootstrap': True, 'max_depth': 40, 'max_features': 20, 'n_estimators': 512}
+# print(grid_search.best_params_)  # {'bootstrap': True, 'max_depth': 20, 'max_features': 15, 'n_estimators': 128}
 # Using the best hyperparameter grid
 random_forest_tuned = RandomForestClassifier(bootstrap=True,
-                                             max_depth=60,
-                                             max_features=20,
-                                             n_estimators=512)
-# Fit and train the random forest tuned model
+                                             max_depth=20,
+                                             max_features=15,
+                                             n_estimators=128)
 random_forest_tuned.fit(X_train, y_train)
 training_accuracy_tuned = random_forest_tuned.score(X_train, y_train)
 testing_accuracy_tuned = random_forest_tuned.score(X_test, y_test)
@@ -58,11 +46,9 @@ print("Testing accuracy tuned = " + str(testing_accuracy_tuned))
 # Read the testing data into a pandas dataframe
 testing_data = pd.read_csv("data/TestingDataBinary.csv", header=None)
-# Make predictions of the classifications of the testing data
 tuned_predictions = random_forest_tuned.predict(testing_data)
 print(tuned_predictions)
 # Make a file TestingResultsBinary.csv that contains the 128 numbers and the labels in each line
 testing_data[128] = tuned_predictions
 testing_data.to_csv("output/TestingResultsBinary.csv")
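
Note: the hunks above start at line 13 of exercise1.py, so the imports and the training-data load are not visible in this commit. The sketch below is a minimal, self-contained reconstruction of the tuned pipeline as it stands after this change, for reference only. The training CSV path ("data/TrainingDataBinary.csv"), the presence of a header row whose last column is named "129", and the exact import lines are assumptions, not part of the diff; the hyperparameters are the ones recorded in the commented-out GridSearchCV result.

    # Reference sketch only -- not part of the commit. Imports and the training-data
    # load are assumed, since the diff hunk begins below them at line 13.
    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    # Assumption: this training CSV path, and a header row whose last column is "129"
    training_data = pd.read_csv("data/TrainingDataBinary.csv")
    y = training_data["129"]                 # label column, as referenced in the hunk header
    X = training_data.drop(["129"], axis=1)  # remaining columns are the input features

    # Hold out 20% of the rows for testing, as in the committed code
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Hyperparameters taken from the grid-search result noted in the commit
    random_forest_tuned = RandomForestClassifier(bootstrap=True,
                                                 max_depth=20,
                                                 max_features=15,
                                                 n_estimators=128)
    random_forest_tuned.fit(X_train, y_train)
    print("Training accuracy tuned = " + str(random_forest_tuned.score(X_train, y_train)))
    print("Testing accuracy tuned = " + str(random_forest_tuned.score(X_test, y_test)))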