From 047d965c0b8fe97d8bf4946406cd93f5b449e729 Mon Sep 17 00:00:00 2001
From: ejc1g20 <ejc1g20@soton.ac.uk>
Date: Sun, 4 Jun 2023 16:38:35 +0000
Subject: [PATCH] Replace exercise2.py with updated version after further work

---
 exercise2.py | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/exercise2.py b/exercise2.py
index 9eaa9fc..1f8e31f 100644
--- a/exercise2.py
+++ b/exercise2.py
@@ -13,25 +13,14 @@ y = training_data["129"]
 # Get a list of the input variables
 X = training_data.drop(["129"], axis=1)
 
+# Split data into test and train
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
 
-# Create and fit a Random Forest Classifier model to the training data
-random_forest = RandomForestClassifier()
-random_forest.fit(X_train, y_train)
-
-# Calculate training and testing accuracies
-training_accuracy = random_forest.score(X_train, y_train)
-testing_accuracy = random_forest.score(X_test, y_test)
-
-print("Training accuracy = " + str(training_accuracy))
-print("Testing accuracy = " + str(testing_accuracy))
-
-
 # # define hyperparameter values for grid search
 # param_grid = {
 #     'bootstrap': [True],
-#     'max_depth': [10, 20, 40, 80, 160],
-#     'max_features': [20, 40, 80, 160],
+#     'max_depth': [5, 10, 15, 20],
+#     'max_features': [5, 10, 15],
 #     'n_estimators': [128, 256, 512, 1024]
 # }
 #
@@ -39,14 +28,13 @@ print("Testing accuracy = " + str(testing_accuracy))
 # grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid, cv=3, n_jobs=10, verbose=1)
 # grid_search.fit(X_train, y_train)
 #
-# print(grid_search.best_params_)  # {'bootstrap': True, 'max_depth': 40, 'max_features': 20, 'n_estimators': 512}
+# print(grid_search.best_params_)  # {'bootstrap': True, 'max_depth': 20, 'max_features': 10, 'n_estimators': 128}
 
 # Using the best hyperparameter grid
 random_forest_tuned = RandomForestClassifier(bootstrap=True,
-                                             max_depth=60,
-                                             max_features=20,
-                                             n_estimators=512)
-
+                                             max_depth=20,
+                                             max_features=10,
+                                             n_estimators=128)
 # Fit and train the random forest tuned model
 random_forest_tuned.fit(X_train, y_train)
 training_accuracy_tuned = random_forest_tuned.score(X_train, y_train)
@@ -65,4 +53,4 @@ print(tuned_predictions)
 
 # Make a file TestingResultsBinary.csv that contains the 128 numbers and the labels in each line
 testing_data[128] = tuned_predictions
-testing_data.to_csv("output/TestingResultsMulti.csv")
+testing_data.to_csv("output/TestingResultsMulti.csv")
\ No newline at end of file
-- 
GitLab
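
Note on the tuning step in this patch: the GridSearchCV block that produced the chosen
hyperparameters ({'bootstrap': True, 'max_depth': 20, 'max_features': 10, 'n_estimators': 128})
is left commented out in exercise2.py. The sketch below shows how that search flow could be
reproduced end to end. The data-loading code sits outside the hunks shown, so the synthetic
128-feature dataset from sklearn's make_classification used here is an assumption for
illustration only, not the project's actual training CSV.

# Minimal sketch of the commented-out grid search, assuming synthetic data
# in place of the project's 128-feature training file.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split

# Synthetic stand-in for the training data (assumption, not the real dataset)
X, y = make_classification(n_samples=1000, n_features=128,
                           n_informative=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Hyperparameter grid matching the values in the patch
param_grid = {
    'bootstrap': [True],
    'max_depth': [5, 10, 15, 20],
    'max_features': [5, 10, 15],
    'n_estimators': [128, 256, 512, 1024],
}

# Same search settings as the commented-out block (cv=3, n_jobs=10, verbose=1)
grid_search = GridSearchCV(estimator=RandomForestClassifier(),
                           param_grid=param_grid,
                           cv=3, n_jobs=10, verbose=1)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)

# Refit with the selected parameters and report held-out accuracy,
# mirroring the random_forest_tuned model in the patch
best_rf = RandomForestClassifier(**grid_search.best_params_)
best_rf.fit(X_train, y_train)
print("Testing accuracy = " + str(best_rf.score(X_test, y_test)))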