From 047d965c0b8fe97d8bf4946406cd93f5b449e729 Mon Sep 17 00:00:00 2001
From: ejc1g20 <ejc1g20@soton.ac.uk>
Date: Sun, 4 Jun 2023 16:38:35 +0000
Subject: [PATCH] Replace exercise2.py with updated version after further work

---
 exercise2.py | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/exercise2.py b/exercise2.py
index 9eaa9fc..1f8e31f 100644
--- a/exercise2.py
+++ b/exercise2.py
@@ -13,25 +13,14 @@ y = training_data["129"]
 # Get a list of the input variables
 X = training_data.drop(["129"], axis=1)
 
+# Split data into test and train
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
 
-# Create and fit a Random Forest Classifier model to the training data
-random_forest = RandomForestClassifier()
-random_forest.fit(X_train, y_train)
-
-# Calculate training and testing accuracies
-training_accuracy = random_forest.score(X_train, y_train)
-testing_accuracy = random_forest.score(X_test, y_test)
-
-print("Training accuracy = " + str(training_accuracy))
-print("Testing accuracy = " + str(testing_accuracy))
-
-
 # # define hyperparameter values for grid search
 # param_grid = {
 #     'bootstrap': [True],
-#     'max_depth': [10, 20, 40, 80, 160],
-#     'max_features': [20, 40, 80, 160],
+#     'max_depth': [5, 10, 15, 20],
+#     'max_features': [5, 10, 15],
 #     'n_estimators': [128, 256, 512, 1024]
 # }
 #
@@ -39,14 +28,13 @@ print("Testing accuracy = " + str(testing_accuracy))
 # grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid, cv=3, n_jobs=10, verbose=1)
 # grid_search.fit(X_train, y_train)
 #
-# print(grid_search.best_params_)  # {'bootstrap': True, 'max_depth': 40, 'max_features': 20, 'n_estimators': 512}
+# print(grid_search.best_params_)  # {'bootstrap': True, 'max_depth': 20, 'max_features': 10, 'n_estimators': 128}
 
 # Using the best hyperparameter grid
 random_forest_tuned = RandomForestClassifier(bootstrap=True,
-                                             max_depth=60,
-                                             max_features=20,
-                                             n_estimators=512)
-
+                                             max_depth=20,
+                                             max_features=10,
+                                             n_estimators=128)
 # Fit and train the random forest tuned model
 random_forest_tuned.fit(X_train, y_train)
 training_accuracy_tuned = random_forest_tuned.score(X_train, y_train)
@@ -65,4 +53,4 @@ print(tuned_predictions)
 
 # Make a file TestingResultsBinary.csv that contains the 128 numbers and the labels in each line
 testing_data[128] = tuned_predictions
-testing_data.to_csv("output/TestingResultsMulti.csv")
+testing_data.to_csv("output/TestingResultsMulti.csv")
\ No newline at end of file
-- 
GitLab
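
Note on the tuning step in this patch: the GridSearchCV block that produced the chosen
hyperparameters ({'bootstrap': True, 'max_depth': 20, 'max_features': 10, 'n_estimators': 128})
is left commented out in exercise2.py. The sketch below shows how that search flow could be
reproduced end to end. The data-loading code sits outside the hunks shown, so the synthetic
128-feature dataset from sklearn's make_classification used here is an assumption for
illustration only, not the project's actual training CSV.

# Minimal sketch of the commented-out grid search, assuming synthetic data
# in place of the project's 128-feature training file.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split

# Synthetic stand-in for the training data (assumption, not the real dataset)
X, y = make_classification(n_samples=1000, n_features=128,
                           n_informative=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Hyperparameter grid matching the values in the patch
param_grid = {
    'bootstrap': [True],
    'max_depth': [5, 10, 15, 20],
    'max_features': [5, 10, 15],
    'n_estimators': [128, 256, 512, 1024],
}

# Same search settings as the commented-out block (cv=3, n_jobs=10, verbose=1)
grid_search = GridSearchCV(estimator=RandomForestClassifier(),
                           param_grid=param_grid,
                           cv=3, n_jobs=10, verbose=1)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)

# Refit with the selected parameters and report held-out accuracy,
# mirroring the random_forest_tuned model in the patch
best_rf = RandomForestClassifier(**grid_search.best_params_)
best_rf.fit(X_train, y_train)
print("Testing accuracy = " + str(best_rf.score(X_test, y_test)))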