Commit 8716a0cf authored by ejc1g20

Replace exercise1.py with updated version after further work.

parent 66c6cc45
exercise1.py
@@ -13,40 +13,28 @@ y = training_data["129"]
 # Get a list of the input variables
 X = training_data.drop(["129"], axis=1)
-# Split data into test and train
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
-# Create and fit a Random Forest Classifier model to the training data
-random_forest = RandomForestClassifier()
-random_forest.fit(X_train, y_train)
-# Calculate training and testing accuracies
-training_accuracy = random_forest.score(X_train, y_train)
-testing_accuracy = random_forest.score(X_test, y_test)
-print("Training accuracy = " + str(training_accuracy))
-print("Testing accuracy = " + str(testing_accuracy))
 # # define hyperparameter values for grid search
 # param_grid = {
 #     'bootstrap': [True],
-#     'max_depth': [10, 20, 40, 80, 160],
-#     'max_features': [20, 40, 80, 160],
+#     'max_depth': [5, 10, 15, 20],
+#     'max_features': [5, 10, 15],
 #     'n_estimators': [128, 256, 512, 1024]
 # }
-#
 # # Create a model to use grid search
 # grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid, cv=3, n_jobs=10, verbose=1)
 # grid_search.fit(X_train, y_train)
 #
-# print(grid_search.best_params_)  # {'bootstrap': True, 'max_depth': 40, 'max_features': 20, 'n_estimators': 512}
+# print(grid_search.best_params_)  # {'bootstrap': True, 'max_depth': 20, 'max_features': 15, 'n_estimators': 128}
 # Using the best hyperparameter grid
 random_forest_tuned = RandomForestClassifier(bootstrap=True,
-                                             max_depth=60,
-                                             max_features=20,
-                                             n_estimators=512)
-# Fit and train the random forest tuned model
+                                             max_depth=20,
+                                             max_features=15,
+                                             n_estimators=128)
 random_forest_tuned.fit(X_train, y_train)
 training_accuracy_tuned = random_forest_tuned.score(X_train, y_train)
 testing_accuracy_tuned = random_forest_tuned.score(X_test, y_test)
@@ -58,11 +46,9 @@ print("Testing accuracy tuned = " + str(testing_accuracy_tuned))
 # Read the testing data into a pandas dataframe
 testing_data = pd.read_csv("data/TestingDataBinary.csv", header=None)
-# Make predictions of the classifications of the testing data
 tuned_predictions = random_forest_tuned.predict(testing_data)
 print(tuned_predictions)
 # Make a file TestingResultsBinary.csv that contains the 128 numbers and the labels in each line
 testing_data[128] = tuned_predictions
 testing_data.to_csv("output/TestingResultsBinary.csv")
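
Note: the hunks above start at line 13 of exercise1.py, so the imports and the training-data load are not visible in this commit. The sketch below is a minimal, self-contained reconstruction of the tuned pipeline as it stands after this change, for reference only. The training CSV path ("data/TrainingDataBinary.csv"), the presence of a header row whose last column is named "129", and the exact import lines are assumptions, not part of the diff; the hyperparameters are the ones recorded in the commented-out GridSearchCV result.

    # Reference sketch only -- not part of the commit. Imports and the training-data
    # load are assumed, since the diff hunk begins below them at line 13.
    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    # Assumption: this training CSV path, and a header row whose last column is "129"
    training_data = pd.read_csv("data/TrainingDataBinary.csv")
    y = training_data["129"]                 # label column, as referenced in the hunk header
    X = training_data.drop(["129"], axis=1)  # remaining columns are the input features

    # Hold out 20% of the rows for testing, as in the committed code
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Hyperparameters taken from the grid-search result noted in the commit
    random_forest_tuned = RandomForestClassifier(bootstrap=True,
                                                 max_depth=20,
                                                 max_features=15,
                                                 n_estimators=128)
    random_forest_tuned.fit(X_train, y_train)
    print("Training accuracy tuned = " + str(random_forest_tuned.score(X_train, y_train)))
    print("Testing accuracy tuned = " + str(random_forest_tuned.score(X_test, y_test)))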