"""Train a random forest on a labelled CSV and label an unlabelled test CSV.

Run as a script. The training file is read with every column but the last
used as features and the last column used as the label. A
``RandomForestClassifier`` is fitted on it, the training error and accuracy
are printed, the predicted label of every test row is printed, and the
predicted labels are written to a CSV file with a single ``Label`` column.
"""

from typing import Optional

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Default file locations, used when the script is run without overrides.
TRAIN_CSV = "/Users/rebecca_dxy/Downloads/Machine/TrainingDataMulti.csv"
TEST_CSV = "/Users/rebecca_dxy/Downloads/Machine/TestingDataMulti.csv"
RESULTS_CSV = "TestingResultsMulti.csv"


def main(
    train_path: str = TRAIN_CSV,
    test_path: str = TEST_CSV,
    results_path: str = RESULTS_CSV,
    random_state: Optional[int] = None,
) -> pd.DataFrame:
    """Fit the classifier, report training metrics and save test predictions.

    Args:
        train_path: CSV whose last column is the label and whose remaining
            columns are the features.
        test_path: CSV holding only feature columns, in the same order as
            the training features.
        results_path: Where the predicted labels are written (no index).
        random_state: Seed forwarded to ``RandomForestClassifier``. The
            default ``None`` keeps the forest unseeded, so results may vary
            between runs; pass an integer for reproducible output.

    Returns:
        A DataFrame with one ``Label`` column holding the predicted label of
        each test row, in file order.

    Raises:
        ValueError: If the test file does not have exactly as many columns
            as there are training features.
    """
    # Load the training data and split it into features and labels.
    train_data = pd.read_csv(train_path)
    X_train = train_data.iloc[:, :-1]
    y_train = train_data.iloc[:, -1]
    feature_names = X_train.columns

    # Create and train the Random Forest classifier.
    rf_classifier = RandomForestClassifier(random_state=random_state)
    rf_classifier.fit(X_train, y_train)

    # Load the testing data.
    # NOTE(review): read_csv treats the first row as a header. If the test
    # file has no header row, its first sample is consumed as column names
    # and never predicted -- confirm the file layout.
    test_data = pd.read_csv(test_path)

    # Fail with a clear message instead of pandas' generic length-mismatch
    # error when the two files do not describe the same feature set.
    if test_data.shape[1] != len(feature_names):
        raise ValueError(
            f"Testing data has {test_data.shape[1]} columns but the "
            f"classifier was trained on {len(feature_names)} features"
        )

    # Rename the test columns to the training feature names so the frame
    # passed to predict() carries the same column labels the model was
    # fitted with.
    test_data.columns = feature_names

    # Predict labels for the testing data.
    y_pred = rf_classifier.predict(test_data)
    testing_results = pd.DataFrame(y_pred, columns=["Label"])

    # Score the model on the data it was trained on; the error is simply
    # the complement of the accuracy, so the score is computed only once.
    y_train_pred = rf_classifier.predict(X_train)
    training_accuracy = accuracy_score(y_train, y_train_pred)
    training_error = 1 - training_accuracy

    print("Training Error:", training_error)
    print("Training Accuracy:", training_accuracy)

    # Show the computed labels for all testing data as a table ...
    print("Computed Labels for Testing Data:")
    print(testing_results)

    # ... and once more as one line per trace, numbered from 1.
    for trace_number, label in enumerate(testing_results["Label"], start=1):
        print(f"Computed Label for Trace {trace_number}: {label}")

    # Save the computed labels to a file.
    testing_results.to_csv(results_path, index=False)

    return testing_results


if __name__ == "__main__":
    main()