Select Git revision
index.doctree
test2.py 1.68 KiB
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# --- Training -----------------------------------------------------------
# Read the training set from disk.
# NOTE(review): read_csv treats the first row as a header by default; if this
# CSV has no header row, the first sample is being consumed as column names
# -- confirm against the file.
train_data = pd.read_csv("/Users/rebecca_dxy/Downloads/Machine/TrainingDataMulti.csv")

# The last column is used as the label; every column before it is a feature.
y_train = train_data.iloc[:, -1]
X_train = train_data.iloc[:, :-1]

# Keep the feature column labels so they can be reused for the testing data.
feature_names = X_train.columns

# Build a Random Forest with scikit-learn's default settings and train it.
# fit() returns the estimator itself, so construction and training chain.
rf_classifier = RandomForestClassifier().fit(X_train, y_train)
# --- Inference on the testing data --------------------------------------
# Read the testing set from disk.
test_data = pd.read_csv("/Users/rebecca_dxy/Downloads/Machine/TestingDataMulti.csv")

# Relabel the testing columns positionally with the training feature names so
# the fitted model is given the column names it was trained on. A column-count
# mismatch raises ValueError on this assignment.
# NOTE(review): this assumes the testing CSV holds the same features in the
# same order as the training CSV -- confirm, since a reordering would go
# unnoticed here.
test_data.columns = feature_names

# Predict one label per testing row, then wrap the predictions in a
# single-column DataFrame named "Label".
y_pred = rf_classifier.predict(test_data)
testing_results = pd.DataFrame({"Label": y_pred})
# --- Training-set metrics -----------------------------------------------
# Predict on the same rows the model was fitted on. These figures therefore
# measure how well the model reproduces its training labels, not how it
# performs on unseen data.
y_train_pred = rf_classifier.predict(X_train)

# Score once and derive the error from the accuracy, instead of calling
# accuracy_score twice on identical arguments.
training_accuracy = accuracy_score(y_train, y_train_pred)
training_error = 1 - training_accuracy

# Print the training error and accuracy
print("Training Error:", training_error)
print("Training Accuracy:", training_accuracy)
# Show the computed labels for all testing data as a single table
print("Computed Labels for Testing Data:")
print(testing_results)

# List the same labels one per line, numbering traces from 1. Iterating the
# "Label" column directly avoids building a Series per row with iterrows().
for trace_number, label in enumerate(testing_results["Label"], start=1):
    print(f"Computed Label for Trace {trace_number}: {label}")

# Save the computed labels to a file in the current working directory,
# without writing the row index as a column
testing_results.to_csv("TestingResultsMulti.csv", index=False)