# test2.py

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Train a random forest on the labelled training set, predict labels for the
# testing set, report the training error/accuracy, and save the predicted
# labels to a CSV file.

# Input/output locations, kept in one place so they are easy to change.
TRAIN_CSV = "/Users/rebecca_dxy/Downloads/Machine/TrainingDataMulti.csv"
TEST_CSV = "/Users/rebecca_dxy/Downloads/Machine/TestingDataMulti.csv"
RESULTS_CSV = "TestingResultsMulti.csv"

# Fixed seed so repeated runs build the same forest and emit the same labels.
RANDOM_STATE = 42

# Load the training data.
# NOTE(review): read_csv treats the first row as a header by default; if the
# CSV files have no header row, the first sample is consumed as column names
# -- confirm against the data files.
train_data = pd.read_csv(TRAIN_CSV)

# Split the features (every column but the last) from the labels (last column)
X_train = train_data.iloc[:, :-1]
y_train = train_data.iloc[:, -1]

# Get the column names of the training features
feature_names = X_train.columns

# Create and train a Random Forest classifier
rf_classifier = RandomForestClassifier(random_state=RANDOM_STATE)
rf_classifier.fit(X_train, y_train)

# Load the testing data
test_data = pd.read_csv(TEST_CSV)

# The testing columns are renamed positionally to match the training features,
# so fail early with a clear message if the column counts disagree.
if test_data.shape[1] != len(feature_names):
    raise ValueError(
        f"Testing data has {test_data.shape[1]} columns but the model was "
        f"trained on {len(feature_names)} features"
    )
test_data.columns = feature_names

# Predict labels for the testing data
y_pred = rf_classifier.predict(test_data)

# Create a DataFrame with the computed labels for testing data
testing_results = pd.DataFrame(y_pred, columns=["Label"])

# Compute predictions on the training data. These are the same samples the
# model was fitted on, so the score below is a training score, not an estimate
# of performance on unseen data.
y_train_pred = rf_classifier.predict(X_train)

# Calculate training accuracy once and derive the error from it
training_accuracy = accuracy_score(y_train, y_train_pred)
training_error = 1 - training_accuracy

# Print the training error and accuracy
print("Training Error:", training_error)
print("Training Accuracy:", training_accuracy)


# Show the computed labels for all testing data as a table
print("Computed Labels for Testing Data:")
print(testing_results)

# Show the computed label of each trace on its own line (traces numbered from 1)
for index, label in enumerate(y_pred, start=1):
    print(f"Computed Label for Trace {index}: {label}")

# Save the computed labels to a file
testing_results.to_csv(RESULTS_CSV, index=False)