Skip to content
Snippets Groups Projects
Select Git revision
  • 2c362131edd57f7056448b69f05d08ca0955eb02
  • master default protected
  • development
  • gh-pages
4 results

index.doctree

Blame
  • test2.py 1.68 KiB
    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    
    # Train a random forest on the labelled training CSV, predict a label for
    # every row of the testing CSV, report the fit on the training set, and
    # save the predicted labels to disk.

    # Input/output locations, kept in one place so they are easy to change.
    TRAIN_CSV = "/Users/rebecca_dxy/Downloads/Machine/TrainingDataMulti.csv"
    TEST_CSV = "/Users/rebecca_dxy/Downloads/Machine/TestingDataMulti.csv"
    RESULTS_CSV = "TestingResultsMulti.csv"

    # Fixed seed so repeated runs build the same forest and therefore emit the
    # same predictions and the same results file.
    RANDOM_SEED = 42

    # Load the training data; the last column is used as the label and every
    # other column as a feature.
    train_data = pd.read_csv(TRAIN_CSV)
    X_train = train_data.iloc[:, :-1]
    y_train = train_data.iloc[:, -1]

    # Remember the feature column names so the testing data can be relabelled
    # to match what the classifier was fitted on.
    feature_names = X_train.columns

    # Create and train the Random Forest classifier
    rf_classifier = RandomForestClassifier(random_state=RANDOM_SEED)
    rf_classifier.fit(X_train, y_train)

    # Load the testing data; every column is treated as a feature.
    test_data = pd.read_csv(TEST_CSV)

    # Fail with a readable message (rather than pandas' generic length-mismatch
    # error) when the testing file does not have one column per feature.
    if test_data.shape[1] != len(feature_names):
        raise ValueError(
            f"Testing data has {test_data.shape[1]} columns but the classifier "
            f"was trained on {len(feature_names)} features"
        )

    # Columns are matched to the training features purely by position.
    test_data.columns = feature_names

    # Predict labels for the testing data and collect them in a DataFrame
    y_pred = rf_classifier.predict(test_data)
    testing_results = pd.DataFrame(y_pred, columns=["Label"])

    # Score the classifier on the same rows it was fitted on. This measures how
    # well the forest reproduces its training labels, not how it generalizes.
    y_train_pred = rf_classifier.predict(X_train)
    training_accuracy = accuracy_score(y_train, y_train_pred)
    training_error = 1 - training_accuracy

    # Print the training error and accuracy
    print("Training Error:", training_error)
    print("Training Accuracy:", training_accuracy)

    # Show the computed labels for all testing data as a table
    print("Computed Labels for Testing Data:")
    print(testing_results)

    # Show the computed labels again, one line per trace (numbered from 1)
    for trace_number, label in enumerate(y_pred, start=1):
        print(f"Computed Label for Trace {trace_number}: {label}")

    # Save the computed labels to a file
    testing_results.to_csv(RESULTS_CSV, index=False)