Skip to content
Snippets Groups Projects
Select Git revision
8 results Searching

development.ini

Blame
  • PartA_Xinyi_Zhang.py 2.23 KiB
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import confusion_matrix, f1_score
    
    # Load the labelled training data (path is relative to the working directory).
    # NOTE(review): read_csv treats the first row as a header by default --
    # confirm that TrainingDataBinary.csv really has a header row.
    data = pd.read_csv('TrainingDataBinary.csv')
    
    # Separate features (X) and labels (y): the last column is the label,
    # every other column is used as a feature.
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    
    # Hold out 20% of the rows for validation; the fixed seed keeps the split
    # identical from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Create a Random Forest classifier.
    # The forest is seeded as well as the split: without random_state each run
    # grows a different forest, so the reported metrics and the predictions
    # written later would not be reproducible.
    rf_classifier = RandomForestClassifier(
        n_estimators=100,
        max_depth=50,
        min_samples_split=2,
        random_state=42,
    )
    
    # Train the classifier on the training portion only; the held-out rows are
    # reserved for validation.
    rf_classifier.fit(X_train, y_train)
    
    # Predict labels for the held-out validation rows.
    y_test_pred = rf_classifier.predict(X_test)
    
    # Report the fraction of validation rows that were classified correctly.
    accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
    print("Validation Accuracy:", accuracy)
    
    # Build the confusion matrix and wrap it in a labelled DataFrame so the
    # heatmap axes show readable row/column names.
    cm = confusion_matrix(y_true=y_test, y_pred=y_test_pred)
    cm_df = pd.DataFrame(
        data=cm,
        index=['True Label 0', 'True Label 1'],
        columns=['Predicted Label 0', 'Predicted Label 1'],
    )
    
    # Draw the matrix as an annotated heatmap with integer cell counts.
    plt.figure(figsize=(8, 6))
    heatmap_ax = sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues')
    heatmap_ax.set_title('Confusion Matrix')
    heatmap_ax.set_xlabel('Predicted Labels')
    heatmap_ax.set_ylabel('True Labels')
    plt.show()
    
    # F1 score on the validation set (printed once the plot call returns).
    f1 = f1_score(y_true=y_test, y_pred=y_test_pred)
    print("F1 Score:", f1)
    
    # Read the unlabelled testing data (path is relative to the working directory).
    test_data = pd.read_csv('TestingDataBinary.csv')
    
    # Predict a label for every testing row with the trained forest.
    # NOTE: this rebinds y_test_pred, so the validation predictions stored under
    # the same name are no longer available after this point.
    y_test_pred = rf_classifier.predict(test_data)
    
    # Attach the predictions as a new "marker" column and write the augmented
    # table to a new CSV file, without the index column.
    test_data['marker'] = y_test_pred
    test_data.to_csv(path_or_buf='TestingResultsBinary.csv', index=False)
    
    # Echo the predicted labels to the console.
    print("Predicted labels for testing data:", y_test_pred, sep="\n")