"""Multi-class logistic regression on the ``*Multi.csv`` data files.

Running this file loads ``TrainingDataMulti.csv``, holds out 10% of it for
evaluation, fits a one-vs-rest logistic regression on the remaining 90%,
shows a confusion matrix for the held-out rows, and writes the predicted
labels for ``TestingDataMulti.csv`` to ``TestingResultsMulti.csv``.
"""

# Import the necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier  # For multi-category tasks
from sklearn.metrics import accuracy_score
from sklearn import datasets, neighbors, linear_model
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score
from sklearn.model_selection import train_test_split


def import_data(filename, n_features=128):
    """Load a comma-separated data file into a feature matrix and a label vector.

    Every line is stripped and split on commas. The first ``n_features``
    fields are converted to floats and prefixed with a constant ``1``; the
    last field of the line is converted to a float and used as the label.
    A line on which any of those conversions fails is reported on stdout and
    skipped as a whole, so the two returned arrays always have the same
    number of rows.

    Args:
        filename: Path of the CSV file to read.
        n_features: Number of leading fields to use as features.

    Returns:
        A tuple ``(xmat, ymat)`` of NumPy arrays: the feature rows (each with
        the leading constant ``1``) and the corresponding labels.
    """
    x = []
    y = []
    with open(filename, 'r') as content:
        # Iterate the file lazily instead of materialising every line first.
        for raw_line in content:
            line = raw_line.strip().split(',')
            try:
                # Parse BOTH parts before storing either one: appending the
                # features first would leave an orphan row in x whenever the
                # label field is the one that fails to convert.
                features = [1] + [float(val) for val in line[:n_features]]
                label = float(line[-1])
            except ValueError:
                # Handle conversion errors by logging and skipping the line
                print(f"Illegal line: {line}")
                continue
            x.append(features)
            y.append(label)
    xmat = np.array(x)
    ymat = np.array(y)
    return xmat, ymat


def train_model(xmat, ymat):
    """Fit a feature scaler and a one-vs-rest logistic regression model.

    Args:
        xmat: Training feature matrix.
        ymat: Training labels, one per row of ``xmat``.

    Returns:
        A tuple ``(model, scaler)``: the fitted ``OneVsRestClassifier`` and
        the ``StandardScaler`` that was fitted on ``xmat``. The same scaler
        must be applied to any data passed to the model later.
    """
    scaler = StandardScaler()
    xmat = scaler.fit_transform(xmat)  # Normalised transformation of feature data
    model = OneVsRestClassifier(LogisticRegression(max_iter=10001))
    model.fit(xmat, ymat)
    return model, scaler


def predict_labels(xmat, model, scaler):
    """Scale ``xmat`` with the fitted ``scaler`` and return integer predictions.

    Args:
        xmat: Feature matrix laid out like the one used for training.
        model: Fitted classifier exposing ``predict``.
        scaler: Fitted scaler exposing ``transform``.

    Returns:
        The model's predictions cast to ``int``.
    """
    xmat = scaler.transform(xmat)
    predictions = model.predict(xmat).astype(int)
    return predictions


# Import the training data
xmat, ymat = import_data('TrainingDataMulti.csv')

# Split the data in TrainingDataMulti.csv into training and testing sets
# (90% training, 10% testing)
xmat_train, xmat_test, ymat_train, ymat_test = train_test_split(
    xmat, ymat, test_size=0.1, random_state=42)

# Use the train_model function defined above to train the model
model, scaler = train_model(xmat_train, ymat_train)

# Predict the labels of the held-out 10% split
predicted_labels = predict_labels(xmat_test, model, scaler)

# Accuracy on the held-out split. NOTE: it is printed below under the label
# 'Training accuracy:', but it is measured on rows the model was not fitted on.
accuracy = accuracy_score(ymat_test, predicted_labels)

# Get and display the confusion matrix for the held-out split
cm = confusion_matrix(ymat_test, predicted_labels, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
disp.plot()
plt.show()

# Use import_data function defined above to import the test datasets.
# ymat_testing is whatever the last field of each line parses to; it is not
# used below.
xmat_testing, ymat_testing = import_data('TestingDataMulti.csv')
# Use predict_labels function defined above to predict their labels
# (predict_labels already returns integers, so no further cast is needed)
predicted_testinglabels = predict_labels(xmat_testing, model, scaler)

# Print out accuracy and prediction labels
print('Training accuracy:', accuracy)
print('Predicted labels:', predicted_testinglabels)

# Save the predicted labels to a CSV file
test_results = pd.DataFrame({'Label': predicted_testinglabels})
test_results.to_csv('TestingResultsMulti.csv', index=False)