Upload New File

0e496c8a · yw10n22 · 1a818577 · 0e496c8a
Commit 0e496c8a authored Jun 8, 2023 by yw10n22
--- a/PartALogisticRegression.py
+++ b/PartALogisticRegression.py
+# Import the necessary libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
+from sklearn import datasets, neighbors, linear_model
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score
+from sklearn.model_selection import train_test_split
+
+# Define a function to import data
+def import_data(filename, n_features=128):
+    """Load a comma-separated data file into a feature matrix and a label vector.
+
+    Each line is stripped and split on ','. The first ``n_features`` fields
+    are parsed as floats and prefixed with a constant 1; the last field of
+    the line is parsed as a float label. A line is reported with
+    "Illegal line: ..." and skipped when one of those fields is not a valid
+    float, or when its number of feature fields differs from that of the
+    first accepted line.
+
+    Args:
+        filename: Path of the text file to read.
+        n_features: Maximum number of leading fields used as features
+            (defaults to 128).
+
+    Returns:
+        A tuple ``(xmat, ymat)`` of NumPy arrays: one row per accepted line
+        (leading 1 followed by the feature values) and the matching labels.
+    """
+    x = []
+    y = []
+    width = None  # feature count of the first accepted line
+    with open(filename, 'r') as content:
+        # Iterate the file lazily instead of loading every line up front.
+        for raw_line in content:
+            line = raw_line.strip().split(',')
+            try:
+                # Parse features AND label before touching x / y, so a bad
+                # label cannot leave x one row longer than y.
+                features = [float(val) for val in line[:n_features]]
+                label = float(line[-1])
+            except ValueError:
+                print(f"Illegal line: {line}")
+                continue
+            if width is None:
+                width = len(features)
+            elif len(features) != width:
+                # A row of a different width would make the matrix ragged.
+                print(f"Illegal line: {line}")
+                continue
+            x.append([1] + features)
+            y.append(label)
+    xmat = np.array(x)
+    ymat = np.array(y)
+    return xmat, ymat
+
+# Define a function to train the model
+def train_model(xmat, ymat):
+    """Fit a feature scaler and a logistic regression classifier.
+
+    The scaler is fitted on ``xmat``; the classifier is then fitted on the
+    scaled features together with the labels ``ymat``.
+
+    Returns:
+        A tuple ``(model, scaler)`` holding the fitted classifier and the
+        fitted scaler, so the same scaling can be applied to new data.
+    """
+    # Fit the scaler first, then standardise the training features with it.
+    scaler = StandardScaler().fit(xmat)
+    scaled_features = scaler.transform(xmat)
+    # Logistic regression with the iteration cap set to 10001.
+    model = LogisticRegression(max_iter=10001).fit(scaled_features, ymat)
+    return model, scaler
+
+# Define a function to predict labels and return the predictions
+def predict_labels(xmat, model, scaler):
+    """Return the model's predictions for ``xmat``.
+
+    The features are first passed through ``scaler.transform`` and the
+    result is handed to ``model.predict``.
+    """
+    return model.predict(scaler.transform(xmat))
+
+# Load the labelled data; import_data returns (feature matrix, label vector)
+xmat, ymat = import_data('TrainingDataBinary.csv')
+
+# Hold out 10% of TrainingDataBinary.csv for evaluation and keep 90% for fitting.
+# random_state=42 fixes the shuffle so the split is the same on every run.
+xmat_train, xmat_test, ymat_train, ymat_test = train_test_split(xmat, ymat, test_size=0.1, random_state=42)
+
+
+# Fit the scaler and the logistic regression model on the 90% split only
+model, scaler = train_model(xmat_train, ymat_train)
+
+# Predict labels for the held-out 10% split
+predicted_labels = predict_labels(xmat_test, model, scaler)
+
+# Accuracy on the held-out split (not on the rows the model was fitted on)
+accuracy = accuracy_score(ymat_test, predicted_labels)
+
+# Build and display the confusion matrix for the held-out split
+cm = confusion_matrix(ymat_test, predicted_labels, labels=model.classes_)
+disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
+disp.plot()
+plt.show()
+
+# Load the testing data set with the same import_data function.
+# ymat_testing is whatever import_data parsed from the last field of each line;
+# it is not used below (presumably this file carries no label column - TODO confirm).
+xmat_testing, ymat_testing = import_data('TestingDataBinary.csv')
+# Predict the testing labels; import_data parses labels as floats, so cast the predictions to int
+predicted_testinglabels = predict_labels(xmat_testing, model, scaler).astype(int)
+
+# Print the accuracy and the predicted labels.
+# NOTE(review): the value printed as 'Training accuracy' is the held-out accuracy computed above.
+print('Training accuracy:', accuracy)
+print('Predicted labels:', predicted_testinglabels)
+
+# Save the predicted labels to a CSV file with a single 'Label' column and no index column
+test_results = pd.DataFrame({'Label': predicted_testinglabels})
+test_results.to_csv('TestingResultsBinary.csv', index=False)
+