Skip to content
Snippets Groups Projects
Commit 0e496c8a authored by yw10n22's avatar yw10n22
Browse files

Upload New File

parent 1a818577
Branches
No related tags found
No related merge requests found
# Import the necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn import datasets, neighbors, linear_model
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score
from sklearn.model_selection import train_test_split
# Define a function to import data
def import_data(filename, n_features=128):
    """Load feature rows and labels from a comma-separated text file.

    Every line is stripped and split on commas. The first ``n_features``
    fields are parsed as floats and prefixed with a constant ``1`` column;
    the last field of the line is parsed as the label.

    A line is kept only when *all* of those fields parse. Otherwise the line
    is reported on stdout and skipped as a whole, so the returned feature
    matrix and label vector always have the same number of rows.

    Args:
        filename: Path of the file to read.
        n_features: Number of leading fields used as features.

    Returns:
        A tuple ``(xmat, ymat)`` of NumPy arrays: the feature rows (with the
        leading ``1`` column) and the parsed labels.
    """
    x = []
    y = []
    with open(filename, 'r') as content:
        # Iterate the file object directly: no need to load every line first.
        for raw_line in content:
            line = raw_line.strip().split(',')
            try:
                # Parse both parts before touching x / y, so a bad label
                # cannot leave a feature row without a matching label.
                features = [1] + [float(val) for val in line[:n_features]]
                label = float(line[-1])
            except ValueError:
                # Report the unparsable line and skip it entirely.
                print(f"Illegal line: {line}")
                continue
            x.append(features)
            y.append(label)
    xmat = np.array(x)
    ymat = np.array(y)
    return xmat, ymat
# define a function to train the model
def train_model(xmat, ymat):
    """Fit a scaler and a logistic-regression classifier on the given data.

    Args:
        xmat: Feature matrix used for fitting.
        ymat: Labels matching the rows of ``xmat``.

    Returns:
        A tuple ``(model, scaler)``: the fitted ``LogisticRegression`` and the
        ``StandardScaler`` that was fitted on ``xmat``.
    """
    # Learn the scaling from the training features, then apply it to them.
    scaler = StandardScaler().fit(xmat)
    scaled_features = scaler.transform(xmat)
    # max_iter is set to 10001 iterations for the solver.
    model = LogisticRegression(max_iter=10001).fit(scaled_features, ymat)
    return model, scaler
# define a function to predict labels and return the predictions
def predict_labels(xmat, model, scaler):
    """Scale ``xmat`` with ``scaler`` and return ``model``'s predictions.

    Args:
        xmat: Feature matrix to classify.
        model: Object exposing ``predict``.
        scaler: Object exposing ``transform``; applied to ``xmat`` first.

    Returns:
        Whatever ``model.predict`` returns for the transformed features.
    """
    return model.predict(scaler.transform(xmat))
# Import the labelled data
xmat, ymat = import_data('TrainingDataBinary.csv')
# Split TrainingDataBinary.csv into a training part (90%) and a held-out
# validation part (10%); random_state fixes the shuffle.
xmat_train, xmat_test, ymat_train, ymat_test = train_test_split(
    xmat, ymat, test_size=0.1, random_state=42)
# Fit the scaler and the classifier on the training part only
model, scaler = train_model(xmat_train, ymat_train)
# Predict labels for the held-out validation part
predicted_labels = predict_labels(xmat_test, model, scaler)
# Accuracy on the held-out part: a validation score, not a training score
accuracy = accuracy_score(ymat_test, predicted_labels)
# Plot the confusion matrix for the held-out part
cm = confusion_matrix(ymat_test, predicted_labels, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
disp.plot()
plt.show()
# Import the dataset that needs labelling. ymat_testing (the last field of
# each line) is not used below.
xmat_testing, ymat_testing = import_data('TestingDataBinary.csv')
# Predict its labels with the already-fitted model and scaler
predicted_testinglabels = predict_labels(xmat_testing, model, scaler).astype(int)
# Print the validation accuracy and the predicted labels
print('Validation accuracy:', accuracy)
print('Predicted labels:', predicted_testinglabels)
# Save the predicted labels to a CSV file
test_results = pd.DataFrame({'Label': predicted_testinglabels})
test_results.to_csv('TestingResultsBinary.csv', index=False)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment