Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
COMP3217_Coursework2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
yw10n22
COMP3217_Coursework2
Commits
9ed58d3d
Commit
9ed58d3d
authored
2 years ago
by
yw10n22
Browse files
Options
Downloads
Patches
Plain Diff
Upload New File
parent
0e496c8a
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
PartBLogisticRegression.py
+77
-0
77 additions, 0 deletions
PartBLogisticRegression.py
with
77 additions
and
0 deletions
PartBLogisticRegression.py
0 → 100644
+
77
−
0
View file @
9ed58d3d
# Import the necessary libraries
import
numpy
as
np
import
matplotlib.pyplot
as
plt
import
pandas
as
pd
from
sklearn.preprocessing
import
StandardScaler
from
sklearn.linear_model
import
LogisticRegression
from
sklearn.multiclass
import
OneVsRestClassifier
# For multi-category tasks
from
sklearn.metrics
import
accuracy_score
from
sklearn
import
datasets
,
neighbors
,
linear_model
from
sklearn.metrics
import
confusion_matrix
,
ConfusionMatrixDisplay
,
f1_score
from
sklearn.model_selection
import
train_test_split
# Define a function to import data
def import_data(filename, n_features=128):
    """Load a comma-separated file into a feature matrix and a label vector.

    Each line is stripped and split on commas. The first ``n_features``
    fields are converted to floats and prefixed with a constant ``1``; the
    last field of the line is converted to a float and used as the label.
    A line containing a field that cannot be converted is reported with
    ``print`` and skipped entirely.

    Args:
        filename: Path of the file to read.
        n_features: Number of leading fields used as features. Defaults to
            128, the value that was previously hard-coded.

    Returns:
        A tuple ``(xmat, ymat)`` of NumPy arrays built from the accepted
        lines: the feature rows and the labels, always of equal length.
    """
    x = []
    y = []
    with open(filename, 'r') as content:
        # Iterate the file lazily instead of materialising every line first.
        for raw_line in content:
            line = raw_line.strip().split(',')
            try:
                # Parse features AND label before touching x or y, so that a
                # bad label cannot leave a feature row without its label.
                features = [1] + [float(val) for val in line[:n_features]]
                label = float(line[-1])
            except ValueError:
                # Handle conversion errors by reporting and skipping the line
                print(f"Illegal line: {line}")
                continue
            x.append(features)
            y.append(label)
    xmat = np.array(x)
    ymat = np.array(y)
    return xmat, ymat
# Define a function to train the model
def train_model(xmat, ymat):
    """Fit a scaler and a one-vs-rest logistic regression classifier.

    Args:
        xmat: Feature matrix used for fitting.
        ymat: Labels matching the rows of ``xmat``.

    Returns:
        A tuple ``(model, scaler)``: the fitted ``OneVsRestClassifier`` and
        the ``StandardScaler`` that was fitted on ``xmat``.
    """
    # Standardise the features; the fitted scaler is returned so the same
    # transformation can be applied to other data later.
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(xmat)

    base_estimator = LogisticRegression(max_iter=10001)
    model = OneVsRestClassifier(base_estimator)
    model.fit(scaled_features, ymat)
    return model, scaler
# Define a function to predict labels and return the predictions
def predict_labels(xmat, model, scaler):
    """Scale ``xmat`` with ``scaler`` and return ``model``'s integer predictions.

    Args:
        xmat: Feature matrix to predict on.
        model: Object exposing ``predict``; called on the scaled features.
        scaler: Object exposing ``transform``; applied to ``xmat`` first.

    Returns:
        The result of ``model.predict`` converted with ``astype(int)``.
    """
    scaled_features = scaler.transform(xmat)
    raw_predictions = model.predict(scaled_features)
    return raw_predictions.astype(int)
# Import the training data
xmat, ymat = import_data('TrainingDataMulti.csv')

# Split the data from TrainingDataMulti.csv into a training part (90%) and a
# held-out part (10%); random_state is fixed so the split is reproducible.
xmat_train, xmat_test, ymat_train, ymat_test = train_test_split(
    xmat, ymat, test_size=0.1, random_state=42
)

# Use the train_model function defined above to train the model
model, scaler = train_model(xmat_train, ymat_train)

# Predict the labels of the held-out 10% split
predicted_labels = predict_labels(xmat_test, model, scaler)

# Accuracy on the held-out split (these rows were not used for fitting)
accuracy = accuracy_score(ymat_test, predicted_labels)

# Build and display the confusion matrix for the held-out split
cm = confusion_matrix(ymat_test, predicted_labels, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
disp.plot()
plt.show()

# Use the import_data function defined above to import the testing dataset.
# ymat_testing is not used below; only the features are needed for prediction.
xmat_testing, ymat_testing = import_data('TestingDataMulti.csv')

# predict_labels already returns integers, so no further cast is needed
predicted_testinglabels = predict_labels(xmat_testing, model, scaler)

# Print out accuracy and prediction labels. The accuracy is measured on the
# held-out split, so it is labelled as such rather than as training accuracy.
print('Held-out test accuracy:', accuracy)
print('Predicted labels:', predicted_testinglabels)

# Save the predicted labels to a CSV file
test_results = pd.DataFrame({'Label': predicted_testinglabels})
test_results.to_csv('TestingResultsMulti.csv', index=False)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment