Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
COMP3217 CW2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ct2g20
COMP3217 CW2
Commits
ac72c4f0
Commit
ac72c4f0
authored
Jun 7, 2023
by
plaaosert
Browse files
Options
Downloads
Patches
Plain Diff
things
parent
315ec84f
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
main.py
+0
-0
0 additions, 0 deletions
main.py
task1.py
+99
-0
99 additions, 0 deletions
task1.py
with
99 additions
and
0 deletions
main.py
deleted
100644 → 0
+
0
−
0
View file @
315ec84f
This diff is collapsed.
Click to expand it.
task1.py
0 → 100644
+
99
−
0
View file @
ac72c4f0
#Import scikit-learn dataset library
import
pandas
as
pd
from
matplotlib
import
pyplot
as
plt
from
sklearn
import
datasets
from
sklearn.decomposition
import
PCA
from
sklearn.linear_model
import
LogisticRegression
from
sklearn.model_selection
import
train_test_split
,
GridSearchCV
from
sklearn
import
svm
,
metrics
import
numpy
as
np
import
csv
from
sklearn.pipeline
import
Pipeline
from
sklearn.preprocessing
import
StandardScaler
from
sklearn.utils
import
Bunch
def load_dataset(path='data/TrainingDataBinary.csv'):
    """Load the labelled training data from a CSV file.

    The first row of the file is read and discarded. Every following
    non-blank row must hold 128 feature values followed by one integer
    class label in the last column.

    NOTE(review): the first row is presumably a header; if the CSV has no
    header row, the first sample is silently dropped -- confirm against
    the data file.

    Args:
        path: Location of the CSV file. Defaults to the training file the
            script has always used.

    Returns:
        A ``Bunch`` with:
          * ``data``: float64 array of shape (n_rows, 128),
          * ``target``: int64 array of shape (n_rows,),
          * ``feature_names``: list of 128 column names,
          * ``target_names``: ``['Negative', 'Positive']``.

    Raises:
        ValueError: If a row does not have exactly 129 columns or contains
            a value that cannot be parsed as a number.
    """
    n_features = 128  # num. of features (not target)

    # 29 * 4 "R<x>-PA<y>" names followed by 12 "Control, Snort, Relay"
    # names: 116 + 12 = 128 columns.
    feature_names = [
        *["R{}-PA{}".format(x + 1, y + 1) for x in range(29) for y in range(4)],
        *["Control, Snort, Relay #{}".format(x + 1) for x in range(12)],
    ]
    target_names = ['Negative', 'Positive']

    rows = []
    labels = []
    with open(path, newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # discard the first row, as the original did

        for sample in reader:
            if not sample:
                # csv.reader yields [] for blank lines; nothing to load.
                continue
            if len(sample) != n_features + 1:
                raise ValueError(
                    "{}: line {}: expected {} columns, found {}".format(
                        path, reader.line_num, n_features + 1, len(sample)
                    )
                )
            rows.append(np.asarray(sample[:-1], dtype=np.float64))
            labels.append(np.asarray(sample[-1], dtype=np.int64))

    # Size the arrays from the rows actually read. A pre-allocated
    # np.empty((6000, 128)) left uninitialised garbage in the result when
    # the file was shorter and raised IndexError when it was longer.
    data = np.array(rows, dtype=np.float64).reshape(-1, n_features)
    target = np.array(labels, dtype=np.int64).reshape(-1)

    return Bunch(
        data=data,
        target=target,
        feature_names=feature_names,
        target_names=target_names,
    )
def load_test_data(path='data/TestingDataBinary.csv'):
    """Load the unlabelled test data from a CSV file.

    The first row of the file is read and discarded. Every following
    non-blank row must hold exactly 128 feature values (no label column).

    NOTE(review): the first row is presumably a header; if the CSV has no
    header row, the first sample is silently dropped -- confirm against
    the data file.

    Args:
        path: Location of the CSV file. Defaults to the testing file the
            script has always used.

    Returns:
        A ``Bunch`` with:
          * ``data``: float64 array of shape (n_rows, 128),
          * ``feature_names``: list of 128 column names,
          * ``target_names``: ``['Negative', 'Positive']``.

    Raises:
        ValueError: If a row does not have exactly 128 columns or contains
            a value that cannot be parsed as a number.
    """
    n_features = 128  # num. of features (not target)

    # 29 * 4 "R<x>-PA<y>" names followed by 12 "Control, Snort, Relay"
    # names: 116 + 12 = 128 columns.
    feature_names = [
        *["R{}-PA{}".format(x + 1, y + 1) for x in range(29) for y in range(4)],
        *["Control, Snort, Relay #{}".format(x + 1) for x in range(12)],
    ]
    target_names = ['Negative', 'Positive']

    rows = []
    with open(path, newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # discard the first row, as the original did

        for sample in reader:
            if not sample:
                # csv.reader yields [] for blank lines; nothing to load.
                continue
            if len(sample) != n_features:
                raise ValueError(
                    "{}: line {}: expected {} columns, found {}".format(
                        path, reader.line_num, n_features, len(sample)
                    )
                )
            rows.append(np.asarray(sample, dtype=np.float64))

    # Size the array from the rows actually read. A pre-allocated
    # np.empty((100, 128)) left uninitialised garbage in the result when
    # the file was shorter and raised IndexError when it was longer.
    data = np.array(rows, dtype=np.float64).reshape(-1, n_features)

    return Bunch(
        data=data,
        feature_names=feature_names,
        target_names=target_names,
    )
# Script body: load the training data, fit a linear SVM on a 70/30 split,
# report hold-out accuracy, then predict labels for the unlabelled test file.
dataset = load_dataset()

# Print the names of the features.
print("Features:", dataset.feature_names)

# Print the class label names ('Negative' / 'Positive').
print("Labels:", dataset.target_names)

# Print the shape of the feature matrix.
print(dataset.data.shape)

# Split dataset into training set and test set: 70% training, 30% test.
# No random_state is passed, so the split (and therefore the reported
# accuracy) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, test_size=0.3
)

# Create an SVM classifier with a linear kernel.
clf = svm.SVC(kernel='linear')

# Train the model using the training split.
clf.fit(X_train, y_train)

# Predict the response for the held-out split.
y_pred = clf.predict(X_test)

print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# Count mismatches with a generator instead of building a throwaway list.
print("{} elements tested, {} incorrect".format(
    min(len(y_test), len(y_pred)),
    sum(1 for t, p in zip(y_test, y_pred) if t != p)
))

# Predict labels for the unlabelled test data.
test_dataset = load_test_data()
test_results = clf.predict(test_dataset.data)

print("Predicted {} values from test data: {}".format(
    len(test_results),
    ",".join(str(t) for t in test_results)
))
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment