Skip to content
Snippets Groups Projects
Commit ac72c4f0 authored by plaaosert's avatar plaaosert
Browse files

things

parent 315ec84f
No related branches found
No related tags found
No related merge requests found
task1.py 0 → 100644
#Import scikit-learn dataset library
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm, metrics
import numpy as np
import csv
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils import Bunch
def load_dataset(path='data/TrainingDataBinary.csv'):
    """Load the labelled training CSV into a ``Bunch``.

    The first row of the file is discarded (presumably a header row --
    confirm against the data file). Every remaining non-empty row is split
    into features (all columns but the last, parsed as float64) and a label
    (the last column, parsed as int64).

    Args:
        path: Location of the CSV file. Defaults to the training file the
            script has always used.

    Returns:
        A ``Bunch`` with ``data`` of shape (n_rows, 128), ``target`` of shape
        (n_rows,), ``feature_names`` (128 generated names) and
        ``target_names``.

    Raises:
        ValueError: If a row does not hold the expected number of feature
            columns or contains a value that cannot be parsed as a number.
    """
    n_features = 128  # num. of features (not target)
    feature_names = [
        *["R{}-PA{}".format(x + 1, y + 1) for x in range(29) for y in range(4)],
        *["Control, Snort, Relay #{}".format(x + 1) for x in range(12)],
    ]
    target_names = ['Negative', 'Positive']

    # newline='' is what the csv module asks for when handing it a file object.
    with open(path, newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # drop the first row, as the script always has
        # Blank lines come back from csv.reader as empty lists; ignore them.
        rows = [row for row in reader if row]

    # Size the arrays from what was actually read. A fixed row count would
    # leave uninitialised np.empty() memory in the result for short files and
    # overflow the arrays for long ones.
    n_samples = len(rows)
    data = np.empty((n_samples, n_features))
    target = np.empty((n_samples,), dtype=np.int64)
    for i, sample in enumerate(rows):
        data[i] = np.asarray(sample[:-1], dtype=np.float64)
        target[i] = np.asarray(sample[-1], dtype=np.int64)

    return Bunch(
        data=data,
        target=target,
        feature_names=feature_names,
        target_names=target_names,
    )
def load_test_data(path='data/TestingDataBinary.csv'):
    """Load the unlabelled testing CSV into a ``Bunch``.

    The first row of the file is discarded (presumably a header row --
    confirm against the data file). Every remaining non-empty row is parsed
    in full as float64 feature values; the file carries no label column.

    Args:
        path: Location of the CSV file. Defaults to the testing file the
            script has always used.

    Returns:
        A ``Bunch`` with ``data`` of shape (n_rows, 128), ``feature_names``
        (128 generated names) and ``target_names``.

    Raises:
        ValueError: If a row does not hold the expected number of columns or
            contains a value that cannot be parsed as a float.
    """
    n_features = 128  # num. of features (not target)
    feature_names = [
        *["R{}-PA{}".format(x + 1, y + 1) for x in range(29) for y in range(4)],
        *["Control, Snort, Relay #{}".format(x + 1) for x in range(12)],
    ]
    target_names = ['Negative', 'Positive']

    # newline='' is what the csv module asks for when handing it a file object.
    with open(path, newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # drop the first row, as the script always has
        # Blank lines come back from csv.reader as empty lists; ignore them.
        rows = [row for row in reader if row]

    # Size the array from what was actually read. A fixed row count would
    # leave uninitialised np.empty() memory in the result for short files and
    # overflow the array for long ones.
    data = np.empty((len(rows), n_features))
    for i, sample in enumerate(rows):
        data[i] = np.asarray(sample, dtype=np.float64)

    return Bunch(
        data=data,
        feature_names=feature_names,
        target_names=target_names,
    )
# Load the labelled training data and report what was read:
# the feature names, the class label names, and the feature-matrix shape.
dataset = load_dataset()
print("Features: ", dataset.feature_names)
print("Labels: ", dataset.target_names)
print(dataset.data.shape)

# Hold out 30% of the labelled samples for evaluation; train on the other 70%.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.3,
)

# Fit a support vector classifier with a linear kernel on the training split.
clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)

# Evaluate on the held-out split: overall accuracy plus a raw miss count.
y_pred = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
n_compared = min(len(y_test), len(y_pred))
n_wrong = sum(1 for t, p in zip(y_test, y_pred) if t != p)
print("{} elements tested, {} incorrect".format(n_compared, n_wrong))

# Run the trained classifier over the unlabelled test file and list its output.
test_dataset = load_test_data()
test_results = clf.predict(test_dataset.data)
print("Predicted {} values from test data: {}".format(
    len(test_results), ", ".join(map(str, test_results))
))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment