things

ac72c4f0 · plaaosert · 315ec84f · 315ec84f · ac72c4f0
Commit ac72c4f0 authored Jun 7, 2023 by plaaosert
--- a/main.py
+++ b/main.py
--- a/task1.py
+++ b/task1.py
+#Import scikit-learn dataset library
+import pandas as pd
+from matplotlib import pyplot as plt
+from sklearn import datasets
+from sklearn.decomposition import PCA
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split, GridSearchCV
+from sklearn import svm, metrics
+
+import numpy as np
+import csv
+
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.utils import Bunch
+
+
+def load_dataset(path='data/TrainingDataBinary.csv'):
+    """Load the labelled training set from a CSV file.
+
+    The first row of the file is treated as a header and skipped. Every
+    other non-empty row must hold 128 numeric feature values followed by one
+    integer class label in the last column.
+
+    The number of samples is taken from the file itself instead of a fixed
+    row count, so the returned arrays never contain uninitialised rows and a
+    longer file does not overflow a pre-sized buffer.
+
+    Args:
+        path: Location of the CSV file. Defaults to the path this script
+            has always read from.
+
+    Returns:
+        A ``Bunch`` with ``data`` (float64 array, shape ``(n_rows, 128)``),
+        ``target`` (int64 array, shape ``(n_rows,)``), ``feature_names``
+        and ``target_names``.
+
+    Raises:
+        ValueError: If a row does not have exactly 129 columns or one of
+            its values cannot be parsed as a number.
+    """
+    n_features = 128  # num. of features (not target)
+    feature_names = [
+        *["R{}-PA{}".format(x+1, y+1) for x in range(29) for y in range(4)],
+        *["Control, Snort, Relay #{}".format(x+1) for x in range(12)]
+    ]
+    target_names = ['Negative', 'Positive']
+
+    features = []
+    labels = []
+
+    # newline='' lets the csv module do its own line-ending handling.
+    with open(path, newline='') as csv_file:
+        data_file = csv.reader(csv_file)
+        next(data_file, None)  # skip the header row; tolerate an empty file
+
+        for sample in data_file:
+            if not sample:
+                # csv.reader yields [] for blank lines; they carry no data.
+                continue
+            if len(sample) != n_features + 1:
+                raise ValueError(
+                    "{}: line {}: expected {} columns, got {}".format(
+                        path, data_file.line_num, n_features + 1, len(sample)
+                    )
+                )
+            features.append(sample[:-1])
+            labels.append(sample[-1])
+
+    # reshape keeps the (0, n_features) shape when the file has no data rows.
+    data = np.asarray(features, dtype=np.float64).reshape(-1, n_features)
+    target = np.asarray(labels, dtype=np.int64)
+
+    return Bunch(data=data, target=target, feature_names=feature_names, target_names=target_names)
+
+
+def load_test_data(path='data/TestingDataBinary.csv'):
+    """Load the unlabelled test set from a CSV file.
+
+    The first row of the file is treated as a header and skipped. Every
+    other non-empty row must hold exactly 128 numeric feature values; there
+    is no label column.
+
+    The number of samples is taken from the file itself instead of a fixed
+    row count, so the returned array never contains uninitialised rows and a
+    longer file does not overflow a pre-sized buffer.
+
+    Args:
+        path: Location of the CSV file. Defaults to the path this script
+            has always read from.
+
+    Returns:
+        A ``Bunch`` with ``data`` (float64 array, shape ``(n_rows, 128)``),
+        ``feature_names`` and ``target_names``.
+
+    Raises:
+        ValueError: If a row does not have exactly 128 columns or one of
+            its values cannot be parsed as a number.
+    """
+    n_features = 128  # num. of features (not target)
+    feature_names = [
+        *["R{}-PA{}".format(x+1, y+1) for x in range(29) for y in range(4)],
+        *["Control, Snort, Relay #{}".format(x+1) for x in range(12)]
+    ]
+    target_names = ['Negative', 'Positive']
+
+    features = []
+
+    # newline='' lets the csv module do its own line-ending handling.
+    with open(path, newline='') as csv_file:
+        data_file = csv.reader(csv_file)
+        next(data_file, None)  # skip the header row; tolerate an empty file
+
+        for sample in data_file:
+            if not sample:
+                # csv.reader yields [] for blank lines; they carry no data.
+                continue
+            if len(sample) != n_features:
+                raise ValueError(
+                    "{}: line {}: expected {} columns, got {}".format(
+                        path, data_file.line_num, n_features, len(sample)
+                    )
+                )
+            features.append(sample)
+
+    # reshape keeps the (0, n_features) shape when the file has no data rows.
+    data = np.asarray(features, dtype=np.float64).reshape(-1, n_features)
+
+    return Bunch(data=data, feature_names=feature_names, target_names=target_names)
+
+
+# Load the labelled training data and print a short summary of it.
+dataset = load_dataset()
+
+# names of the feature columns
+print("Features: ", dataset.feature_names)
+
+# names of the two class labels
+print("Labels: ", dataset.target_names)
+
+# shape of the feature matrix
+print(dataset.data.shape)
+
+
+# Hold out 30% of the samples for evaluation and train on the other 70%.
+X_train, X_test, y_train, y_test = train_test_split(
+    dataset.data,
+    dataset.target,
+    test_size=0.3,
+)
+
+
+# Build a support vector classifier with a linear kernel and fit it on the
+# training split.
+clf = svm.SVC(kernel='linear')
+clf.fit(X_train, y_train)
+
+# Score the fitted model on the held-out split.
+y_pred = clf.predict(X_test)
+
+print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
+
+# Report how many held-out samples were compared and how many were wrong.
+print("{} elements tested, {} incorrect".format(
+    min(len(y_test), len(y_pred)),
+    sum(1 for expected, predicted in zip(y_test, y_pred) if expected != predicted),
+))
+
+# Classify the unlabelled test file with the fitted model.
+test_dataset = load_test_data()
+test_results = clf.predict(test_dataset.data)
+
+print("Predicted {} values from test data: {}".format(
+    len(test_results), ", ".join(map(str, test_results))
+))