Skip to content
Snippets Groups Projects
Commit ef089f14 authored by yl1r22's avatar yl1r22
Browse files

Delete AI_Part2.ipynb

parent 2c80dddb
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:6064e0b1 tags:
``` python
#Import scikit-learn dataset library
#from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import svm, metrics
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
from joblib import dump
# Load the multi-class training set. The CSV has no header row, so columns
# are addressed by integer position.
# A raw string is used because '\A' and '\T' are invalid escape sequences in
# a normal literal (DeprecationWarning, SyntaxWarning from Python 3.12); the
# resulting path value is unchanged.
df = pd.read_csv(r'H:\AI classification\TrainingDataMulti.csv', header=None)
print(df.head(1))

# Columns 0-127 are used as features; column 128 is used as the label.
df_feature = df.iloc[:, :128]
df_label = df.iloc[:, 128]

# Split dataset into training set and test set: 80% training and 20% test.
# NOTE(review): no random_state is passed, so the split (and every score
# computed from it) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(df_feature, df_label, test_size=0.2)
```
%% Output
0 1 2 3 4 5 \
0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 127723.2374
6 7 8 9 ... 119 120 121 122 123 \
0 65.689611 605.91099 -57.003571 626.78553 ... 0 0 0 0 0
124 125 126 127 128
0 0 0 0 0 0
[1 rows x 129 columns]
%% Cell type:code id:c4bcd563 tags:
``` python
# Linear-kernel SVM baseline.
# SVMs are not scale invariant, and the raw features differ by orders of
# magnitude (about 70 vs 127673 in the first printed row), so each feature is
# standardised before fitting. Putting both steps in one pipeline means the
# scaler is fitted on the training split only and re-applied on predict.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

clf = make_pipeline(StandardScaler(), svm.SVC(kernel='linear'))
# Train the model using the training set
clf.fit(X_train, y_train)
# Predict the response for the held-out test split
y_pred = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
```
%% Cell type:code id:cdc65331 tags:
``` python
# Random forest classifier: 200 trees, max_features=78.
# fit() returns the estimator itself, so construction and training are chained.
clf1 = RandomForestClassifier(
    n_estimators=200,
    max_features=78,
).fit(X_train, y_train)

# Score the forest on the held-out test split.
y_pred1 = clf1.predict(X_test)
rfc_accuracy = metrics.accuracy_score(y_test, y_pred1)
print("Accuracy:", rfc_accuracy)
```
%% Output
Accuracy: 0.9558333333333333
%% Cell type:code id:ae7d5339 tags:
``` python
# 5-fold cross-validation of the random forest on the training split.
scores1 = cross_val_score(clf1, X_train, y_train, cv=5)

# Report the per-fold scores followed by their mean.
for caption, value in (("Scores", scores1), ("Mean Scores", np.mean(scores1))):
    print(caption, value)
```
%% Output
Scores [0.934375 0.91979167 0.940625 0.94583333 0.93958333]
Mean Scores 0.9360416666666668
%% Cell type:code id:253d7c20 tags:
``` python
# Serialise the fitted random forest to disk with joblib.
# The call stays the cell's final expression so the notebook still echoes
# the value returned by dump().
model_path = 'H:/AI classification/RFC_part2.pkl'
dump(clf1, model_path)
```
%% Output
['H:/AI classification/RFC_part2.pkl']
%% Cell type:code id:943bcea4 tags:
``` python
# Load testing dataset (no header row).
# A raw string is used because '\A' and '\T' are invalid escape sequences in
# a normal literal (DeprecationWarning, SyntaxWarning from Python 3.12); the
# resulting path value is unchanged.
# NOTE(review): the whole frame is passed to predict(), so the file is
# presumably features only (no label column) - confirm.
test_data = pd.read_csv(r'H:\AI classification\TestingDataMulti.csv', header=None)

# Predict a label for every row of the testing dataset
predictions = clf1.predict(test_data)
predictions_df = pd.DataFrame(predictions)

# Append the predictions as an extra column after the original columns
result = pd.concat([test_data, predictions_df], axis=1)

# Create a CSV document with the combined table, without index or header row
result.to_csv('H:/AI classification/test_pre2.csv', index=False, header=False)
```
%% Cell type:code id:8a8a1e04 tags:
``` python
# Display the array of labels predicted for the testing dataset.
print(predictions)
```
%% Output
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1
1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
%% Cell type:code id:1521abed tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment