Skip to content
Snippets Groups Projects
Commit 00f66c0c authored by yl1r22's avatar yl1r22
Browse files

Upload New File

parent 3be13398
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:6064e0b1 tags:
``` python
#Import scikit-learn dataset library
#from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import svm, metrics
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
from joblib import dump
# Load the multi-class training set. The CSV has no header row, so pandas
# assigns integer column labels. A raw string keeps the Windows backslashes
# literal instead of relying on the unrecognised escape sequences \A and \T,
# which newer Python versions warn about.
df = pd.read_csv(r'H:\AI classification\TrainingDataMulti.csv', header=None)
print(df.head(1))

# Columns 0-127 are used as the features; column 128 is used as the label.
df_feature = df.iloc[:, :128]
df_label = df.iloc[:, 128]

# Split dataset into training set and test set: 80% training and 20% test.
# random_state is fixed so the split (and every score computed from it) is
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df_feature, df_label, test_size=0.2, random_state=42
)
```
%% Output
0 1 2 3 4 5 \
0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 127723.2374
6 7 8 9 ... 119 120 121 122 123 \
0 65.689611 605.91099 -57.003571 626.78553 ... 0 0 0 0 0
124 125 126 127 128
0 0 0 0 0 0
[1 rows x 129 columns]
%% Cell type:code id:c4bcd563 tags:
``` python
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Create a linear-kernel SVM classifier. SVMs are not scale invariant and the
# feature columns span very different magnitudes (values around 1e5 next to
# columns of 0), so the features are standardised first. Putting both steps
# in one pipeline means the scaler is fitted on the training split only and
# the same transform is reused when predicting on X_test.
clf = make_pipeline(StandardScaler(), svm.SVC(kernel='linear'))
# Train the model using the training set
clf.fit(X_train, y_train)
# Predict the response for the held-out test set
y_pred = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
```
%% Cell type:code id:cdc65331 tags:
``` python
# Create a random forest classifier: 200 trees, with max_features=78.
# random_state is fixed so that the fitted model, its accuracy, and anything
# later produced from it can be reproduced exactly.
clf1 = RandomForestClassifier(n_estimators=200, max_features=78, random_state=42)
clf1.fit(X_train, y_train)
# Evaluate on the held-out test split
y_pred1 = clf1.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred1))
```
%% Output
Accuracy: 0.9558333333333333
%% Cell type:code id:ae7d5339 tags:
``` python
# 5-fold cross-validation of the random forest on the training split;
# report the per-fold scores followed by their mean.
scores1 = cross_val_score(estimator=clf1, X=X_train, y=y_train, cv=5)
print("Scores", scores1)
print("Mean Scores", scores1.mean())
```
%% Output
Scores [0.934375 0.91979167 0.940625 0.94583333 0.93958333]
Mean Scores 0.9360416666666668
%% Cell type:code id:253d7c20 tags:
``` python
# Persist the fitted random forest to disk with joblib. Left as the cell's
# final expression so the notebook displays the value dump() returns.
dump(value=clf1, filename='H:/AI classification/RFC_part2.pkl')
```
%% Output
['H:/AI classification/RFC_part2.pkl']
%% Cell type:code id:943bcea4 tags:
``` python
# Load the testing dataset (no header row). A raw string keeps the Windows
# backslashes literal instead of relying on the unrecognised escape
# sequences \A and \T. The whole frame is passed to predict(), so it is
# expected to contain only the feature columns.
test_data = pd.read_csv(r'H:\AI classification\TestingDataMulti.csv', header=None)
# Predict a class label for every row of the testing dataset
predictions = clf1.predict(test_data)
# Give the predictions the same index as test_data so the concat below is
# aligned row-for-row by construction
predictions_df = pd.DataFrame(predictions, index=test_data.index)
# Append the predicted label as an extra trailing column
result = pd.concat([test_data, predictions_df], axis=1)
# Write the features plus predicted labels to a CSV document
result.to_csv('H:/AI classification/test_pre2.csv', index=False, header=False)
```
%% Cell type:code id:8a8a1e04 tags:
``` python
# Show the labels that clf1 predicted for the rows of the testing dataset
print(predictions)
```
%% Output
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1
1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
%% Cell type:code id:1521abed tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment