Skip to content
Snippets Groups Projects
Commit 862c6abf authored by yl3r22's avatar yl3r22
Browse files

Update Untitled7.ipynb

parent 07e0ab3f
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:58e43186 tags:
``` python
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm, metrics
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
# Load the training and testing sets into pandas DataFrames. header=None tells
# pandas the files have no header row, so columns are labelled 0, 1, 2, ...
# Raw strings keep the Windows backslashes literal: the earlier non-raw
# literals relied on invalid escape sequences (\y, \D, \T), which newer Python
# versions warn about.
df = pd.read_csv(r'C:\Users\yl3r22\Downloads\TrainingDataMulti.csv', header=None)
df1 = pd.read_csv(r'C:\Users\yl3r22\Downloads\TestingDataMulti.csv', header=None)
print(df.head(1))
# Columns 0-127 (the first 128 columns) are the features; column index 128
# (the 129th column of the training frame) is the class label.
df_feature = df.iloc[:, :128]
df_label = df.iloc[:, 128]
# NOTE(review): `cancer` is not referenced by any cell visible here; it looks
# like a tutorial leftover -- confirm nothing else needs it before removing.
cancer = datasets.load_breast_cancer()
# Hold out 20% of the labelled rows for evaluating the classifiers. No
# random_state is passed, so the split is not pinned to a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(df_feature, df_label, test_size=0.2)
```
%% Output
0 1 2 3 4 5 \
0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 127723.2374
6 7 8 9 ... 119 120 121 122 123 \
0 65.689611 605.91099 -57.003571 626.78553 ... 0 0 0 0 0
124 125 126 127 128
0 0 0 0 0 0
[1 rows x 129 columns]
%% Cell type:code id:461e2292 tags:
``` python
# Build a support vector classifier with a linear kernel and train it on the
# training split; fit() hands back the fitted estimator, so the calls chain.
clf = svm.SVC(kernel='linear').fit(X_train, y_train)
# Predict labels for the held-out split and report how many are correct.
# NOTE: the printed label reads "TrainingAccuracy", but the score is computed
# against y_test, i.e. the 20% of rows held out from training.
y_pred = clf.predict(X_test)
print("TrainingAccuracy:", accuracy_score(y_test, y_pred))
```
%% Output
TrainingAccuracy: 0.7008333333333333
%% Cell type:code id:fcda6021 tags:
``` python
# Label the testing set (df1) with the linear SVM fitted earlier and show the
# predicted classes.
y_result = clf.predict(df1)
print(y_result)
# Build a random forest of 100 trees with max_features=70 and train it on the
# training split; fit() hands back the fitted estimator, so the calls chain.
clf1 = RandomForestClassifier(n_estimators=100, max_features=70).fit(X_train, y_train)
# Predict labels for the held-out split and report how many are correct.
# NOTE: the printed label reads "TrainingAccuracy", but the score is computed
# against y_test, i.e. the 20% of rows held out from training.
y_pred = clf1.predict(X_test)
print("TrainingAccuracy:", accuracy_score(y_test, y_pred))
```
%% Output
[2 0 2 0 0 0 1 1 2 2 1 1 1 1 1 1 0 2 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1
1 0 1 1 0 1 0 0 1 1 1 0 0 1 1 1 1 1 1 1 2 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
TrainingAccuracy: 0.9316666666666666
%% Cell type:code id:897bcf3d tags:
``` python
# Score the random forest with cross-validation (cv=6) on the training split,
# then print the per-fold scores followed by their mean.
scores0 = cross_val_score(clf1, X_train, y_train, cv=6)
print("Scores:", scores0)
print("MeanScores:", scores0.mean())
```
%% Output
Scores: [0.9275 0.9375 0.9475 0.94375 0.94 0.9325 ]
MeanScores: 0.938125
%% Cell type:code id:1a48b862 tags:
``` python
# Read the testing set (no header row). Raw strings keep the Windows
# backslashes literal: the earlier non-raw literals relied on invalid escape
# sequences (\y, \D, \T), which newer Python versions warn about.
test_data = pd.read_csv(r"C:\Users\yl3r22\Downloads\TestingDataMulti.csv", header=None)
# Predict a label for every testing row with the random forest (clf1) that was
# trained earlier.
predictions = clf1.predict(test_data)
# Wrap the predictions in a one-column DataFrame and append it to the right of
# the feature columns. Both frames use the default integer row index, so
# the rows line up one-to-one.
prediction_df = pd.DataFrame(predictions)
result = pd.concat([test_data, prediction_df], axis=1)
# Write the features plus predicted label to CSV without index or header row.
result.to_csv(r'C:\Users\yl3r22\Downloads\TestingResultsMulti.csv', index=False, header=False)
```
%% Cell type:code id:354f7f52 tags:
``` python
# Display the labels that clf1 predicted for the testing set.
print(predictions)
```
%% Output
[2 2 2 2 2 2 1 1 2 2 2 1 1 1 2 2 2 2 2 2 1 1 2 2 2 2 1 0 0 0 0 0 0 1 1 1 1
1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 1 2 1 1 1 1 1 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0]
%% Cell type:code id:54cd25fd tags:
``` python
```
......
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment