Skip to content
Snippets Groups Projects
Commit 6e57b0e9 authored by yl3r22
Browse files

Update Untitled6.ipynb

parent f53a09ad
Branches
Tags
No related merge requests found
%% Cell type:code id:314edca9 tags:
``` python
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm, metrics
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
# Load the training and testing CSV files into pandas DataFrames.
# header=None: the files have no header line, so every row is read as data.
# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\y" or "\D" (a SyntaxWarning on
# recent Python versions).
df = pd.read_csv(r'C:\Users\yl3r22\Downloads\TrainingDataBinary.csv', header=None)
df1 = pd.read_csv(r'C:\Users\yl3r22\Downloads\TestingDataBinary.csv', header=None)
print(df.head(1))
# Columns 0-127 (the first 128 columns) are the features; column index 128
# (the 129th and last column) is used as the label.
df_feature = df.iloc[:, :128]
df_label = df.iloc[:, 128]
# Hold out 20% of the labelled rows for evaluation; the rest is used for fitting.
X_train, X_test, y_train, y_test = train_test_split(df_feature, df_label, test_size=0.2)
```
%% Output
0 1 2 3 4 5 \
0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 127723.2374
6 7 8 9 ... 119 120 121 122 123 \
0 65.689611 605.91099 -57.003571 626.78553 ... 0 0 0 0 0
124 125 126 127 128
0 0 0 0 0 0
[1 rows x 129 columns]
%% Cell type:code id:e614c178 tags:
``` python
# Create a support-vector classifier with a linear kernel.
clf = svm.SVC(kernel='linear')
# Fit it on the training split.
clf.fit(X_train, y_train)
# Predict the held-out split and report accuracy on it. This is accuracy on
# X_test/y_test (data not used for fitting), so it is labelled as test
# accuracy rather than training accuracy.
y_pred = clf.predict(X_test)
print("TestAccuracy:", metrics.accuracy_score(y_test, y_pred))
```
%% Output
TrainingAccuracy: 0.8775
%% Cell type:code id:4d4d4c52 tags:
``` python
# Apply the fitted classifier `clf` to the testing frame `df1` and keep the
# predicted labels in `y_result`, then display them.
y_result = clf.predict(X=df1)
print(y_result)
```
%% Output
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0
0 0 1 1 1 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
%% Cell type:code id:f8c31855 tags:
``` python
# Create a random forest classifier: 100 trees, each split considering at
# most 70 of the features.
clf1 = RandomForestClassifier(n_estimators=100, max_features=70)
# Fit it on the training split.
clf1.fit(X_train, y_train)
# Predict the held-out split and report accuracy on it. This is accuracy on
# X_test/y_test (data not used for fitting), so it is labelled as test
# accuracy rather than training accuracy.
y_pred = clf1.predict(X_test)
print("TestAccuracy:", metrics.accuracy_score(y_test, y_pred))
```
%% Output
TrainingAccuracy: 0.985
%% Cell type:markdown id:31fc5eaa tags:
%% Cell type:code id:95729aae tags:
``` python
# Score the classifier `clf1` with 6-fold cross-validation on the training
# split (`X_train`, `y_train`), then print the per-fold scores and their mean.
scores0 = cross_val_score(estimator=clf1, X=X_train, y=y_train, cv=6)
print("Scores:", scores0)
print("MeanScores:", scores0.mean())
```
%% Output
Scores: [0.97625 0.97625 0.98375 0.98125 0.97625 0.97875]
MeanScores: 0.9787499999999999
%% Cell type:code id:65a2e2ed tags:
``` python
# Load the testing set; header=None because every row of the file is data.
# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\y" or "\D".
test_data = pd.read_csv(r"C:\Users\yl3r22\Downloads\TestingDataBinary.csv", header=None)
# Predict a label for every testing row with the fitted random forest `clf1`.
predictions = clf1.predict(test_data)
# Put the predictions in their own one-column frame and append it to the
# right of the testing columns.
prediction_df = pd.DataFrame(predictions)
result = pd.concat([test_data, prediction_df], axis=1)
# Write the combined table as CSV with neither the row index nor a header line.
result.to_csv(r'C:\Users\yl3r22\Downloads\TestingResultsBinary.csv', index=False, header=False)
```
%% Cell type:code id:1251c20e tags:
``` python
# Check predictions: display the labels that `clf1` produced for the testing data.
print(predictions)
```
%% Output
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1
1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
%% Cell type:code id:f2018309 tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment