Skip to content
Snippets Groups Projects
Commit 6e57b0e9 authored by yl3r22
Browse files

Update Untitled6.ipynb

parent f53a09ad
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:314edca9 tags:
``` python
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm, metrics
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
# Load the training and testing CSV files into pandas DataFrames.
# header=None: the files are read without a header row, so every line is
# treated as data and the columns get integer labels.
# Raw strings keep the Windows backslashes literal instead of relying on
# invalid escape sequences ("\y", "\D", "\T"); the resulting paths are the
# same as before.
df = pd.read_csv(r'C:\Users\yl3r22\Downloads\TrainingDataBinary.csv', header=None)
df1 = pd.read_csv(r'C:\Users\yl3r22\Downloads\TestingDataBinary.csv', header=None)
print(df.head(1))
# Columns 0-127 (the first 128 columns) are the features; column index 128
# (the 129th and last column) is the label.
df_feature = df.iloc[:, :128]
df_label = df.iloc[:, 128]
# Hold out 20% of the labelled rows for evaluation. A fixed random_state
# makes the split, and therefore the reported scores, reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df_feature, df_label, test_size=0.2, random_state=42
)
```
%% Output
0 1 2 3 4 5 \
0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 127723.2374
6 7 8 9 ... 119 120 121 122 123 \
0 65.689611 605.91099 -57.003571 626.78553 ... 0 0 0 0 0
124 125 126 127 128
0 0 0 0 0 0
[1 rows x 129 columns]
%% Cell type:code id:e614c178 tags:
``` python
# Create an SVM classifier with a linear kernel.
clf = svm.SVC(kernel='linear')
# Fit the model on the training split.
clf.fit(X_train, y_train)
# Predict labels for the held-out test split.
y_pred = clf.predict(X_test)
# The score compares y_test with predictions on X_test, so it is a test
# accuracy, not a training accuracy.
print("TestAccuracy:", metrics.accuracy_score(y_test, y_pred))
```
%% Output
TrainingAccuracy: 0.8775
%% Cell type:code id:4d4d4c52 tags:
``` python
# Predict labels for the testing data in `df1` with the fitted SVM `clf`,
# keep them in `y_result`, and print them.
y_result = clf.predict(df1)
print(y_result)
```
%% Output
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0
0 0 1 1 1 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
%% Cell type:code id:f8c31855 tags:
``` python
# Create a random forest classifier: 100 trees, at most 70 features
# considered per split. A fixed random_state makes the fitted forest (and
# the scores derived from it) reproducible.
clf1 = RandomForestClassifier(n_estimators=100, max_features=70, random_state=42)
# Fit the model on the training split.
clf1.fit(X_train, y_train)
# Predict labels for the held-out test split.
y_pred = clf1.predict(X_test)
# The score compares y_test with predictions on X_test, so it is a test
# accuracy, not a training accuracy.
print("TestAccuracy:", metrics.accuracy_score(y_test, y_pred))
```
%% Output
TrainingAccuracy: 0.985
%% Cell type:markdown id:31fc5eaa tags:
%% Cell type:code id:95729aae tags:
``` python
# Score the `clf1` estimator with 6-fold cross-validation on the training
# split (X_train / y_train), then report the per-fold scores and their mean.
scores0 = cross_val_score(estimator=clf1, X=X_train, y=y_train, cv=6)
print("Scores:", scores0)
print("MeanScores:", scores0.mean())
```
%% Output
Scores: [0.97625 0.97625 0.98375 0.98125 0.97625 0.97875]
MeanScores: 0.9787499999999999
%% Cell type:code id:65a2e2ed tags:
``` python
# Load the testing data (no header row). Raw strings keep the Windows
# backslashes literal instead of relying on invalid escape sequences; the
# resulting paths are the same as before.
test_data = pd.read_csv(r"C:\Users\yl3r22\Downloads\TestingDataBinary.csv", header=None)
# Predict a label for every testing row with the random forest model `clf1`.
predictions = clf1.predict(test_data)
# Wrap the predictions in a DataFrame and append them as an extra column
# to the right of the testing data.
prediction_df = pd.DataFrame(predictions)
result = pd.concat([test_data, prediction_df], axis=1)
# Write the combined table to CSV without the index column or a header row.
result.to_csv(r'C:\Users\yl3r22\Downloads\TestingResultsBinary.csv', index=False, header=False)
```
%% Cell type:code id:1251c20e tags:
``` python
# Print the `predictions` array for a quick visual check.
print(predictions)
```
%% Output
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1
1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
%% Cell type:code id:f2018309 tags:
``` python
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment