Skip to content
Snippets Groups Projects
Commit b17171a6 authored by yl1r22's avatar yl1r22
Browse files

Upload New File

parent 0085cfac
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:6064e0b1 tags:
``` python
#Import scikit-learn dataset library
#from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import svm, metrics
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
from joblib import dump
# Read the training dataset. The CSV has no header row, so pandas labels the
# columns with integers starting at 0.
# The raw-string prefix keeps the Windows backslashes literal: without it,
# "\A" and "\T" are invalid escape sequences, which raise a SyntaxWarning on
# Python 3.12+ and are slated to become errors.
df = pd.read_csv(r'H:\AI classification\TrainingDataMulti.csv', header=None)
# Print the first row of the csv document as a quick sanity check
print(df.head(1))
# The first 128 columns are features
df_feature = df.iloc[:, :128]
# The 129th column (index 128) holds the labels
df_label = df.iloc[:, 128]
# Split dataset into training set and test set: 80% training and 20% test.
# train_test_split shuffles the rows before splitting; fixing random_state
# makes the split (and every metric computed from it) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_feature, df_label, test_size=0.2, random_state=42
)
```
%% Output
0 1 2 3 4 5 \
0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 127723.2374
6 7 8 9 ... 119 120 121 122 123 \
0 65.689611 605.91099 -57.003571 626.78553 ... 0 0 0 0 0
124 125 126 127 128
0 0 0 0 0 0
[1 rows x 129 columns]
%% Cell type:code id:cdc65331 tags:
``` python
# Create a random forest classifier with 100 trees and max_features=78.
# Fixing random_state makes the forest's internal randomness repeatable, so
# for a given train/test split the fitted model and the accuracy printed
# below can be reproduced on a re-run.
clf1 = RandomForestClassifier(n_estimators=100, max_features=78, random_state=42)
# Train the model using the training set
clf1.fit(X_train, y_train)
# Predict the labels of the held-out test set
y_pred1 = clf1.predict(X_test)
# Fraction of test rows whose predicted label matches the true label
print("Accuracy:", metrics.accuracy_score(y_test, y_pred1))
```
%% Output
Accuracy: 0.9416666666666667
%% Cell type:code id:ae7d5339 tags:
``` python
# Evaluate the classifier with 5-fold cross-validation on the training split
scores1 = cross_val_score(estimator=clf1, X=X_train, y=y_train, cv=5)
# Report the per-fold scores, then their average
print("Scores", scores1)
print("Mean Scores", scores1.mean())
```
%% Output
Scores [0.94166667 0.95104167 0.934375 0.934375 0.921875 ]
Mean Scores 0.9366666666666668
%% Cell type:code id:253d7c20 tags:
``` python
# Persist the fitted classifier to disk with joblib.
# Kept as the cell's last expression so the notebook still echoes the
# list of written file names.
dump(value=clf1, filename='H:/AI classification/RFC_part2.pkl')
```
%% Output
['H:/AI classification/RFC_part2.pkl']
%% Cell type:code id:943bcea4 tags:
``` python
# Load the testing dataset (no header row).
# The raw-string prefix keeps the Windows backslashes literal: without it,
# "\A" and "\T" are invalid escape sequences, which raise a SyntaxWarning on
# Python 3.12+ and are slated to become errors.
test_data = pd.read_csv(r'H:\AI classification\TestingDataMulti.csv', header=None)
# Predict a label for every row of the testing dataset
predictions = clf1.predict(test_data)
# Convert the predictions into a single-column dataframe
predictions_df = pd.DataFrame(predictions)
# Append the predictions as an extra column to the right of the test rows
result = pd.concat([test_data, predictions_df], axis=1)
# Write the combined table to a csv document, without index or header row
result.to_csv('H:/AI classification/test_pre2.csv', index=False, header=False)
```
%% Cell type:code id:8a8a1e04 tags:
``` python
# Print the predicted labels for the testing dataset
print(predictions)
```
%% Output
[2 2 2 2 2 2 1 1 2 2 2 1 1 1 1 1 2 1 1 1 1 1 2 2 2 2 0 0 0 0 0 0 0 1 1 1 1
1 2 2 1 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 1 2 1 1 1 1 1 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0]
%% Cell type:code id:1521abed tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment