Skip to content
Snippets Groups Projects
Commit 7f8d6c84 authored by yl1r22's avatar yl1r22
Browse files

Upload New File

parent 0bd01afc
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:6064e0b1 tags:
``` python
#Import scikit-learn dataset library
#from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import svm, metrics
from sklearn.ensemble import RandomForestClassifier
from joblib import dump
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Read the training dataset. The CSV has no header row (header=None), so
# pandas labels the columns with integer positions.
# The path is a raw string so the Windows backslashes stay literal: in a
# normal string "\A" and "\T" are invalid escape sequences, which newer
# Python versions warn about and plan to turn into a syntax error.
df = pd.read_csv(r'H:\AI classification\TrainingDataMulti.csv', header=None)
# Print the first row as a quick sanity check of the parsed layout
print(df.head(1))
# The first 128 columns (positions 0-127) are the features
df_feature = df.iloc[:, :128]
# The 129th column (position 128) holds the labels
df_label = df.iloc[:, 128]
# Split the dataset: 80% for training, 20% held out for testing.
# NOTE(review): no random_state is passed, so the split (and every metric
# computed from it) changes between runs - consider fixing a seed.
X_train, X_test, y_train, y_test = train_test_split(df_feature, df_label, test_size=0.2)
```
%% Output
0 1 2 3 4 5 \
0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 127723.2374
6 7 8 9 ... 119 120 121 122 123 \
0 65.689611 605.91099 -57.003571 626.78553 ... 0 0 0 0 0
124 125 126 127 128
0 0 0 0 0 0
[1 rows x 129 columns]
%% Cell type:code id:cdc65331 tags:
``` python
# Build a random forest classifier with 200 trees and max_features set to 78
clf1 = RandomForestClassifier(max_features=78, n_estimators=200)
# Fit the forest on the training split
clf1.fit(X_train, y_train)
# Predict labels for the held-out test split
y_pred1 = clf1.predict(X_test)
# Report the fraction of test rows that were classified correctly
test_accuracy = metrics.accuracy_score(y_test, y_pred1)
print("Accuracy:", test_accuracy)
```
%% Output
Accuracy: 0.94
%% Cell type:code id:ae7d5339 tags:
``` python
# Evaluate the classifier with 5-fold cross-validation on the training split
scores1 = cross_val_score(estimator=clf1, X=X_train, y=y_train, cv=5)
# Show the per-fold scores, then their average
print("Scores", scores1)
print("Mean Scores", scores1.mean())
```
%% Output
Scores [0.95 0.93541667 0.93541667 0.93020833 0.93541667]
Mean Scores 0.9372916666666666
%% Cell type:code id:253d7c20 tags:
``` python
# Persist the fitted classifier to disk with joblib; the call is left as the
# cell's last expression so the notebook still echoes the written file list
model_path = 'H:/AI classification/RFC_part2.pkl'
dump(clf1, model_path)
```
%% Output
['H:/AI classification/RFC_part2.pkl']
%% Cell type:code id:943bcea4 tags:
``` python
# Load the testing dataset (no header row). The path is a raw string so the
# Windows backslashes stay literal: "\A" and "\T" are invalid escape
# sequences in a normal string and trigger warnings on newer Python versions.
# NOTE(review): presumably this file contains only the 128 feature columns,
# since it is passed to the classifier unchanged - confirm.
test_data = pd.read_csv(r'H:\AI classification\TestingDataMulti.csv', header=None)
# Predict a label for every row of the testing dataset
predictions = clf1.predict(test_data)
# Convert the predictions into a DataFrame that shares test_data's index, so
# the column-wise concat below pairs each prediction with its own row
predictions_df = pd.DataFrame(predictions, index=test_data.index)
# Append the predictions as the last column of the testing dataset
result = pd.concat([test_data, predictions_df], axis=1)
# Write the combined table to CSV without index or header rows
result.to_csv('H:/AI classification/test_pre2.csv', index=False, header=False)
```
%% Cell type:code id:8a8a1e04 tags:
``` python
# Print the labels predicted for the testing dataset
print(predictions)
```
%% Output
[2 2 2 2 2 2 1 1 2 2 2 1 1 1 2 2 2 1 1 1 1 1 2 2 2 2 0 2 2 0 0 0 0 1 1 1 1
1 1 1 1 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1 1 2 1 1 1 1 1 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0]
%% Cell type:code id:1521abed tags:
``` python
# Build the confusion matrix from the held-out test labels and the
# classifier's predictions for them
conf_mat = confusion_matrix(y_true=y_test, y_pred=y_pred1)
# Print a heading line followed by the matrix itself
print("Confusion Matrix:", conf_mat, sep="\n")
```
%% Output
Confusion Matrix:
[[580 2 6]
[ 12 274 20]
[ 6 26 274]]
%% Cell type:code id:52bac3fa tags:
``` python
# Compute the F1 score on the held-out test split, using the 'weighted'
# averaging mode across classes
f1 = f1_score(y_true=y_test, y_pred=y_pred1, average='weighted')
# Report the score
print("F1 Score:", f1)
```
%% Output
F1 Score: 0.9396875958397739
%% Cell type:code id:d63a6856 tags:
``` python
# Plot the confusion matrix as an annotated heatmap
fig, ax = plt.subplots(figsize=(10, 7))
# Draw integer cell counts on a yellow-green-blue colour scale
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='YlGnBu', ax=ax)
# Columns are predicted classes, rows are actual classes
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()
```
%% Output
%% Cell type:code id:131cc0be tags:
``` python
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment