Upload New File

00f66c0c · yl1r22 · 3be13398 · 00f66c0c
Commit 00f66c0c authored 2 years ago by yl1r22
--- a/AI_Part2.ipynb
+++ b/AI_Part2.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "6064e0b1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "         0            1          2            3           4            5    \\\n",
+      "0  70.399324  127673.0908 -49.572308  127648.0176 -169.578319  127723.2374   \n",
+      "\n",
+      "         6          7          8          9    ...  119  120  121  122  123  \\\n",
+      "0  65.689611  605.91099 -57.003571  626.78553  ...    0    0    0    0    0   \n",
+      "\n",
+      "   124  125  126  127  128  \n",
+      "0    0    0    0    0    0  \n",
+      "\n",
+      "[1 rows x 129 columns]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Import scikit-learn dataset library\n",
+    "#from sklearn import datasets\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.model_selection import cross_val_score\n",
+    "from sklearn import svm, metrics\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from joblib import dump\n",
+    "\n",
+    "\n",
+    "# The CSV has no header row, so pandas numbers the columns 0..128.\n",
+    "# Forward slashes are used because a backslash path in a normal string\n",
+    "# literal relies on invalid escape sequences; this also matches the other\n",
+    "# paths used in this notebook.\n",
+    "df = pd.read_csv('H:/AI classification/TrainingDataMulti.csv', header=None)\n",
+    "\n",
+    "print(df.head(1))\n",
+    "\n",
+    "# Columns 0-127 hold the features; column 128 holds the class label.\n",
+    "df_feature = df.iloc[:, :128]\n",
+    "df_label = df.iloc[:, 128]\n",
+    "\n",
+    "# Split dataset into training set and test set (80% training, 20% test).\n",
+    "# random_state pins the shuffle so the split is reproducible across runs.\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    df_feature, df_label, test_size=0.2, random_state=42\n",
+    ")\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c4bcd563",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Linear-kernel support vector classifier, fitted on the training split\n",
+    "# (fit returns the estimator itself, so construction and training chain).\n",
+    "clf = svm.SVC(kernel='linear').fit(X_train, y_train)\n",
+    "\n",
+    "# Predict labels for the held-out split and report the fraction correct.\n",
+    "y_pred = clf.predict(X_test)\n",
+    "print(\"Accuracy:\", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "cdc65331",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.9558333333333333\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Random forest with 200 trees; max_features=78 is the number of features\n",
+    "# considered at each split. random_state fixes the forest's internal\n",
+    "# randomness so the fitted model and its accuracy are reproducible.\n",
+    "clf1 = RandomForestClassifier(n_estimators=200, max_features=78, random_state=42)\n",
+    "\n",
+    "clf1.fit(X_train, y_train)\n",
+    "\n",
+    "# Predict labels for the held-out split.\n",
+    "y_pred1 = clf1.predict(X_test)\n",
+    "\n",
+    "print(\"Accuracy:\",metrics.accuracy_score(y_test, y_pred1))\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "ae7d5339",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Scores [0.934375   0.91979167 0.940625   0.94583333 0.93958333]\n",
+      "Mean Scores 0.9360416666666668\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 5-fold cross-validation of the random forest on the training split.\n",
+    "scores1 = cross_val_score(estimator=clf1, X=X_train, y=y_train, cv=5)\n",
+    "\n",
+    "print(\"Scores\", scores1)\n",
+    "print(\"Mean Scores\", scores1.mean())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "253d7c20",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['H:/AI classification/RFC_part2.pkl']"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Persist the fitted random forest to disk with joblib.\n",
+    "dump(value=clf1, filename='H:/AI classification/RFC_part2.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "943bcea4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load testing dataset (no header row). Forward slashes replace the backslash\n",
+    "# path, which relied on invalid escape sequences in a normal string literal.\n",
+    "test_data = pd.read_csv('H:/AI classification/TestingDataMulti.csv', header=None)\n",
+    "# Predict a label for every row of the testing dataset\n",
+    "predictions = clf1.predict(test_data)\n",
+    "# Give the prediction column the next free column number and the test frame's\n",
+    "# index, so the combined frame has no duplicate column label.\n",
+    "predictions_df = pd.DataFrame(predictions, index=test_data.index, columns=[test_data.shape[1]])\n",
+    "# Append the predictions as the last column\n",
+    "result = pd.concat([test_data, predictions_df], axis=1)\n",
+    "# Create a CSV document (no index column, no header row)\n",
+    "result.to_csv('H:/AI classification/test_pre2.csv', index = False, header = False)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "8a8a1e04",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
+      " 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1\n",
+      " 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1521abed",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
+%% Cell type:code id:6064e0b1 tags:
+
+``` python
+#Import scikit-learn dataset library
+#from sklearn import datasets
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import cross_val_score
+from sklearn import svm, metrics
+from sklearn.ensemble import RandomForestClassifier
+import pandas as pd
+import numpy as np
+from joblib import dump
+
+
+df = pd.read_csv('H:\AI classification\TrainingDataMulti.csv', header=None)
+
+print(df.head(1))
+
+df_feature = df.iloc[:, :128]
+
+df_label = df.iloc[:, 128]
+
+
+
+
+#dftest = pd.read_csv('H:\AI classification\TestingDataBinary.csv')
+
+#X_test = dftest.iloc[:, :128]
+
+#y_test = dftest.iloc[:, 128]
+
+
+#Load dataset
+#cancer = datasets.load_breast_cancer()
+
+# print the names of the  features
+#print("Features: ", cancer.feature_names)
+
+# print the label type of cancer('malignant' 'benign')
+#print("Labels: ", cancer.target_names)
+
+# print data(feature)shape
+#print (cancer.data.shape)
+
+
+# Split dataset into training set and test set
+X_train, X_test, y_train, y_test = train_test_split(df_feature, df_label, test_size=0.2) # 80% training and 20% test
+
+
+```
+
+%% Output
+
+             0            1          2            3           4            5    \
+    0  70.399324  127673.0908 -49.572308  127648.0176 -169.578319  127723.2374
+    
+             6          7          8          9    ...  119  120  121  122  123  \
+    0  65.689611  605.91099 -57.003571  626.78553  ...    0    0    0    0    0
+    
+       124  125  126  127  128
+    0    0    0    0    0    0
+    
+    [1 rows x 129 columns]
+
+%% Cell type:code id:c4bcd563 tags:
+
+``` python
+#Create a svm Classifier
+clf = svm.SVC(kernel='linear') # Linear Kernel
+
+#Train the model using the training sets
+clf.fit(X_train, y_train)
+
+#Predict the response for test dataset
+y_pred = clf.predict(X_test)
+
+print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
+```
+
+%% Cell type:code id:cdc65331 tags:
+
+``` python
+# Create a rfc
+clf1 = RandomForestClassifier(n_estimators=200, max_features=78)
+
+clf1.fit(X_train, y_train)
+
+y_pred1 = clf1.predict(X_test)
+
+print("Accuracy:",metrics.accuracy_score(y_test, y_pred1))
+
+```
+
+%% Output
+
+    Accuracy: 0.9558333333333333
+
+%% Cell type:code id:ae7d5339 tags:
+
+``` python
+scores1 = cross_val_score(clf1, X_train, y_train, cv=5)
+
+print("Scores", scores1)
+print("Mean Scores", np.mean(scores1))
+```
+
+%% Output
+
+    Scores [0.934375   0.91979167 0.940625   0.94583333 0.93958333]
+    Mean Scores 0.9360416666666668
+
+%% Cell type:code id:253d7c20 tags:
+
+``` python
+dump(clf1, 'H:/AI classification/RFC_part2.pkl')
+```
+
+%% Output
+
+    ['H:/AI classification/RFC_part2.pkl']
+
+%% Cell type:code id:943bcea4 tags:
+
+``` python
+# Load testing dataset
+test_data=pd.read_csv('H:\AI classification\TestingDataMulti.csv', header=None)
+# predict dataset
+predictions = clf1.predict(test_data)
+predictions_df = pd.DataFrame(predictions)
+# write the result to dataset
+result = pd.concat([test_data,predictions_df], axis=1)
+#create a csv dcument
+result.to_csv('H:/AI classification/test_pre2.csv', index = False, header = False)
+```
+
+%% Cell type:code id:8a8a1e04 tags:
+
+``` python
+print(predictions)
+```
+
+%% Output
+
+    [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+     1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1
+     1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+
+%% Cell type:code id:1521abed tags:
+
+``` python
+```