Delete AI_Part2.ipynb

7035e3ce · yl1r22 · 9e0248b8 · 9e0248b8
Commit 7035e3ce authored 2 years ago by yl1r22
--- a/AI_Part2.ipynb
+++ b/AI_Part2.ipynb
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "6064e0b1",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "         0            1          2            3           4            5    \\\n",
-      "0  70.399324  127673.0908 -49.572308  127648.0176 -169.578319  127723.2374   \n",
-      "\n",
-      "         6          7          8          9    ...  119  120  121  122  123  \\\n",
-      "0  65.689611  605.91099 -57.003571  626.78553  ...    0    0    0    0    0   \n",
-      "\n",
-      "   124  125  126  127  128  \n",
-      "0    0    0    0    0    0  \n",
-      "\n",
-      "[1 rows x 129 columns]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Import scikit-learn utilities plus pandas, numpy and joblib\n",
-    "#from sklearn import datasets\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.model_selection import cross_val_score\n",
-    "from sklearn import svm, metrics\n",
-    "from sklearn.ensemble import RandomForestClassifier\n",
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "from joblib import dump\n",
-    "\n",
-    "# Read the training dataset (the CSV has no header row).\n",
-    "# Forward slashes are used because '\\A' and '\\T' in a normal string literal are\n",
-    "# invalid escape sequences (DeprecationWarning, SyntaxWarning from Python 3.12);\n",
-    "# the model/prediction output paths in this notebook already use this form.\n",
-    "df = pd.read_csv('H:/AI classification/TrainingDataMulti.csv', header=None)\n",
-    "\n",
-    "# Print the first row as a quick sanity check of the parsed columns\n",
-    "print(df.head(1))\n",
-    "\n",
-    "# The first 128 columns (0-127) are the features\n",
-    "df_feature = df.iloc[:, :128]\n",
-    "\n",
-    "# The 129th column (index 128) holds the labels\n",
-    "df_label = df.iloc[:, 128]\n",
-    "\n",
-    "\n",
-    "# Split dataset into training set and test set: 80% training and 20% test.\n",
-    "# A fixed random_state makes the split reproducible across kernel restarts.\n",
-    "X_train, X_test, y_train, y_test = train_test_split(\n",
-    "    df_feature, df_label, test_size=0.2, random_state=42\n",
-    ")\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "cdc65331",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Accuracy: 0.9416666666666667\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Create a random forest classifier: 100 trees, each split considering 78\n",
-    "# candidate features.\n",
-    "# NOTE(review): max_features=78 presumably came from manual tuning - confirm.\n",
-    "# random_state pins bootstrap sampling and feature selection so the reported\n",
-    "# accuracy and the dumped model are reproducible for a given train/test split.\n",
-    "clf1 = RandomForestClassifier(n_estimators=100, max_features=78, random_state=42)\n",
-    "\n",
-    "# Train the model using the training set\n",
-    "clf1.fit(X_train, y_train)\n",
-    "\n",
-    "# Predict the response for the held-out test set\n",
-    "y_pred1 = clf1.predict(X_test)\n",
-    "\n",
-    "print(\"Accuracy:\", metrics.accuracy_score(y_test, y_pred1))\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "ae7d5339",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Scores [0.94166667 0.95104167 0.934375   0.934375   0.921875  ]\n",
-      "Mean Scores 0.9366666666666668\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Evaluate the classifier with 5-fold cross-validation on the training split\n",
-    "scores1 = cross_val_score(estimator=clf1, X=X_train, y=y_train, cv=5)\n",
-    "\n",
-    "# Report the per-fold scores followed by their average\n",
-    "print(\"Scores\", scores1)\n",
-    "print(\"Mean Scores\", scores1.mean())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "253d7c20",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['H:/AI classification/RFC_part2.pkl']"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Persist the classifier with joblib; the returned list of written file\n",
-    "# names is this cell's displayed output\n",
-    "dump(value=clf1, filename='H:/AI classification/RFC_part2.pkl')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "943bcea4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Load testing dataset (no header row). Forward slashes avoid the invalid\n",
-    "# '\\A' / '\\T' escape sequences of a backslash path in a normal string\n",
-    "# literal and match the output path used at the end of this cell.\n",
-    "test_data = pd.read_csv('H:/AI classification/TestingDataMulti.csv', header=None)\n",
-    "\n",
-    "# Predict a label for every row of the testing dataset\n",
-    "predictions = clf1.predict(test_data)\n",
-    "\n",
-    "# Convert predictions into a single-column dataframe that shares the test\n",
-    "# data's index, so the column-wise concat below lines rows up explicitly\n",
-    "predictions_df = pd.DataFrame(predictions, index=test_data.index)\n",
-    "\n",
-    "# Append the predictions as the last column of the testing dataset\n",
-    "result = pd.concat([test_data, predictions_df], axis=1)\n",
-    "\n",
-    "# Output a csv document without index or header\n",
-    "result.to_csv('H:/AI classification/test_pre2.csv', index=False, header=False)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "8a8a1e04",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2 2 2 2 2 2 1 1 2 2 2 1 1 1 1 1 2 1 1 1 1 1 2 2 2 2 0 0 0 0 0 0 0 1 1 1 1\n",
-      " 1 2 2 1 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 1 2 1 1 1 1 1 1 0 0 0 0 0 0 0 0\n",
-      " 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Print predictions\n",
-    "print(predictions)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1521abed",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
-%% Cell type:code id:6064e0b1 tags:
-``` python
-# Import scikit-learn utilities plus pandas, numpy and joblib
-#from sklearn import datasets
-from sklearn.model_selection import train_test_split
-from sklearn.model_selection import cross_val_score
-from sklearn import svm, metrics
-from sklearn.ensemble import RandomForestClassifier
-import pandas as pd
-import numpy as np
-from joblib import dump
-#Read training datasets
-df = pd.read_csv('H:\AI classification\TrainingDataMulti.csv', header=None)
-# Print the head of csv document to check
-print(df.head(1))
-# The first 128 columns are features
-df_feature = df.iloc[:, :128]
-# the 129th column is labels
-df_label = df.iloc[:, 128]
-# Split dataset into training set and test set
-X_train, X_test, y_train, y_test = train_test_split(df_feature, df_label, test_size=0.2) # 80% training and 20% test
-```
-%% Output
-             0            1          2            3           4            5    \
-    0  70.399324  127673.0908 -49.572308  127648.0176 -169.578319  127723.2374
-             6          7          8          9    ...  119  120  121  122  123  \
-    0  65.689611  605.91099 -57.003571  626.78553  ...    0    0    0    0    0
-       124  125  126  127  128
-    0    0    0    0    0    0
-    [1 rows x 129 columns]
-%% Cell type:code id:cdc65331 tags:
-``` python
-# Create a Randomforest Classifier
-clf1 = RandomForestClassifier(n_estimators=100, max_features=78)
-# Train the model using the training sets
-clf1.fit(X_train, y_train)
-# #Predict the response for test dataset
-y_pred1 = clf1.predict(X_test)
-print("Accuracy:",metrics.accuracy_score(y_test, y_pred1))
-```
-%% Output
-    Accuracy: 0.9416666666666667
-%% Cell type:code id:ae7d5339 tags:
-``` python
-# Using Cross-validation to evaluate classifier
-scores1 = cross_val_score(clf1, X_train, y_train, cv=5)
-#Print model's Scores
-print("Scores", scores1)
-print("Mean Scores", np.mean(scores1))
-```
-%% Output
-    Scores [0.94166667 0.95104167 0.934375   0.934375   0.921875  ]
-    Mean Scores 0.9366666666666668
-%% Cell type:code id:253d7c20 tags:
-``` python
-#Dump the model
-dump(clf1, 'H:/AI classification/RFC_part2.pkl')
-```
-%% Output
-    ['H:/AI classification/RFC_part2.pkl']
-%% Cell type:code id:943bcea4 tags:
-``` python
-# Load testing dataset
-test_data=pd.read_csv('H:\AI classification\TestingDataMulti.csv', header=None)
-# Predict Testing dataset
-predictions = clf1.predict(test_data)
-# Convert predictions into dataframe format
-predictions_df = pd.DataFrame(predictions)
-#Write the predictions to testing dataset
-result = pd.concat([test_data,predictions_df], axis=1)
-#Output a csv document
-result.to_csv('H:/AI classification/test_pre2.csv', index = False, header = False)
-```
-%% Cell type:code id:8a8a1e04 tags:
-``` python
-# Print the predictions
-print(predictions)
-```
-%% Output
-    [2 2 2 2 2 2 1 1 2 2 2 1 1 1 1 1 2 1 1 1 1 1 2 2 2 2 0 0 0 0 0 0 0 1 1 1 1
-     1 2 2 1 1 2 2 2 2 2 2 2 1 2 2 2 1 2 2 2 1 1 2 1 1 1 1 1 1 0 0 0 0 0 0 0 0
-     0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0]
-%% Cell type:code id:1521abed tags:
-``` python
-```