Delete AI_Part1.ipynb

2c80dddb · yl1r22 · 00f66c0c · 00f66c0c
Commit 2c80dddb authored Jun 6, 2023 by yl1r22
--- a/AI_Part1.ipynb
+++ b/AI_Part1.ipynb
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "6064e0b1",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "         0            1          2            3           4            5    \\\n",
-      "0  70.399324  127673.0908 -49.572308  127648.0176 -169.578319  127723.2374   \n",
-      "\n",
-      "         6          7          8          9    ...  119  120  121  122  123  \\\n",
-      "0  65.689611  605.91099 -57.003571  626.78553  ...    0    0    0    0    0   \n",
-      "\n",
-      "   124  125  126  127  128  \n",
-      "0    0    0    0    0    0  \n",
-      "\n",
-      "[1 rows x 129 columns]\n"
-     ]
-    }
-   ],
-   "source": [
-    "#Import scikit-learn dataset library\n",
-    "#from sklearn import datasets\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.model_selection import cross_val_score\n",
-    "from sklearn import svm, metrics\n",
-    "from sklearn.ensemble import RandomForestClassifier\n",
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "from joblib import dump\n",
-    "\n",
-    "\n",
-    "df = pd.read_csv('H:\\AI classification\\TrainingDataBinary.csv', header=None)\n",
-    "\n",
-    "print(df.head(1))\n",
-    "\n",
-    "df_feature = df.iloc[:, :128]\n",
-    "\n",
-    "df_label = df.iloc[:, 128]\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "#dftest = pd.read_csv('H:\\AI classification\\TestingDataBinary.csv')\n",
-    "\n",
-    "#X_test = dftest.iloc[:, :128]\n",
-    "\n",
-    "#y_test = dftest.iloc[:, 128]\n",
-    "\n",
-    "\n",
-    "#Load dataset\n",
-    "#cancer = datasets.load_breast_cancer()\n",
-    "\n",
-    "# print the names of the  features\n",
-    "#print(\"Features: \", cancer.feature_names)\n",
-    "\n",
-    "# print the label type of cancer('malignant' 'benign')\n",
-    "#print(\"Labels: \", cancer.target_names)\n",
-    "\n",
-    "# print data(feature)shape\n",
-    "#print (cancer.data.shape)\n",
-    "\n",
-    "\n",
-    "# Split dataset into training set and test set\n",
-    "X_train, X_test, y_train, y_test = train_test_split(df_feature, df_label, test_size=0.2) # 80% training and 20% test\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "id": "c4bcd563",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Accuracy: 0.87\n"
-     ]
-    }
-   ],
-   "source": [
-    "#Create a svm Classifier\n",
-    "clf = svm.SVC(kernel='linear') # Linear Kernel\n",
-    "\n",
-    "#Train the model using the training sets\n",
-    "clf.fit(X_train, y_train)\n",
-    "\n",
-    "#Predict the response for test dataset\n",
-    "y_pred = clf.predict(X_test)\n",
-    "\n",
-    "print(\"Accuracy:\",metrics.accuracy_score(y_test, y_pred))\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "cdc65331",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Accuracy: 0.9683333333333334\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Create a rfc \n",
-    "clf1 = RandomForestClassifier(n_estimators=100, max_features=78)\n",
-    "# Fit\n",
-    "clf1.fit(X_train, y_train)\n",
-    "\n",
-    "y_pred1 = clf1.predict(X_test)\n",
-    "\n",
-    "print(\"Accuracy:\",metrics.accuracy_score(y_test, y_pred1))\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "12a28ae3",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Scores [0.97604167 0.978125   0.971875   0.97604167 0.98125   ]\n",
-      "Mean Scores 0.9766666666666666\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "scores1 = cross_val_score(clf1, X_train, y_train, cv=5)\n",
-    "\n",
-    "print(\"Scores\", scores1)\n",
-    "print(\"Mean Scores\", np.mean(scores1))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "16ad1a95",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['H:/AI classification/RFC_part1.pkl']"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#dump the model\n",
-    "dump(clf1, 'H:/AI classification/RFC_part1.pkl')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "06c58b04",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Load testing dataset\n",
-    "test_data=pd.read_csv('H:\\AI classification\\TestingDataBinary.csv', header=None)\n",
-    "# predict dataset\n",
-    "predictions = clf1.predict(test_data)\n",
-    "predictions_df = pd.DataFrame(predictions)\n",
-    "# write the result to dataset\n",
-    "result = pd.concat([test_data,predictions_df], axis=1)\n",
-    "#create a csv document\n",
-    "result.to_csv('H:/AI classification/test_pre1.csv', index = False, header = False)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "c089a3e7",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
-      " 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1\n",
-      " 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(predictions)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "47105ba9",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
-%% Cell type:code id:6064e0b1 tags:
-
-``` python
-#Import scikit-learn dataset library
-#from sklearn import datasets
-from sklearn.model_selection import train_test_split
-from sklearn.model_selection import cross_val_score
-from sklearn import svm, metrics
-from sklearn.ensemble import RandomForestClassifier
-import pandas as pd
-import numpy as np
-from joblib import dump
-
-
-df = pd.read_csv('H:\AI classification\TrainingDataBinary.csv', header=None)
-
-print(df.head(1))
-
-df_feature = df.iloc[:, :128]
-
-df_label = df.iloc[:, 128]
-
-
-
-
-#dftest = pd.read_csv('H:\AI classification\TestingDataBinary.csv')
-
-#X_test = dftest.iloc[:, :128]
-
-#y_test = dftest.iloc[:, 128]
-
-
-#Load dataset
-#cancer = datasets.load_breast_cancer()
-
-# print the names of the  features
-#print("Features: ", cancer.feature_names)
-
-# print the label type of cancer('malignant' 'benign')
-#print("Labels: ", cancer.target_names)
-
-# print data(feature)shape
-#print (cancer.data.shape)
-
-
-# Split dataset into training set and test set
-X_train, X_test, y_train, y_test = train_test_split(df_feature, df_label, test_size=0.2) # 70% training and 30% test
-
-
-```
-
-%% Output
-
-             0            1          2            3           4            5    \
-    0  70.399324  127673.0908 -49.572308  127648.0176 -169.578319  127723.2374
-    
-             6          7          8          9    ...  119  120  121  122  123  \
-    0  65.689611  605.91099 -57.003571  626.78553  ...    0    0    0    0    0
-    
-       124  125  126  127  128
-    0    0    0    0    0    0
-    
-    [1 rows x 129 columns]
-
-%% Cell type:code id:c4bcd563 tags:
-
-``` python
-#Create a svm Classifier
-clf = svm.SVC(kernel='linear') # Linear Kernel
-
-#Train the model using the training sets
-clf.fit(X_train, y_train)
-
-#Predict the response for test dataset
-y_pred = clf.predict(X_test)
-
-print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
-```
-
-%% Output
-
-    Accuracy: 0.87
-
-%% Cell type:code id:cdc65331 tags:
-
-``` python
-# Create a rfc
-clf1 = RandomForestClassifier(n_estimators=100, max_features=78)
-# Fit
-clf1.fit(X_train, y_train)
-
-y_pred1 = clf1.predict(X_test)
-
-print("Accuracy:",metrics.accuracy_score(y_test, y_pred1))
-
-```
-
-%% Output
-
-    Accuracy: 0.9683333333333334
-
-%% Cell type:code id:12a28ae3 tags:
-
-``` python
-
-scores1 = cross_val_score(clf1, X_train, y_train, cv=5)
-
-print("Scores", scores1)
-print("Mean Scores", np.mean(scores1))
-```
-
-%% Output
-
-    Scores [0.97604167 0.978125   0.971875   0.97604167 0.98125   ]
-    Mean Scores 0.9766666666666666
-
-%% Cell type:code id:16ad1a95 tags:
-
-``` python
-#dump the model
-dump(clf1, 'H:/AI classification/RFC_part1.pkl')
-```
-
-%% Output
-
-    ['H:/AI classification/RFC_part1.pkl']
-
-%% Cell type:code id:06c58b04 tags:
-
-``` python
-# Load testing dataset
-test_data=pd.read_csv('H:\AI classification\TestingDataBinary.csv', header=None)
-# predict dataset
-predictions = clf1.predict(test_data)
-predictions_df = pd.DataFrame(predictions)
-# write the result to dataset
-result = pd.concat([test_data,predictions_df], axis=1)
-#create a csv dcument
-result.to_csv('H:/AI classification/test_pre1.csv', index = False, header = False)
-```
-
-%% Cell type:code id:c089a3e7 tags:
-
-``` python
-print(predictions)
-```
-
-%% Output
-
-    [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
-     1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1
-     1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
-
-%% Cell type:code id:47105ba9 tags:
-
-``` python
-```