diff --git a/AI_Part2.ipynb b/AI_Part2.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a1468fb43a8022f4a1e75fb0cec414b024aea8b9 --- /dev/null +++ b/AI_Part2.ipynb @@ -0,0 +1,233 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "6064e0b1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2 3 4 5 \\\n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 127723.2374 \n", + "\n", + " 6 7 8 9 ... 119 120 121 122 123 \\\n", + "0 65.689611 605.91099 -57.003571 626.78553 ... 0 0 0 0 0 \n", + "\n", + " 124 125 126 127 128 \n", + "0 0 0 0 0 0 \n", + "\n", + "[1 rows x 129 columns]\n" + ] + } + ], + "source": [ + "#Import scikit-learn dataset library\n", + "#from sklearn import datasets\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn import svm, metrics\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "import pandas as pd\n", + "import numpy as np\n", + "from joblib import dump\n", + "\n", + "\n", + "df = pd.read_csv('H:\\AI classification\\TrainingDataMulti.csv', header=None)\n", + "\n", + "print(df.head(1))\n", + "\n", + "df_feature = df.iloc[:, :128]\n", + "\n", + "df_label = df.iloc[:, 128]\n", + "\n", + "\n", + "\n", + "\n", + "#dftest = pd.read_csv('H:\\AI classification\\TestingDataBinary.csv')\n", + "\n", + "#X_test = dftest.iloc[:, :128]\n", + "\n", + "#y_test = dftest.iloc[:, 128]\n", + "\n", + "\n", + "#Load dataset\n", + "#cancer = datasets.load_breast_cancer()\n", + "\n", + "# print the names of the features\n", + "#print(\"Features: \", cancer.feature_names)\n", + "\n", + "# print the label type of cancer('malignant' 'benign')\n", + "#print(\"Labels: \", cancer.target_names)\n", + "\n", + "# print data(feature)shape\n", + "#print (cancer.data.shape)\n", + "\n", + "\n", + "# Split dataset into training set and test set\n", + "X_train, 
X_test, y_train, y_test = train_test_split(df_feature, df_label, test_size=0.2) # 80% training and 20% test\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4bcd563", + "metadata": {}, + "outputs": [], + "source": [ + "#Create an SVM classifier\n", + "clf = svm.SVC(kernel='linear') # Linear Kernel\n", + "\n", + "#Train the model using the training sets\n", + "clf.fit(X_train, y_train)\n", + "\n", + "#Predict the response for test dataset\n", + "y_pred = clf.predict(X_test)\n", + "\n", + "print(\"Accuracy:\",metrics.accuracy_score(y_test, y_pred))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cdc65331", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.9558333333333333\n" + ] + } + ], + "source": [ + "# Create a random forest classifier\n", + "clf1 = RandomForestClassifier(n_estimators=200, max_features=78)\n", + "\n", + "clf1.fit(X_train, y_train)\n", + "\n", + "y_pred1 = clf1.predict(X_test)\n", + "\n", + "print(\"Accuracy:\",metrics.accuracy_score(y_test, y_pred1))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ae7d5339", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Scores [0.934375 0.91979167 0.940625 0.94583333 0.93958333]\n", + "Mean Scores 0.9360416666666668\n" + ] + } + ], + "source": [ + "scores1 = cross_val_score(clf1, X_train, y_train, cv=5)\n", + "\n", + "print(\"Scores\", scores1)\n", + "print(\"Mean Scores\", np.mean(scores1))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "253d7c20", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['H:/AI classification/RFC_part2.pkl']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dump(clf1, 'H:/AI classification/RFC_part2.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": 
"943bcea4", + "metadata": {}, + "outputs": [], + "source": [ + "# Load testing dataset\n", + "test_data=pd.read_csv('H:\AI classification\TestingDataMulti.csv', header=None)\n", + "# predict labels for the testing dataset\n", + "predictions = clf1.predict(test_data)\n", + "predictions_df = pd.DataFrame(predictions)\n", + "# append the predictions to the testing data as a new column\n", + "result = pd.concat([test_data,predictions_df], axis=1)\n", + "# create a csv document\n", + "result.to_csv('H:/AI classification/test_pre2.csv', index = False, header = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "8a8a1e04", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + " 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1\n", + " 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n" + ] + } + ], + "source": [ + "print(predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1521abed", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}