# %% [markdown]
# # Part 2 - multi-class classification
#
# Trains a linear SVM and a random forest on `TrainingDataMulti.csv`
# (no header row; the first 128 columns are features, column 128 is the label),
# reports hold-out and cross-validated accuracy, persists the random forest,
# and writes predictions for `TestingDataMulti.csv` to `test_pre2.csv`.

# %%
import os
from pathlib import Path

import numpy as np
import pandas as pd
from joblib import dump
from sklearn import metrics, svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# %%
# Configuration: everything a reader might want to tune lives here.
# The default directory is the one the notebook was originally run from;
# set the AI_DATA_DIR environment variable to point somewhere else.
# Forward slashes avoid the invalid "\A" / "\T" escapes of a plain Windows path.
DATA_DIR = Path(os.environ.get("AI_DATA_DIR", "H:/AI classification"))
TRAIN_CSV = DATA_DIR / "TrainingDataMulti.csv"
TEST_CSV = DATA_DIR / "TestingDataMulti.csv"
MODEL_PATH = DATA_DIR / "RFC_part2.pkl"
PREDICTIONS_CSV = DATA_DIR / "test_pre2.csv"

N_FEATURES = 128      # columns 0..127 are features, column 128 is the label
TEST_SIZE = 0.2       # 80% training / 20% hold-out
CV_FOLDS = 5
RF_N_ESTIMATORS = 200
RF_MAX_FEATURES = 78
SEED = 42             # fixed so the split and the forest are reproducible

# %% [markdown]
# ## Load the training data

# %%
df = pd.read_csv(TRAIN_CSV, header=None)

print(df.head(1))

df_feature = df.iloc[:, :N_FEATURES]
df_label = df.iloc[:, N_FEATURES]

# %%
# Hold out part of the labelled data for a final accuracy check.
X_train, X_test, y_train, y_test = train_test_split(
    df_feature, df_label, test_size=TEST_SIZE, random_state=SEED
)

# %% [markdown]
# ## Linear SVM baseline

# %%
# SVMs are not scale invariant, and the raw features span several orders of
# magnitude, so standardise inside a pipeline. The scaler is fitted on the
# training split only and re-applied automatically at predict time.
clf = make_pipeline(StandardScaler(), svm.SVC(kernel='linear'))

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# %% [markdown]
# ## Random forest

# %%
clf1 = RandomForestClassifier(
    n_estimators=RF_N_ESTIMATORS,
    max_features=RF_MAX_FEATURES,
    random_state=SEED,
)

clf1.fit(X_train, y_train)

y_pred1 = clf1.predict(X_test)

print("Accuracy:", metrics.accuracy_score(y_test, y_pred1))

# %%
# cross_val_score clones the estimator for every fold, so the fitted clf1
# above is left untouched by this call.
scores1 = cross_val_score(clf1, X_train, y_train, cv=CV_FOLDS)

print("Scores", scores1)
print("Mean Scores", np.mean(scores1))

# %% [markdown]
# ## Persist the model

# %%
dump(clf1, MODEL_PATH)

# %% [markdown]
# ## Predict the unlabelled test set

# %%
# Load testing dataset (no header row, feature columns only).
test_data = pd.read_csv(TEST_CSV, header=None)

# Fail early with a readable message instead of deep inside predict().
if test_data.shape[1] != N_FEATURES:
    raise ValueError(
        f"{TEST_CSV} has {test_data.shape[1]} columns, expected {N_FEATURES}"
    )

predictions = clf1.predict(test_data)

# Share the test frame's index so concat pairs each row with its own prediction.
predictions_df = pd.DataFrame(predictions, index=test_data.index)

# Append the predicted label as the last column and write the result to disk.
result = pd.concat([test_data, predictions_df], axis=1)
result.to_csv(PREDICTIONS_CSV, index=False, header=False)

# %%
print(predictions)