From f43805ea6c05b0798c402c3a92574a6f0c2699db Mon Sep 17 00:00:00 2001 From: yyl1c20 <yyl1c20@soton.ac.uk> Date: Wed, 7 Jun 2023 10:34:44 +0000 Subject: [PATCH] Upload New File --- partAknn_v1.ipynb | 1819 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1819 insertions(+) create mode 100644 partAknn_v1.ipynb diff --git a/partAknn_v1.ipynb b/partAknn_v1.ipynb new file mode 100644 index 0000000..58c14bb --- /dev/null +++ b/partAknn_v1.ipynb @@ -0,0 +1,1819 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>1. Loading Datasets</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "[ TrainingDataBinary.csv info ]\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 6000 entries, 0 to 5999\n", + "Columns: 129 entries, R1-PA1:VH to marker\n", + "dtypes: float64(112), int64(17)\n", + "memory usage: 5.9 MB\n", + "\n", + "[ TestingDataBinary.csv info ]\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 100 entries, 0 to 99\n", + "Columns: 128 entries, R1-PA1:VH to snort_log4\n", + "dtypes: float64(112), int64(16)\n", + "memory usage: 100.1 KB\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "\n", + "mTrain = pd.read_csv(\"TrainingDataBinary.csv\")\n", + "mTest = pd.read_csv(\"TestingDataBinary.csv\")\n", + "\n", + "\n", + "print(\"\\n[ TrainingDataBinary.csv info ]\")\n", + "mTrain.info()\n", + "\n", + "print(\"\\n[ TestingDataBinary.csv info ]\")\n", + "mTest.info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>1.1 Analysing the Data</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "R1-PA1:VH float64\n", + "R1-PM1:V float64\n", + "R1-PA2:VH 
float64\n", + "R1-PM2:V float64\n", + "R1-PA3:VH float64\n", + " ... \n", + "snort_log1 int64\n", + "snort_log2 int64\n", + "snort_log3 int64\n", + "snort_log4 int64\n", + "marker int64\n", + "Length: 129, dtype: object" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "0 3000\n", + "1 3000\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain['marker'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " <th>marker</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", 
+ " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " <td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 129 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 
130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... \n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log4 relay1_log relay2_log relay3_log relay4_log \n", + "0 0 0 0 0 0 \\\n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + " snort_log1 snort_log2 snort_log3 snort_log4 marker \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + "[6000 rows x 129 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " <th>marker</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " <td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", 
+ " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 129 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... 
\n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log4 relay1_log relay2_log relay3_log relay4_log \n", + "0 0 0 0 0 0 \\\n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + " snort_log1 snort_log2 snort_log3 snort_log4 marker \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + "[6000 rows x 129 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain.isnull().sum()\n", + "mTrain = mTrain.dropna()\n", + "mTrain\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log3</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", 
+ " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " 
<td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 128 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... 
\n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log3 control_panel_log4 relay1_log relay2_log \n", + "0 0 0 0 0 \\\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "... ... ... ... ... \n", + "5995 0 0 0 0 \n", + "5996 0 0 0 0 \n", + "5997 0 0 0 0 \n", + "5998 0 0 0 0 \n", + "5999 0 0 0 0 \n", + "\n", + " relay3_log relay4_log snort_log1 snort_log2 snort_log3 snort_log4 \n", + "0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... 
\n", + "5995 0 0 0 0 0 0 \n", + "5996 0 0 0 0 0 0 \n", + "5997 0 0 0 0 0 0 \n", + "5998 0 0 0 0 0 0 \n", + "5999 0 0 0 0 0 0 \n", + "\n", + "[6000 rows x 128 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = mTrain.drop(columns = 'marker')\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "y = mTrain['marker']" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Stratified Train-Test Split\n", + "The train-test split is stratified so that each class makes up the same proportion of the training set and of the test set. The classes in this dataset are already balanced (3000 samples each), so here stratification simply guarantees an exact 50/50 split in both sets; it matters most when the number of samples per class is imbalanced.\n", + "\n", + "For imbalanced datasets, stratified K-fold cross-validation is used instead of plain K-fold cross-validation." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=1, shuffle = True, test_size=0.2, stratify=y)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "1 2400\n", + "0 2400\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "1 600\n", + "0 600\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.value_counts()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "attachments": 
{}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>3. Choosing a Model: KNN , training, and evaluation</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.metrics import accuracy_score, precision_score, recall_score" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 
1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 
1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier(n_neighbors=18)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsClassifier</label><div class=\"sk-toggleable__content\"><pre>KNeighborsClassifier(n_neighbors=18)</pre></div></div></div></div></div>" + ], + "text/plain": [ + "KNeighborsClassifier(n_neighbors=18)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn = KNeighborsClassifier(n_neighbors=18)  # baseline: k=18 is an untuned starting value (k is searched over 1..19 in section 4)\n", + "knn.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 0, ..., 0, 0, 0], dtype=int64)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "firstknn_pred = knn.predict(X_test)\n", + "firstknn_pred" + ] + }, + { + "cell_type": 
"code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.845\n", + "Precision: 0.9620535714285714\n", + "Recall: 0.7183333333333334\n" + ] + } + ], + "source": [ + "accuracy = accuracy_score(y_test, firstknn_pred)\n", + "precision = precision_score(y_test, firstknn_pred)\n", + "recall = recall_score(y_test, firstknn_pred)\n", + "# precision/recall are reported for the positive class, marker == 1 (sklearn default pos_label=1)\n", + "print(\"Accuracy:\", accuracy)\n", + "print(\"Precision:\", precision)\n", + "print(\"Recall:\", recall)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " normal 0.78 0.97 0.86 600\n", + "data injection attack 0.96 0.72 0.82 600\n", + "\n", + " accuracy 0.84 1200\n", + " macro avg 0.87 0.84 0.84 1200\n", + " weighted avg 0.87 0.84 0.84 1200\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import ConfusionMatrixDisplay\n", + "# (the confusion matrix is computed and plotted in the next cell; a bare call here would be discarded)\n", + "labels = ['normal', 'data injection attack']\n", + "print(classification_report(y_test, firstknn_pred, target_names = labels))" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfsAAAGwCAYAAACuFMx9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA1SUlEQVR4nO3deXhU5Rn38d9kX2dC0CREkgCiQJRFUWHqBhqJSBEK1mpRo0V8xYAIBdFX2VV8cUHRCFaRgIWKS6GCiCLKogQsIBYRoiCYICRokYQEss2c9w/M2BHQDDPJMHO+n+s618Wc9Z425s59P885x2IYhiEAABC0QvwdAAAAaFwkewAAghzJHgCAIEeyBwAgyJHsAQAIciR7AACCHMkeAIAgF+bvALzhdDq1b98+xcfHy2Kx+DscAICHDMPQ4cOHlZqaqpCQxqs/q6qqVFNT4/V5IiIiFBUV5YOImlZAJ/t9+/YpLS3N32EAALxUXFysli1bNsq5q6qq1DojTiUHHF6fKyUlRbt37w64hB/QyT4+Pl6S9O3mVrLGMSKB4PSHczv6OwSg0dSpVh9rmev3eWOoqalRyQGHvt3UStb4U88V5Yedyui6RzU1NST7plTfurfGhXj1fyBwOguzhPs7BKDx/PTA9qYYio2Ltygu/tSv41TgDhcHdLIHAKChHIZTDi/eBuMwnL4LpomR7AEApuCUIadOPdt7c6y/0fsGACDIUdkDAEzBKae8acR7d7R/kewBAKbgMAw5jFNvxXtzrL/RxgcAIMhR2QMATMHME/RI9gAAU3DKkMOkyZ42PgAAQY7KHgBgCrTxAQAIcszGBwAAQYvKHgBgCs6fFm+OD1QkewCAKTi8nI3vzbH+RrIHAJiCw5CXb73zXSxNjTF7AACCHJU9AMAUGLMHACDIOWWRQxavjg9UtPEBAAhyVPYAAFNwGscWb44PVCR7AIApOLxs43tzrL/RxgcAIMhR2QMATMHMlT3JHgBgCk7DIqfhxWx8L471N9r4AAAEOSp7AIAp0MYHACDIORQihxcNbYcPY2lqJHsAgCkYXo7ZG4zZAwCA0xWVPQDAFBizBwAgyDmMEDkML8bsA/hxubTxAQAIclT2AABTcMoipxc1rlOBW9qT7AEApmDmMXva+AAABDkqewCAKXg/QY82PgAAp7VjY/ZevAiHNj4AADhdUdkDAEzB6eWz8ZmNDwDAaY4xewAAgpxTIaa9z54xewAAghyVPQDAFByGRQ4vXlPrzbH+RrIHAJiCw8sJeg7a+AAA4HRFZQ8AMAWnESKnF7PxnczGBwDg9EYbHwAABC2SPQDAFJz6eUb+qSxOD683ceJEWSwWt6V9+/au7VVVVcrNzVXz5s0VFxengQMHqrS01O0cRUVF6tOnj2JiYpSUlKQxY8aorq7O4+9OGx8AYAreP1TH82PPO+88ffDBB67PYWE/p92RI0fqnXfe0RtvvCGbzaZhw4ZpwIAB+uSTTyRJDodDffr0UUpKitatW6f9+/frtttuU3h4uB577DGP4iDZAwDggfLycrfPkZGRioyMPOG+YWFhSklJOW59WVmZZs+erQULFuiqq66SJM2ZM0cdOnTQ+vXr1b17d73//vv68ssv9cEHHyg5OVldunTRlClTNHbsWE2cOFERERENjpk2PgDAFOqfje/NIklpaWmy2WyuZerUqSe95tdff63U1FS1adNGgwYNUlFRkSRp06ZNqq2tVVZWlmvf9u3bKz09XQUFBZKkgoICdezYUcnJya59srOzVV5erm3btnn03ansAQCm4Kv32RcXF8tqtbrWn6yq79atm/Lz89WuXTvt379fkyZN0uWXX64vvvhCJSUlioiIUEJCgtsxycnJKikpkSSVlJS4Jfr67fXbPEGyBwCYgvdvvTt2rNVqdUv2J9O7d2/Xvzt16qRu3bopIyNDr7/+uqKjo085jlNBGx8AgCaQkJCgc889Vzt37lRKSopqamp06NAht31KS0t
dY/wpKSnHzc6v/3yieQC/hmQPADCF+ofqeLN4o6KiQrt27VKLFi3UtWtXhYeHa+XKla7thYWFKioqkt1ulyTZ7XZt3bpVBw4ccO2zYsUKWa1WZWZmenRt2vgAAFNwGhY5vXhznafHjh49Wn379lVGRob27dunCRMmKDQ0VDfffLNsNpsGDx6sUaNGKTExUVarVcOHD5fdblf37t0lSb169VJmZqZuvfVWTZs2TSUlJXr44YeVm5t70nkCJ0OyBwCgEezdu1c333yz/vvf/+rMM8/UZZddpvXr1+vMM8+UJE2fPl0hISEaOHCgqqurlZ2drRdeeMF1fGhoqJYuXaqhQ4fKbrcrNjZWOTk5mjx5ssexkOwBAKbg9LIV7+lDdV577bVf3R4VFaW8vDzl5eWddJ+MjAwtW7bMo+ueCMkeAGAK3r/1LnCnuQVu5AAAoEGo7AEApuCQRQ4vHqrjzbH+RrIHAJgCbXwAABC0qOwBAKbgkHeteIfvQmlyJHsAgCmYuY1PsgcAmIKvXoQTiAI3cgAA0CBU9gAAUzC8fJ+9wa13AACc3mjjAwCAoEVlDwAwhaZ+xe3phGQPADAFh5dvvfPmWH8L3MgBAECDUNkDAEyBNj4AAEHOqRA5vWhoe3OsvwVu5AAAoEGo7AEApuAwLHJ40Yr35lh/I9kDAEyBMXsAAIKc4eVb7wyeoAcAAE5XVPYAAFNwyCKHFy+z8eZYfyPZAwBMwWl4N+7uNHwYTBOjjQ8AQJCjsje5V59M0d+fTnFb1/LsKs1eu0OSdPBAmF6ekqrNa+J1pCJEaWdX66YRpbq8T5lr/wk5rbVrW7QO/TdM8TaHLrj8sAY/tE/NU+qa9LsADXV+twr98Z7vdU7HI2qeUqeJf2mlguU21/b39n1+wuNemtJCb85Maqow4WNOLyfoeXOsv5HsoYx2R/X4wl2uz6GhP/eqnrg3XRXloZqYv1u2xDp9tKiZHvs/rfTcu1+pbcejkqTOl1bopntLlZhcqx/2h+ulyWdpypDWembJ103+XYCGiIpx6pttUXrvH4ma8Mqe47bf1DnT7fPFVx3WyKeK9fE7tuP2ReBwyiKnF+Pu3hzrb6fFnyl5eXlq1aqVoqKi1K1bN3366af+DslUQkOlxKQ612Jr7nBt+3JjrPr95Qe1v+CIWmTU6M/3lSrW5tDX/4l27TPgru/VoesRJbes1XkXH9GfhpVqx+YY1dX649sAv23jR1bNndZC65afOHn/+H2422LPLtPnn8SppCiyiSMFfMPvyX7hwoUaNWqUJkyYoM2bN6tz587Kzs7WgQMH/B2aaXy3O0I3X3Cecrp30OO56TqwN9y1LfOiSq1+O0HlP4bK6ZRWLU5QTZVFnX5XccJzlf8Yqg//2UyZF1UqLPyEuwABJeGMWl1ydbneey3R36HAS/VP0PNmCVR+T/ZPP/20hgwZojvuuEOZmZmaNWuWYmJi9Morr/g7NFNof2GlRj9TpEfn79Lwx/eqpChSf/3DOTpScexH46EXv5Wj1qI/ntdRv2/VWc+OTdOE2Xt0Vusat/O8/EgLXX92R/3xvI76fl+EJs7Z7Y+vA/jcNTf+qKMVofp4GS38QFc/Zu/NEqj8GnlNTY02bdqkrKws17qQkBBlZWWpoKDguP2rq6tVXl7utsA7F191WFf0LVObzCpd1OOwHvn7N6ooD9WatxMkSXOnpaiiPFSPL9yp594t1MC7DujRu1tp9/Yot/P8cegBvfD+V3rsHzsVEmLoiRHpMgL4NhWgXvZNB/XhogTVVgfuL3rArz+9P/zwgxwOh5KTk93WJycnq6Sk5Lj9p06dKpvN5lrS0tKaKlTTiLM51LJNtfbtidS+PRF6e86ZGvV0sS64vEJnn1elW/5aqnM6HdHb+We4HWdr7lDLs6vV9coKPTjzW3260qbtm2L89C0A3zj/kgqlta3W8gXN/R0KfMApi+v5+Ke0MEGvaTz44IMqKytzLcXFxf4OKegcrQz
Rvm8jlJhUq+qjx348QkLcS/TQUEOG8+TnqN9WWxNQP17AcbJvPqivPo/WN19G//bOOO0ZP83GP9XFCOBk79db78444wyFhoaqtLTUbX1paalSUlKO2z8yMlKRkcyG9aW/TUpV915lSmpZq/+WhOnVJ1soNETq8YcfFWd1KLV1tZ69P01Dxu+TtVmd1i23afOaeE2e940kacfmGBVuidH5l1QqLqFO+/dEau60FLVoVa0OXSv9/O2AE4uKcSj1f+adpKTVqM15R3X4UKi+/y5CkhQT59AVfcv0t0kt/BUmfIy33vlJRESEunbtqpUrV6p///6SJKfTqZUrV2rYsGH+DM00ftgfrqn3tNLhH0Nla16n8y6u1DNLv1LCT7ffPfLqLs1+LFUTclrraGWIUlvXaPSzRbrk6sOSpMhopz5516ZXn0pR1ZEQJSbV6qKeh/XQiG8VEcmgPU5P53Y+qife+vnZEndP2idJen9hMz01Ml2SdGW/Q5LF0EeLm/kjRMCnLIbh32lUCxcuVE5Ojl588UVdcskleuaZZ/T6669rx44dx43l/1J5eblsNpt+/KqNrPG0jBGcslO7+DsEoNHUGbVapX+prKxMVqu1Ua5Rnyv+sOIOhcdGnPJ5aitrtOiaOY0aa2Px+xP0/vSnP+n777/X+PHjVVJSoi5dumj58uW/megBAPAEbXw/GzZsGG17AAAayWmR7AEAaGxmfjY+yR4AYApmbuMzqw0AgCBHZQ8AMAUzV/YkewCAKZg52dPGBwAgyFHZAwBMwcyVPckeAGAKhry7fS6QHwBOsgcAmIKZK3vG7AEACHJU9gAAUzBzZU+yBwCYgpmTPW18AACCHJU9AMAUzFzZk+wBAKZgGBYZXiRsb471N9r4AAAEOZI9AMAU6t9n781yqh5//HFZLBbdd999rnVVVVXKzc1V8+bNFRcXp4EDB6q0tNTtuKKiIvXp00cxMTFKSkrSmDFjVFdX5/H1SfYAAFOoH7P3ZjkV//73v/Xiiy+qU6dObutHjhypJUuW6I033tDq1au1b98+DRgwwLXd4XCoT58+qqmp0bp16zR37lzl5+dr/PjxHsdAsgcAoJFUVFRo0KBBeumll9SsWTPX+rKyMs2ePVtPP/20rrrqKnXt2lVz5szRunXrtH79eknS+++/ry+//FJ///vf1aVLF/Xu3VtTpkxRXl6eampqPIqDZA8AMIX6CXreLJJUXl7utlRXV5/0mrm5uerTp4+ysrLc1m/atEm1tbVu69u3b6/09HQVFBRIkgoKCtSxY0clJye79snOzlZ5ebm2bdvm0Xcn2QMATMFXbfy0tDTZbDbXMnXq1BNe77XXXtPmzZtPuL2kpEQRERFKSEhwW5+cnKySkhLXPv+b6Ou312/zBLfeAQBMwVe33hUXF8tqtbrWR0ZGHrdvcXGxRowYoRUrVigqKuqUr+krVPYAAHjAarW6LSdK9ps2bdKBAwd04YUXKiwsTGFhYVq9erVmzJihsLAwJScnq6amRocOHXI7rrS0VCkpKZKklJSU42bn13+u36ehSPYAAFMwvGzhe9IVuPrqq7V161Zt2bLFtVx00UUaNGiQ69/h4eFauXKl65jCwkIVFRXJbrdLkux2u7Zu3aoDBw649lmxYoWsVqsyMzM9+u608QEApmBIMgzvjm+o+Ph4nX/++W7rYmNj1bx5c9f6wYMHa9SoUUpMTJTVatXw4cNlt9vVvXt3SVKvXr2UmZmpW2+9VdOmTVNJSYkefvhh5ebmnrCb8GtI9gAA+MH06dMVEhKigQMHqrq6WtnZ2XrhhRdc20NDQ7V06VINHTpUdrtdsbGxysnJ0eTJkz2+FskeAGAKTllk8eIpeN48QU+SVq1a5fY5KipKeXl5ysvLO+kxGRkZWrZsmVfXlUj2AACT4EU4AAAgaFHZAwBMwWlYZOF99gAABC/D8HI2vhfH+httfAAAghyVPQDAFMw8QY9kDwAwBZI9AABBzswT9Bi
zBwAgyFHZAwBMwcyz8Un2AABTOJbsvRmz92EwTYw2PgAAQY7KHgBgCszGBwAgyBny7J30Jzo+UNHGBwAgyFHZAwBMgTY+AADBzsR9fJI9AMAcvKzsFcCVPWP2AAAEOSp7AIAp8AQ9AACCnJkn6NHGBwAgyFHZAwDMwbB4N8kugCt7kj0AwBTMPGZPGx8AgCBHZQ8AMAceqgMAQHAz82z8BiX7t99+u8EnvP766085GAAA4HsNSvb9+/dv0MksFoscDoc38QAA0HgCuBXvjQYle6fT2dhxAADQqMzcxvdqNn5VVZWv4gAAoHEZPlgClMfJ3uFwaMqUKTrrrLMUFxenb775RpI0btw4zZ492+cBAgAA73ic7B999FHl5+dr2rRpioiIcK0///zz9fLLL/s0OAAAfMfigyUweZzs582bp7/97W8aNGiQQkNDXes7d+6sHTt2+DQ4AAB8hjZ+w3333Xdq27btceudTqdqa2t9EhQAAPAdj5N9Zmam1q5de9z6N998UxdccIFPggIAwOdMXNl7/AS98ePHKycnR999952cTqf++c9/qrCwUPPmzdPSpUsbI0YAALxn4rfeeVzZ9+vXT0uWLNEHH3yg2NhYjR8/Xtu3b9eSJUt0zTXXNEaMAADAC6f0bPzLL79cK1as8HUsAAA0GjO/4vaUX4SzceNGbd++XdKxcfyuXbv6LCgAAHyOt9413N69e3XzzTfrk08+UUJCgiTp0KFD+t3vfqfXXntNLVu29HWMAADACx6P2d95552qra3V9u3bdfDgQR08eFDbt2+X0+nUnXfe2RgxAgDgvfoJet4sAcrjyn716tVat26d2rVr51rXrl07Pffcc7r88st9GhwAAL5iMY4t3hwfqDxO9mlpaSd8eI7D4VBqaqpPggIAwOdMPGbvcRv/iSee0PDhw7Vx40bXuo0bN2rEiBF68sknfRocAADwXoMq+2bNmsli+XmsorKyUt26dVNY2LHD6+rqFBYWpr/85S/q379/owQKAIBXTPxQnQYl+2eeeaaRwwAAoJGZuI3foGSfk5PT2HEAAIBGcsoP1ZGkqqoq1dTUuK2zWq1eBQQAQKMwcWXv8QS9yspKDRs2TElJSYqNjVWzZs3cFgAATksmfuudx8n+/vvv14cffqiZM2cqMjJSL7/8siZNmqTU1FTNmzevMWIEAABe8LiNv2TJEs2bN089evTQHXfcocsvv1xt27ZVRkaG5s+fr0GDBjVGnAAAeMfEs/E9ruwPHjyoNm3aSDo2Pn/w4EFJ0mWXXaY1a9b4NjoAAHyk/gl63iyByuNk36ZNG+3evVuS1L59e73++uuSjlX89S/GAQAApw+Pk/0dd9yhzz//XJL0wAMPKC8vT1FRURo5cqTGjBnj8wABAPCJJp6gN3PmTHXq1ElWq1VWq1V2u13vvvuua3tVVZVyc3PVvHlzxcXFaeDAgSotLXU7R1FRkfr06aOYmBglJSVpzJgxqqur8/irezxmP3LkSNe/s7KytGPHDm3atElt27ZVp06dPA4AAIBg1LJlSz3++OM655xzZBiG5s6dq379+umzzz7Teeedp5EjR+qdd97RG2+8IZvNpmHDhmnAgAH65JNPJB1750yfPn2UkpKidevWaf/+/brtttsUHh6uxx57zKNYLIZhBOwoRHl5uWw2m378qo2s8R43KYCAkJ3axd8hAI2mzqjVKv1LZWVljfaclvpckfH/HlFIVNQpn8dZVaVvxz7sVayJiYl64okndMMNN+jMM8/UggULdMMNN0iSduzYoQ4dOqigoEDdu3fXu+++q9///vfat2+fkpOTJUmzZs3S2LFj9f333ysiIqLB121QZT9jxowGn/Dee+9t8L4AAASa8vJyt8+RkZGKjIz81WMcDofeeOMNVVZWym63a9OmTaqtrVVWVpZrn/bt2ys9Pd2V7AsKCtSxY0dXopek7OxsDR06VNu2bdMFF1zQ4JgblOynT5/eoJNZLBa/JPt
rxt6hsPBT/2sNOJ2d9fFOf4cANJrayhqpVxNdzEe33qWlpbmtnjBhgiZOnHjCQ7Zu3Sq73a6qqirFxcVp0aJFyszM1JYtWxQREXHcxPbk5GSVlJRIkkpKStwSff32+m2eaFCyr599DwBAwPLR43KLi4vd2vi/VtW3a9dOW7ZsUVlZmd58803l5ORo9erVXgRxarx6Nj4AAGZTP7u+ISIiItS2bVtJUteuXfXvf/9bzz77rP70pz+ppqZGhw4dcqvuS0tLlZKSIklKSUnRp59+6na++tn69fs0FLPaAADmcBo8G9/pdKq6ulpdu3ZVeHi4Vq5c6dpWWFiooqIi2e12SZLdbtfWrVt14MAB1z4rVqyQ1WpVZmamR9elsgcAmIK3T8Hz9NgHH3xQvXv3Vnp6ug4fPqwFCxZo1apVeu+992Sz2TR48GCNGjVKiYmJslqtGj58uOx2u7p37y5J6tWrlzIzM3Xrrbdq2rRpKikp0cMPP6zc3NzfnBD4SyR7AAAawYEDB3Tbbbdp//79stls6tSpk9577z1dc801ko5Nfg8JCdHAgQNVXV2t7OxsvfDCC67jQ0NDtXTpUg0dOlR2u12xsbHKycnR5MmTPY6FZA8AMIcmfp/97Nmzf3V7VFSU8vLylJeXd9J9MjIytGzZMs8ufAKnNGa/du1a3XLLLbLb7fruu+8kSa+++qo+/vhjrwMCAKBRnAZj9v7icbJ/6623lJ2drejoaH322Weqrq6WJJWVlXn8+D4AAND4PE72jzzyiGbNmqWXXnpJ4eHhrvWXXnqpNm/e7NPgAADwFTO/4tbjMfvCwkJdccUVx6232Ww6dOiQL2ICAMD3fPQEvUDkcWWfkpKinTuPf3znxx9/rDZt2vgkKAAAfI4x+4YbMmSIRowYoQ0bNshisWjfvn2aP3++Ro8eraFDhzZGjAAAwAset/EfeOABOZ1OXX311Tpy5IiuuOIKRUZGavTo0Ro+fHhjxAgAgNea+qE6pxOPk73FYtFDDz2kMWPGaOfOnaqoqFBmZqbi4uIaIz4AAHyjie+zP52c8kN1IiIiPH42LwAAaHoeJ/uePXvKYjn5jMQPP/zQq4AAAGgU3t4+Z6bKvkuXLm6fa2trtWXLFn3xxRfKycnxVVwAAPgWbfyGmz59+gnXT5w4URUVFV4HBAAAfMtn77O/5ZZb9Morr/jqdAAA+JaJ77P32VvvCgoKFBUV5avTAQDgU9x654EBAwa4fTYMQ/v379fGjRs1btw4nwUGAAB8w+Nkb7PZ3D6HhISoXbt2mjx5snr16uWzwAAAgG94lOwdDofuuOMOdezYUc2aNWusmAAA8D0Tz8b3aIJeaGioevXqxdvtAAABx8yvuPV4Nv7555+vb775pjFiAQAAjcDjZP/II49o9OjRWrp0qfbv36/y8nK3BQCA05YJb7uTPBiznzx5sv7617/quuuukyRdf/31bo/NNQxDFotFDofD91ECAOAtE4/ZNzjZT5o0SXfffbc++uijxowHAAD4WIOTvWEc+5PmyiuvbLRgAABoLDxUp4F+7W13AACc1mjjN8y55577mwn/4MGDXgUEAAB8y6NkP2nSpOOeoAcAQCCgjd9AN910k5KSkhorFgAAGo+J2/gNvs+e8XoAAAKTx7PxAQAISCau7Buc7J1OZ2PGAQBAo2LMHgCAYGfiyt7jZ+MDAIDAQmUPADAHE1f2JHsAgCmYecyeNj4AAEGOyh4AYA608QEACG608QEAQNCisgcAmANtfAAAgpyJkz1tfAAAghyVPQDAFCw/Ld4cH6hI9gAAczBxG59kDwAwBW69AwAAQYvKHgBgDrTxAQAwgQBO2N6gjQ8AQJCjsgcAmIKZJ+iR7AEA5mDiMXva+AAABDkqewCAKdDGBwAg2NHGBwAAwYrKHgBgCmZu41PZAwDMwfDB4oGpU6fq4osvVnx8vJKSktS/f38VFha67VNVVaXc3Fw1b95ccXFxGjhwoEpLS932KSo
qUp8+fRQTE6OkpCSNGTNGdXV1HsVCsgcAmEMTJ/vVq1crNzdX69ev14oVK1RbW6tevXqpsrLStc/IkSO1ZMkSvfHGG1q9erX27dunAQMGuLY7HA716dNHNTU1WrdunebOnav8/HyNHz/eo1ho4wMA4IHy8nK3z5GRkYqMjDxuv+XLl7t9zs/PV1JSkjZt2qQrrrhCZWVlmj17thYsWKCrrrpKkjRnzhx16NBB69evV/fu3fX+++/ryy+/1AcffKDk5GR16dJFU6ZM0dixYzVx4kRFREQ0KGYqewCAKdSP2XuzSFJaWppsNptrmTp1aoOuX1ZWJklKTEyUJG3atEm1tbXKyspy7dO+fXulp6eroKBAklRQUKCOHTsqOTnZtU92drbKy8u1bdu2Bn93KnsAgDn46Na74uJiWa1W1+oTVfW/5HQ6dd999+nSSy/V+eefL0kqKSlRRESEEhIS3PZNTk5WSUmJa5//TfT12+u3NRTJHgAAD1itVrdk3xC5ubn64osv9PHHHzdSVL+ONj4AwBQshuH1ciqGDRumpUuX6qOPPlLLli1d61NSUlRTU6NDhw657V9aWqqUlBTXPr+cnV//uX6fhiDZAwDMoYln4xuGoWHDhmnRokX68MMP1bp1a7ftXbt2VXh4uFauXOlaV1hYqKKiItntdkmS3W7X1q1bdeDAAdc+K1askNVqVWZmZoNjoY0PAEAjyM3N1YIFC/Svf/1L8fHxrjF2m82m6Oho2Ww2DR48WKNGjVJiYqKsVquGDx8uu92u7t27S5J69eqlzMxM3XrrrZo2bZpKSkr08MMPKzc3t0FzBeqR7AEAptDUT9CbOXOmJKlHjx5u6+fMmaPbb79dkjR9+nSFhIRo4MCBqq6uVnZ2tl544QXXvqGhoVq6dKmGDh0qu92u2NhY5eTkaPLkyR7FQrIHAJhDE78Ix2jAGH9UVJTy8vKUl5d30n0yMjK0bNkyzy7+C4zZAwAQ5KjsAQCmYOYX4ZDsAQDmYOL32ZPsAQCmYObKnjF7AACCHJU9AMAcaOMDABD8ArkV7w3a+AAABDkqewCAORjGscWb4wMUyR4AYArMxgcAAEGLyh4AYA7MxgcAILhZnMcWb44PVLTxAQAIclT2UOez9+vPV32u9mk/6AzbET3wci+t3drKbZ+M5B91T98N6tJ2v0JDDO0pbaaHXrlGpT/GSZLOal6u3P7r1alNiSLCHFq/PU3T3/qdfjwc44dvBJxc1atHVf3iEUX8MUrRI2IlSUenVahuY62cPzhlibEo9PwwRQ2NVWhGqOu4o89Uqu4/tXLudigkI1Tx+Ql++gY4ZSZu41PZQ9ERtdr5XXM99ealJ9x+VvNyzRzxtr49kKBhz/VVzv+7QfnvXaDq2mO/CKMiajX9nnckQ7r3+d/r7mf6KTzUoWlD3pMlkKevIujUba9TzdtVCjk71G19aLswRf/fOMXPT1DsU1bJkCpHlstwuP/8RvSJUvhVEU0ZMnyofja+N0ug8muyX7Nmjfr27avU1FRZLBYtXrzYn+GY1vrt6Xpp2cVa85/WJ9x+1+8/VcGXaXrh7e76+rsz9N1/rfr4i1Y6VBEtSerUulQpiRV6ZH4PfbM/Ud/sT9Qj83uqfdr36nrOd035VYCTMo4YOjrpsGLuj5Ul3uK2LaJflMK6hCukRahC24UpakiMjANOOUt+HqSNvi9WkQOjFJIa+stTI1DU32fvzRKg/JrsKysr1blzZ+Xl5fkzDPwKi8XQ7zKLVXwgQU/fvUxLH5mnv41cpMs77nHtEx7mkGFItXU//xKsqQ2V07CoU5sSP0QNHO/o05UK+12Ewi7+9crcOGqoZlm1LC1CFJJE8xPBwa9j9r1791bv3r0bvH91dbWqq6tdn8vLyxsjLPyPZnFHFRNVq1uytuilZRdp5pJL1K3DXj32l/c1/Pnfa8uuVG3bk6SqmjDdc/0GzVp6iSwWQ0P7fqqwUEPNrUf8/RUA1XxQLcdXdYp
7yXbSfar/WaWqmZXSUSkkPUSxz1hlCbecdH8EHjM/VCegJuhNnTpVkyZN8ncYphLy00/32i8ytHBVJ0nS19+doY6tStT/0u3asitVhyqjNW7ONRp941rdcMUXchoWfbD5bO0oPkOGwS9L+Jez1KGqZysVO90qS+TJfx4jekUo7OJwGf91qvofR3Vk3GHFzbT96jEIMCaeoBdQyf7BBx/UqFGjXJ/Ly8uVlpbmx4iC36HKKNU5LNpT0sxt/Z7SZm4t+k8LW+rGKTfLFlslh9OiiqORenvKq1r537ObOmTAjaPQIeNHQxWDy/5npeT4vE41/6yS9cNEWUItssSFKDROUlqoQs8LU3nvg6pdU6OIayL9FjvgKwGV7CMjIxUZyX94TanOEartRUlKTzrktj4tqUwlP91297/KKqMkSRee852axR3Vx19kNEWYwEmFXRSuuHnu7fujj1UoJCNUkYOiZQk9QeVeXwHWBnAph+PQxoepRUfUquWZP1c9qc3Ldc5ZP6j8SJRKf4zTgg87aXLOSm3Z1UKbv05V9w7FuvS8bzX8+b6uY67rVqhvSxJ0qCJa57Uu1X0D1mnh6o4qOpDgh28E/MwSY1Fom1/8qouyyGINUWibMDm/c6jmwxqFXxwuS4JFzu+dqv77UVkiLQqz/zyZz7HXIR01ZBx0StWGHF/XSZJCWoUyth8oeOsdzKx9+vd6fvhS1+d7/7BekrRsw7l6dEEPrflPaz3x+mW69ZotGjlgnYoOJOihV67Rf75JcR2TnnRId//+U1ljqrX/YLzmvn+BFq7q2OTfBfBYpEWOz2tV8/pRGYcNWRJDFNY5TLGzbApp9vNs/KOPV8ixpc71ueKOY38gx7+RIEsLbsfD6c2vyb6iokI7d+50fd69e7e2bNmixMREpaen+zEyc/lsZ6ouHXHXr+7zzob2emdD+5Nun7Wkm2Yt6ebr0IBGEff8z239kDNCFPuk1aNjEJho4/vJxo0b1bNnT9fn+sl3OTk5ys/P91NUAICgxGx8/+jRo4eMAB4DAQAgEDBmDwAwBdr4AAAEO6dxbPHm+ABFsgcAmIOJx+x5ywMAAEGOyh4AYAoWeTlm77NImh7JHgBgDiZ+gh5tfAAAghyVPQDAFLj1DgCAYMdsfAAAEKyo7AEApmAxDFm8mGTnzbH+RrIHAJiD86fFm+MDFG18AACCHJU9AMAUaOMDABDsTDwbn2QPADAHnqAHAACCFZU9AMAUeIIeAADBjjY+AAAIVlT2AABTsDiPLd4cH6hI9gAAc6CNDwAAghWVPQDAHHioDgAAwc3Mj8uljQ8AQJCjsgcAmAMT9AAACHKGfn6n/aksHub6NWvWqG/fvkpNTZXFYtHixYvdwzEMjR8/Xi1atFB0dLSysrL09ddfu+1z8OBBDRo0SFarVQkJCRo8eLAqKio8/OIkewCASdSP2XuzeKKyslKdO3dWXl7eCbdPmzZNM2bM0KxZs7RhwwbFxsYqOztbVVVVrn0GDRqkbdu2acWKFVq6dKnWrFmju+66y+PvThsfAIBG0Lt3b/Xu3fuE2wzD0DPPPKOHH35Y/fr1kyTNmzdPycnJWrx4sW666SZt375dy5cv17///W9ddNFFkqTnnntO1113nZ588kmlpqY2OBYqewCAORj6edz+lJZjpykvL3dbqqurPQ5l9+7dKikpUVZWlmudzWZTt27dVFBQIEkqKChQQkKCK9FLUlZWlkJCQrRhwwaPrkeyBwCYg1eJ/ufJfWlpabLZbK5l6tSpHodSUlIiSUpOTnZbn5yc7NpWUlKipKQkt+1hYWFKTEx07dNQtPEBAPBAcXGxrFar63NkZKQfo2kYKnsAgDl4MxO/fpFktVrdllNJ9ikpKZKk0tJSt/WlpaWubSkpKTpw4IDb9rq6Oh08eNC1T0OR7AEAptDUs/F/TevWrZWSkqKVK1e61pWXl2vDhg2y2+2SJLvdrkOHDmnTpk2ufT788EM5nU5169bNo+v
RxgcAoBFUVFRo586drs+7d+/Wli1blJiYqPT0dN1333165JFHdM4556h169YaN26cUlNT1b9/f0lShw4ddO2112rIkCGaNWuWamtrNWzYMN10000ezcSXSPYAALNo4ifobdy4UT179nR9HjVqlCQpJydH+fn5uv/++1VZWam77rpLhw4d0mWXXably5crKirKdcz8+fM1bNgwXX311QoJCdHAgQM1Y8YMj0Mn2QMAzKGJk32PHj1k/MoxFotFkydP1uTJk0+6T2JiohYsWODRdU+EMXsAAIIclT0AwBxM/CIckj0AwByckixeHh+gSPYAAFPw9vY5X95619QYswcAIMhR2QMAzIExewAAgpzTkCxeJGxn4CZ72vgAAAQ5KnsAgDnQxgcAINh5mewVuMmeNj4AAEGOyh4AYA608QEACHJOQ1614pmNDwAATldU9gAAczCcxxZvjg9QJHsAgDkwZg8AQJBjzB4AAAQrKnsAgDnQxgcAIMgZ8jLZ+yySJkcbHwCAIEdlDwAwB9r4AAAEOadTkhf3yjsD9z572vgAAAQ5KnsAgDnQxgcAIMiZONnTxgcAIMhR2QMAzMHEj8sl2QMATMEwnDK8eHOdN8f6G8keAGAOhuFddc6YPQAAOF1R2QMAzMHwcsw+gCt7kj0AwBycTsnixbh7AI/Z08YHACDIUdkDAMyBNj4AAMHNcDpleNHGD+Rb72jjAwAQ5KjsAQDmQBsfAIAg5zQkizmTPW18AACCHJU9AMAcDEOSN/fZB25lT7IHAJiC4TRkeNHGN0j2AACc5gynvKvsufUOAACcpqjsAQCmQBsfAIBgZ+I2fkAn+/q/shy1VX6OBGg8tZU1/g4BaDT1P99NUTXXqdarZ+rUqdZ3wTQxixHAfYm9e/cqLS3N32EAALxUXFysli1bNsq5q6qq1Lp1a5WUlHh9rpSUFO3evVtRUVE+iKzpBHSydzqd2rdvn+Lj42WxWPwdjimUl5crLS1NxcXFslqt/g4H8Cl+vpueYRg6fPiwUlNTFRLSeHPGq6qqVFPjfZcsIiIi4BK9FOBt/JCQkEb7SxC/zmq18ssQQYuf76Zls9ka/RpRUVEBmaR9hVvvAAAIciR7AACCHMkeHomMjNSECRMUGRnp71AAn+PnG8EqoCfoAQCA30ZlDwBAkCPZAwAQ5Ej2AAAEOZI9AABBjmSPBsvLy1OrVq0UFRWlbt266dNPP/V3SIBPrFmzRn379lVqaqosFosWL17s75AAnyLZo0EWLlyoUaNGacKECdq8ebM6d+6s7OxsHThwwN+hAV6rrKxU586dlZeX5+9QgEbBrXdokG7duuniiy/W888/L+nYewnS0tI0fPhwPfDAA36ODvAdi8WiRYsWqX///v4OBfAZKnv8ppqaGm3atElZWVmudSEhIcrKylJBQYEfIwMANATJHr/phx9+kMPhUHJystv65ORkn7wyEgDQuEj2AAAEOZI9ftMZZ5yh0NBQlZaWuq0vLS1VSkqKn6ICADQUyR6/KSIiQl27dtXKlStd65xOp1auXCm73e7HyAAADRHm7wAQGEaNGqWcnBxddNFFuuSSS/TMM8+osrJSd9xxh79DA7xWUVGhnTt3uj7v3r1bW7ZsUWJiotLT0/0YGeAb3HqHBnv++ef1xBNPqKSkRF26dNGMGTPUrVs3f4cFeG3VqlXq2bPncetzcnKUn5/f9AEBPkayBwAgyDFmDwBAkCPZAwAQ5Ej2AAAEOZI9AABBjmQPAECQI9kDABDkSPYAAAQ5kj0AAEGOZA946fbbb1f//v1dn3v06KH77ruvyeNYtWqVLBaLDh06dNJ9LBaLFi9e3OBzTpw4UV26dPEqrj179shisWjLli1enQfAqSPZIyjdfvvtslgsslgsioiIUNu2bTV58mTV1dU1+rX/+c9/asqUKQ3atyEJGgC8xYtwELSuvfZazZkzR9XV1Vq2bJlyc3MVHh6uBx988Lh9a2pqFBER4ZPrJiY
m+uQ8AOArVPYIWpGRkUpJSVFGRoaGDh2qrKwsvf3225J+br0/+uijSk1NVbt27SRJxcXFuvHGG5WQkKDExET169dPe/bscZ3T4XBo1KhRSkhIUPPmzXX//ffrl6+X+GUbv7q6WmPHjlVaWpoiIyPVtm1bzZ49W3v27HG9fKVZs2ayWCy6/fbbJR17hfDUqVPVunVrRUdHq3PnznrzzTfdrrNs2TKde+65io6OVs+ePd3ibKixY8fq3HPPVUxMjNq0aaNx48aptrb2uP1efPFFpaWlKSYmRjfeeKPKysrctr/88svq0KGDoqKi1L59e73wwgsexwKg8ZDsYRrR0dGqqalxfV65cqUKCwu1YsUKLV26VLW1tcrOzlZ8fLzWrl2rTz75RHFxcbr22mtdxz311FPKz8/XK6+8oo8//lgHDx7UokWLfvW6t912m/7xj39oxowZ2r59u1588UXFxcUpLS1Nb731liSpsLBQ+/fv17PPPitJmjp1qubNm6dZs2Zp27ZtGjlypG655RatXr1a0rE/SgYMGKC+fftqy5YtuvPOO/XAAw94/L9JfHy88vPz9eWXX+rZZ5/VSy+9pOnTp7vts3PnTr3++utasmSJli9frs8++0z33HOPa/v8+fM1fvx4Pfroo9q+fbsee+wxjRs3TnPnzvU4HgCNxACCUE5OjtGvXz/DMAzD6XQaK1asMCIjI43Ro0e7ticnJxvV1dWuY1599VWjXbt2htPpdK2rrq42oqOjjffee88wDMNo0aKFMW3aNNf22tpao2XLlq5rGYZhXHnllcaIESMMwzCMwsJCQ5KxYsWKE8b50UcfGZKMH3/80bWuqqrKiImJMdatW+e27+DBg42bb77ZMAzDePDBB43MzEy37WPHjj3uXL8kyVi0aNFJtz/xxBNG165dXZ8nTJhghIaGGnv37nWte/fdd42QkBBj//79hmEYxtlnn20sWLDA7TxTpkwx7Ha7YRiGsXv3bkOS8dlnn530ugAaF2P2CFpLly5VXFycamtr5XQ69ec//1kTJ050be/YsaPbOP3nn3+unTt3Kj4+3u08VVVV2rVrl8rKyrR//35169bNtS0sLEwXXXTRca38elu2bFFoaKiuvPLKBse9c+dOHTlyRNdcc43b+pqaGl1wwQWSpO3bt7vFIUl2u73B16i3cOFCzZgxQ7t27VJFRYXq6upktVrd9klPT9dZZ53ldh2n06nCwkLFx8dr165dGjx4sIYMGeLap66uTjabzeN4ADQOkj2CVs+ePTVz5kxFREQoNTVVYWHuP+6xsbFunysqKtS1a1fNnz//uHOdeeaZpxRDdHS0x8dUVFRIkt555x23JCsdm4fgKwUFBRo0aJAmTZqk7Oxs2Ww2vfbaa3rqqac8jvWll1467o+P0NBQn8UKwDskewSt2NhYtW3btsH7X3jhhVq4cKGSkpKOq27rtWjRQhs2bNAVV1wh6VgFu2nTJl144YUn3L9jx45yOp1avXq1srKyjtte31lwOByudZmZmYqMjFRRUdFJOwIdOnRwTTast379+t/+kv9j3bp1ysjI0EMPPeRa9+233x63X1FRkfbt26fU1FTXdUJCQtSuXTslJycrNTVV33zzjQYNGuTR9QE0HSboAT8ZNGiQzjjjDPXr109r167V7t27tWrVKt17773au3evJGnEiBF6/PHHtXjxYu3YsUP33HPPr94j36pVK+Xk5Ogvf/mLFi9e7Drn66+/LknKyMiQxWLR0qVL9f3336uiokLx8fEaPXq0Ro4cqblz52rXrl3avHmznnvuOdekt7vvvltff/21xowZo8LCQi1YsED5+fkefd9zzjlHRUVFeu2117Rr1y7NmDHjhJMNo6KilJOTo88//1xr167VvffeqxtvvFEpKSmSpEmTJmnq1KmaMWOGvvrqK23dulVz5szR008/7VE8ABoPyR74SUxMjNasWaP09HQNGDBAHTp00ODBg1VVVeWq9P/617/
q1ltvVU5Ojux2u+Lj4/WHP/zhV887c+ZM3XDDDbrnnnvUvn17DRkyRJWVlZKks846S5MmTdIDDzyg5ORkDRs2TJI0ZcoUjRs3TlOnTlWHDh107bXX6p133lHr1q0lHRtHf+utt7R48WJ17txZs2bN0mOPPebR973++us1cuRIDRs2TF26dNG6des0bty44/Zr27atBgwYoOuuu069evVSp06d3G6tu/POO/Xyyy9rzpw56tixo6688krl5+e7YgXgfxbjZDOLAABAUKCyBwAgyJHsAQAIciR7AACCHMkeAIAgR7IHACDIkewBAAhyJHsAAIIcyR4AgCBHsgcAIMiR7AEACHIkewAAgtz/BxyJJ0o/oRtCAAAAAElFTkSuQmCC", + "text/plain": [ + "<Figure size 640x480 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cm =confusion_matrix(y_test, firstknn_pred)\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm)\n", + "disp.plot()\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>4. Improving</h1>" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A grid search will be performed to find the optimal value of K. \n", + "\n", + "Afterwards, stratified K-fold cross-validation will be used, followed by a confusion matrix as an evaluation."
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'n_neighbors': 1}" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.model_selection import StratifiedKFold\n", + "# Fresh, unconfigured KNN estimator for the search (the k=18 baseline `knn` is left untouched)\n", + "knn2 = KNeighborsClassifier()\n", + "# Candidate neighbourhood sizes: k = 1, 2, ..., 19\n", + "param_grid = {'n_neighbors': np.arange(1, 20)}\n", + "# 5-fold stratified CV without shuffling; with no `scoring` given, a classifier is scored by accuracy\n", + "knn_gscv = GridSearchCV(estimator=knn2, param_grid=param_grid, cv=StratifiedKFold(n_splits=5))\n", + "# Score every k on the training split only, then refit the best k on all of X_train\n", + "knn_gscv.fit(X_train, y_train)\n", + "knn_gscv.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9391666666666667" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn_gscv.best_score_" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://towardsdatascience.com/building-a-k-nearest-neighbors-k-nn-model-with-scikit-learn-51209555453a#:~:text=k%2DFold%20Cross%2DValidation,scored%20on%20the%20test%20set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "# from sklearn.model_selection import KFold\n", + "# from sklearn.model_selection import cross_val_score\n", + "# from sklearn.model_selection import StratifiedKFold\n", + "\n", + "# import seaborn as sns\n", + "# k_values = [i for i in range (1,30)]\n", + "# scores = []\n", + "\n", + "# for k in k_values:\n", + "# knn = KNeighborsClassifier(n_neighbors=k)\n", + "# score = cross_val_score(knn, X_train, y_train, cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False))\n", + "# scores.append(np.mean(score))\n", + "# sns.lineplot(x = k_values, y = scores, marker = 'o')\n", + "# plt.xlabel(\"K Values\")\n", + "# plt.ylabel(\"Accuracy Score\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1> 5. Metric Evaluation</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 0, ..., 0, 0, 1], dtype=int64)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_pred = knn_gscv.predict(X_test)  # predictions of the refit best estimator on the held-out split\n", + "my_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9558333333333333" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "score = knn_gscv.score(X_test, y_test)  # held-out accuracy (no custom `scoring` was given to the search)\n", + "score" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.94914459 0.91638213 0.92562835 0.93236807 0.94583029]\n", + " StratifiedKFold Cross-Validation Accuracy: 93.39% | Standard Deviation: 1.23%\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.model_selection import 
StratifiedKFold\n", + "# Nested CV on the full labelled set: each outer fold re-runs the grid search and is scored with macro-averaged F1 (not accuracy)\n", + "skf = StratifiedKFold(n_splits=5, random_state=1, shuffle=True)\n", + "cvScore = cross_val_score(knn_gscv, X, y, cv=skf, scoring='f1_macro')\n", + "print (\" StratifiedKFold Cross-Validation Macro-F1: %0.2f%% | Standard Deviation: %0.2f%%\" % (100*cvScore.mean(), 100*cvScore.std()))\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Confusion Matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[580, 20],\n", + " [ 33, 567]], dtype=int64)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import ConfusionMatrixDisplay\n", + "confusion_matrix(y_test, my_pred)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.95 0.97 0.96 600\n", + " 1 0.97 0.94 0.96 600\n", + "\n", + " accuracy 0.96 1200\n", + " macro avg 0.96 0.96 0.96 1200\n", + "weighted avg 0.96 0.96 0.96 1200\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_test, my_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfsAAAGwCAYAAACuFMx9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA2EklEQVR4nO3deXgUZbr38V9nX7tDkCRGQgBBIMqiqNDjikYiooLgcRnUiOi8YoIKgshRdhUHFxQniqNIZEZGXEZGEFHEAVTiQhAPKkRBMIGQBMUkJJitu94/MO30gJqmO2m66/u5rrouu+qp6jvnZLhz389TVRbDMAwBAICgFeLvAAAAQOsi2QMAEORI9gAABDmSPQAAQY5kDwBAkCPZAwAQ5Ej2AAAEuTB/B+ANp9Op0tJSxcfHy2Kx+DscAICHDMPQgQMHlJqaqpCQ1qs/6+rq1NDQ4PV1IiIiFBUV5YOI2lZAJ/vS0lKlpaX5OwwAgJdKSkrUsWPHVrl2XV2duqTHqazC4fW1UlJStHPnzoBL+AGd7OPj4yVJ323qLGscMxIITlec1NvfIQCtpkmN+kArXf+et4aGhgaVVTj0XWFnWeOPPldUH3Aqvf8uNTQ0kOzbUnPr3hoX4tX/A4FjWZgl3N8hAK3n5we2t8VUbFy8RXHxR/89TgXudHFAJ3sAAFrKYTjl8OJtMA7D6btg2hjJHgBgCk4Zcuros7035/obvW8AAIIclT0AwBSccsqbRrx3Z/sXyR4AYAoOw5DDOPpWvDfn+httfAAAghyVPQDAFMy8QI9kDwAwBacMOUya7GnjAwAQ5KjsAQCmQBsfAIAgx2p8AAAQtKjsAQCm4Px58+b8QEWyBwCYgsPL1fjenOtvJHsAgCk4DHn51jvfxdLWmLMHACDIUdkDAEyBOXsAAIKcUxY5ZPHq/EBFGx8AgCBHZQ8AMAWncWjz5vxARbIHAJiCw8s2vjfn+httfAAAghyVPQDAFMxc2ZPsAQCm4DQschperMb34lx/o40PAECQo7IHAJgCbXwAAIKcQyFyeNHQdvgwlrZGsgcAmILh5Zy9wZw9AAA4VlHZAwBMgTl7AACCnMMIkcPwYs4+gB+XSxsfAIAgR2UPADAFpyxyelHjOhW4pT3JHgBgCmaes6eNDwBAkKOyBwCYgvcL9GjjAwBwTDs0Z+/Fi3Bo4wMAgGMVlT0AwBScXj4bn9X4AAAc45izBwAgyDkVYtr77JmzBwAgyFHZAwBMwWFY5PDiNbXenOtvJHsAgCk4vFyg56CNDwAAjlVU9gAAU3AaIXJ6sRrfGcCr8ansAQCm0NzG92bzxIwZM2SxWNy2nj17uo7X1dUpJydH7du3V1xcnEaOHKny8nK3axQXF2vo0KGKiYlRUlKSJk2apKamJo9/dip7AABaycknn6x3333X9Tks7Je0O378eL355pt65ZVXZLPZlJubqxEjRujDDz+UJDkcDg0dOlQpKSnasGGD9u7dqxtuuEHh4eF68MEHPYqDZA8AMAWnvFtR7zyKc8LCwpSSknLY/qqqKi1cuFBLlizRBRdcIElatGiRevXqpY8++kgDBw7UO++8o6+++krvvvuukpOT1a9fP82ePVuTJ0/WjBkzFBER0eI4aOMDAEyh+aE63mySVF1d7bbV19f/6nd+8803Sk1NVdeuXTVq1CgVFxdLkgoLC9XY2KjMzEzX2J49e6pTp04qKCiQJBUUFKh3795KTk52jcnKylJ1dbW+/PJLj352kj0AAB5IS0uTzWZzbXPmzDniuAEDBig/P1+rVq3S008/rZ07d+qcc87RgQMHVFZWpoiICCUkJLidk5ycrLKyMklSWVmZW6JvPt58zBO08QEApuD9s/EPnVtSUiKr1eraHxkZecTxQ4YMcf13nz59NGDAAKWnp+vll19WdHT0UcdxNKjsAQCm0Pw+e282SbJarW7bryX7/5aQkKCTTjpJ27dvV0pKihoaGlRZWek2pry83DXHn5K
Sctjq/ObPR1oH8FtI9gAAU2iu7L3ZvFFTU6MdO3bo+OOPV//+/RUeHq41a9a4jhcVFam4uFh2u12SZLfbtWXLFlVUVLjGrF69WlarVRkZGR59N218AABawcSJE3XZZZcpPT1dpaWlmj59ukJDQ3XttdfKZrNpzJgxmjBhghITE2W1WjVu3DjZ7XYNHDhQkjR48GBlZGTo+uuv19y5c1VWVqb77rtPOTk5Le4mNCPZAwBMwftn43t27u7du3Xttdfqhx9+UIcOHXT22Wfro48+UocOHSRJ8+bNU0hIiEaOHKn6+nplZWXpqaeecp0fGhqqFStWaOzYsbLb7YqNjVV2drZmzZrlcewkewCAKTgNi5ze3Gfv4bkvvfTSbx6PiopSXl6e8vLyfnVMenq6Vq5c6dH3Hglz9gAABDkqewCAKTi9bOM7A7g+JtkDAEzB+7feBW6yD9zIAQBAi1DZAwBMwSGLHDr6BXrenOtvJHsAgCnQxgcAAEGLyh4AYAoOedeKd/gulDZHsgcAmIKZ2/gkewCAKfjqFbeBKHAjBwAALUJlDwAwBeM/3kl/tOcHKpI9AMAUaOMDAICgRWUPADCFtn7F7bGEZA8AMAWHl2+98+ZcfwvcyAEAQItQ2QMATIE2PgAAQc6pEDm9aGh7c66/BW7kAACgRajsAQCm4DAscnjRivfmXH8j2QMATIE5ewAAgpzh5VvvDJ6gBwAAjlVU9gAAU3DIIocXL7Px5lx/I9kDAEzBaXg37+40fBhMG6ONDwBAkKOyN7m/PZKivz+W4rav44l1Wvj+NknS/oowPTc7VZvWx+tgTYjSTqzXNXeU65yhVa7x1T+G6qn7TtDHq22yhEhnX1KpsbP3KDrW2aY/C9BSV+eW66xLqpTWrV4NdSH6amOMFj5wvHbviHKNCY906k/TS3X+5ZUKjzRUuDZeT045QZXfh/sxcnjD6eUCPW/O9TeSPZTe4yc9tHSH63No6C+9qodv76Sa6lDNyN8pW2KT/v16Oz34/zrrybe+VrfeP0mS/pybrv3l4Zrz0g41NVr06IROenxSmqY89V2b/yxAS/Sx12p5/nH6enOMQsMM3XjPXj34j291y3k9VP9TqCTp1hmlOjOzWvf/v3TVVocq54E9mrZwlyYM6+7n6HG0nLLI6cW8uzfn+tsx8WdKXl6eOnfurKioKA0YMECffPKJv0MyldBQKTGpybXZ2jtcx77aGKthN32vnqce1PHpDfrjneWKtTn0zf9FS5KKv4nUxn9bNf7RYvU87aBOGVCr2+7frXX/StAPZfwtiWPTvaO6avXLifru6yh9+1W0Hr2zk5I7Nqp7n0N/wMbEO5R17X49MyNVn38Yr+1bYvTYhDSdfMZB9Tyt1s/RA57ze7JfunSpJkyYoOnTp2vTpk3q27evsrKyVFFR4e/QTGPPzghde+rJyh7YSw/ldFLF7l/alBmn12rdGwmq/jFUTqe0dlmCGuos6vOHGknS1o2xirM16aS+P7nOOe2cA7KESNs+i23znwU4GrHWQ3/gHqg8VNV373NQ4RGGPns/3jWmZHuUyneHq1f/g36JEd5rfoKeN1ug8nuyf+yxx3TLLbdo9OjRysjI0IIFCxQTE6Pnn3/e36GZQs/TajXx8WI98OIOjXtot8qKI3XXFd11sObQr8a9z3wnR6NF/3Nyb13aua+emJym6Qt36YQuDZKk/fvClNC+ye2aoWFSfEKT9ldQ2ePYZ7EYunXmHn3xSYy+KzrUsUpMalJDvUW11aFuYyv3hSkxqdEfYcIHmufsvdkClV//NW5oaFBhYaGmTJni2hcSEqLMzEwVFBQcNr6+vl719fWuz9XV1W0SZzA744IDrv/umlGnnqce1PVnZmj9Gwm6+I/79cLcFNVUh+qhpdtlTWxSwSqbHri1sx59/Rt16VXnx8gB38h9cI/Se9bpruHd/B0K0Gr8+mfK999/L4fDoeTkZLf9ycnJKisrO2z8nDlzZLPZXFt
aWlpbhWoacTaHOnatV+muSJXuitAbizpowmMlOvWcGp14cp2uu6tc3fsc1Bv5x0mSEjs0qfIH978ZHU3SgcowJSY1HekrgGNGzgO7NeCiat195Yn6fm+Ea//+ijBFRBqu9n6zhA5N2l/BavxA5ZTF9Xz8o9pYoNc2pkyZoqqqKtdWUlLi75CCzk+1ISr9LkKJSY2q/+nQr0dIiPuTJEJDDRk/31XX6/Ra1VSFuRbsSdLmD+JlOKWep7KQCccqQzkP7NYfLq7S3f9zospLIt2OfvN/MWpssOjUs3/pfHU8sU7JHRu1tTCmrYOFjxg/r8Y/2s0I4GTv1zb+cccdp9DQUJWXl7vtLy8vV0pKymHjIyMjFRkZedh+HL2/zkzVwMFVSurYqB/KwvS3R45XaIh0/hU/Ks7qUGqXej1xd5pumVYqa7smbVhl06b18Zq1+FtJUqfu9Tp9ULUen5imcX/eLUejRXn3naDzhlWqfQqVPY5NuQ/u0aArftSM0V30U02I2nU4NA9feyBUDXUhOnggVG//I1F/mlGqA5Vhqj0QopwH9uirjTHatomFp4GKt975SUREhPr37681a9Zo+PDhkiSn06k1a9YoNzfXn6GZxvd7wzXnts468GOobO2bdPIZtXp8xddK+Pn2u/v/tkMLH0zV9Owu+qk2RKldGjTxiWKdeeEvFc/kv3ynvHs76p6rTnQ9VOe2+/f460cCftdlN/4gSXrknzvc9j9yZ5pWv5woSVowI1VOQ5r67C6FRxrauDZef5lyQpvHCviCxTAMvz7td+nSpcrOztYzzzyjM888U48//rhefvllbdu27bC5/P9WXV0tm82mH7/uKmt8QM1IAC2WldrP3yEArabJaNRa/UtVVVWyWq2t8h3NueKK1aMVHhvx+yf8isbaBr1+0aJWjbW1+P3eqKuvvlr79u3TtGnTVFZWpn79+mnVqlW/m+gBAPAEbXw/y83NpW0PAEArOSaSPQAArc3Mz8Yn2QMATMHMbXxWtQEAEOSo7AEApmDmyp5kDwAwBTMne9r4AAAEOSp7AIApmLmyJ9kDAEzBkHe3z/n1cbNeItkDAEzBzJU9c/YAAAQ5KnsAgCmYubIn2QMATMHMyZ42PgAAQY7KHgBgCmau7En2AABTMAyLDC8Stjfn+httfAAAWtlDDz0ki8WiO++807Wvrq5OOTk5at++veLi4jRy5EiVl5e7nVdcXKyhQ4cqJiZGSUlJmjRpkpqamjz+fpI9AMAUmt9n7812ND799FM988wz6tOnj9v+8ePHa/ny5XrllVe0bt06lZaWasSIEa7jDodDQ4cOVUNDgzZs2KAXXnhB+fn5mjZtmscxkOwBAKbQPGfvzeapmpoajRo1Ss8++6zatWvn2l9VVaWFCxfqscce0wUXXKD+/ftr0aJF2rBhgz766CNJ0jvvvKOvvvpKf//739WvXz8NGTJEs2fPVl5enhoaGjyKg2QPAIAHqqur3bb6+vpfHZuTk6OhQ4cqMzPTbX9hYaEaGxvd9vfs2VOdOnVSQUGBJKmgoEC9e/dWcnKya0xWVpaqq6v15ZdfehQzyR4AYArNC/S82SQpLS1NNpvNtc2ZM+eI3/fSSy9p06ZNRzxeVlamiIgIJSQkuO1PTk5WWVmZa8x/Jvrm483HPMFqfACAKfjq1ruSkhJZrVbX/sjIyMPGlpSU6I477tDq1asVFRV11N/pK1T2AABT8FVlb7Va3bYjJfvCwkJVVFTotNNOU1hYmMLCwrRu3TrNnz9fYWFhSk5OVkNDgyorK93OKy8vV0pKiiQpJSXlsNX5zZ+bx7QUyR4AAB+78MILtWXLFm3evNm1nX766Ro1apTrv8PDw7VmzRrXOUVFRSouLpbdbpck2e12bdmyRRUVFa4xq1evltVqVUZGhkfx0MYHAJiC4WUb35OH6sTHx+uUU05x2xcbG6v27du79o8ZM0YTJkxQYmKirFarxo0bJ7vdroEDB0qSBg8erIy
MDF1//fWaO3euysrKdN999yknJ+eI3YTfQrIHAJiCIckwvDvfl+bNm6eQkBCNHDlS9fX1ysrK0lNPPeU6HhoaqhUrVmjs2LGy2+2KjY1Vdna2Zs2a5fF3kewBAGgDa9eudfscFRWlvLw85eXl/eo56enpWrlypdffTbIHAJiCUxZZjvIpeM3nByqSPQDAFHgRDgAACFpU9gAAU3AaFll4nz0AAMHLMLxcje/r5fhtiDY+AABBjsoeAGAKZl6gR7IHAJgCyR4AgCBn5gV6zNkDABDkqOwBAKZg5tX4JHsAgCkcSvbezNn7MJg2RhsfAIAgR2UPADAFVuMDABDkDHn3TvoA7uLTxgcAINhR2QMATIE2PgAAwc7EfXySPQDAHLys7BXAlT1z9gAABDkqewCAKfAEPQAAgpyZF+jRxgcAIMhR2QMAzMGweLfILoAre5I9AMAUzDxnTxsfAIAgR2UPADAHHqoDAEBwM/Nq/BYl+zfeeKPFF7z88suPOhgAAOB7LUr2w4cPb9HFLBaLHA6HN/EAANB6ArgV740WJXun09nacQAA0KrM3Mb3ajV+XV2dr+IAAKB1GT7YApTHyd7hcGj27Nk64YQTFBcXp2+//VaSNHXqVC1cuNDnAQIAAO94nOwfeOAB5efna+7cuYqIiHDtP+WUU/Tcc8/5NDgAAHzH4oMtMHmc7BcvXqy//vWvGjVqlEJDQ137+/btq23btvk0OAAAfIY2fsvt2bNH3bp1O2y/0+lUY2OjT4ICAAC+43Gyz8jI0Pvvv3/Y/ldffVWnnnqqT4ICAMDnTFzZe/wEvWnTpik7O1t79uyR0+nUP//5TxUVFWnx4sVasWJFa8QIAID3TPzWO48r+2HDhmn58uV69913FRsbq2nTpmnr1q1avny5LrrootaIEQAAeOGono1/zjnnaPXq1b6OBQCAVmPmV9we9YtwNm7cqK1bt0o6NI/fv39/nwUFAIDP8da7ltu9e7euvfZaffjhh0pISJAkVVZW6g9/+INeeukldezY0dcxAgAAL3g8Z3/zzTersbFRW7du1f79+7V//35t3bpVTqdTN998c2vECACA95oX6HmzBSiPK/t169Zpw4YN6tGjh2tfjx499OSTT+qcc87xaXAAAPiKxTi0eXN+oPI42aelpR3x4TkOh0Opqak+CQoAAJ8z8Zy9x238hx9+WOPGjdPGjRtd+zZu3Kg77rhDjzzyiE+DAwAA3mtRZd+uXTtZLL/MVdTW1mrAgAEKCzt0elNTk8LCwnTTTTdp+PDhrRIoAABeMfFDdVqU7B9//PFWDgMAgFZm4jZ+i5J9dnZ2a8cBAABayVE/VEeS6urq1NDQ4LbParV6FRAAAK3CxJW9xwv0amtrlZubq6SkJMXGxqpdu3ZuGwAAxyQTv/XO42R/991367333tPTTz+tyMhIPffcc5o5c6ZSU1O1ePHi1ogRAAB4weM2/vLly7V48WKdf/75Gj16tM455xx169ZN6enpevHFFzVq1KjWiBMAAO+YeDW+x5X9/v371bVrV0mH5uf3798vSTr77LO1fv1630YHAICPND9Bz5stUHmc7Lt27aqdO3dKknr27KmXX35Z0qGKv/nFOAAA4NjhcbIfPXq0Pv/8c0nSPffco7y8PEVFRWn8+PGaNGmSzwMEAMAn2niB3tNPP60+ffrIarXKarXKbrfrrbfech2vq6tTTk6O2rdvr7i4OI0cOVLl5eVu1yguLtbQoUMVExOjpKQkTZo0SU1NTR7/6B7P2Y8fP97135mZmdq2bZsKCwvVrVs39enTx+MAAAAIRh07dtRDDz2k7t27yzAMvfDCCxo2bJg+++wznXzyyRo/frzefPNNvfLKK7LZbMrNzdWIESP04YcfSjr0zpmhQ4cqJSVFGzZs0N69e3XDDTcoPDxcDz74oEexWAzDCNhZiOrqatlsNv34dVdZ4z1uUgABISu1n79DAFpNk9GotfqXqqqqWu05Lc25Iv3
P9yskKuqor+Osq9N3k+/zKtbExEQ9/PDDuvLKK9WhQwctWbJEV155pSRp27Zt6tWrlwoKCjRw4EC99dZbuvTSS1VaWqrk5GRJ0oIFCzR58mTt27dPERERLf7eFlX28+fPb/EFb7/99haPBQAg0FRXV7t9joyMVGRk5G+e43A49Morr6i2tlZ2u12FhYVqbGxUZmama0zPnj3VqVMnV7IvKChQ7969XYlekrKysjR27Fh9+eWXOvXUU1scc4uS/bx581p0MYvF4pdkP6L36QqzhLf59wJt4bXd6/wdAtBqqg84ldazjb7MR7fepaWlue2ePn26ZsyYccRTtmzZIrvdrrq6OsXFxen1119XRkaGNm/erIiIiMMWticnJ6usrEySVFZW5pbom483H/NEi5J98+p7AAAClo8el1tSUuLWxv+tqr5Hjx7avHmzqqqq9Oqrryo7O1vr1rX9H/BePRsfAACzaV5d3xIRERHq1q2bJKl///769NNP9cQTT+jqq69WQ0ODKisr3ar78vJypaSkSJJSUlL0ySefuF2vebV+85iWYlUbAMAcjoFn4zudTtXX16t///4KDw/XmjVrXMeKiopUXFwsu90uSbLb7dqyZYsqKipcY1avXi2r1aqMjAyPvpfKHgBgCt4+Bc/Tc6dMmaIhQ4aoU6dOOnDggJYsWaK1a9fq7bffls1m05gxYzRhwgQlJibKarVq3LhxstvtGjhwoCRp8ODBysjI0PXXX6+5c+eqrKxM9913n3Jycn53QeB/I9kDANAKKioqdMMNN2jv3r2y2Wzq06eP3n77bV100UWSDi1+DwkJ0ciRI1VfX6+srCw99dRTrvNDQ0O1YsUKjR07Vna7XbGxscrOztasWbM8joVkDwAwhzZ+n/3ChQt/83hUVJTy8vKUl5f3q2PS09O1cuVKz774CI5qzv7999/XddddJ7vdrj179kiS/va3v+mDDz7wOiAAAFrFMTBn7y8eJ/vXXntNWVlZio6O1meffab6+npJUlVVlceP7wMAAK3P42R///33a8GCBXr22WcVHv7Lg2zOOussbdq0yafBAQDgK2Z+xa3Hc/ZFRUU699xzD9tvs9lUWVnpi5gAAPA9Hz1BLxB5XNmnpKRo+/bth+3/4IMP1LVrV58EBQCAzzFn33K33HKL7rjjDn388ceyWCwqLS3Viy++qIkTJ2rs2LGtESMAAPCCx238e+65R06nUxdeeKEOHjyoc889V5GRkZo4caLGjRvXGjECAOC1tn6ozrHE42RvsVh07733atKkSdq+fbtqamqUkZGhuLi41ogPAADfaOP77I8lR/1QnYiICI+fzQsAANqex8l+0KBBslh+fUXie++951VAAAC0Cm9vnzNTZd+vXz+3z42Njdq8ebO++OILZWdn+youAAB8izZ+y82bN++I+2fMmKGamhqvAwIAAL7ls/fZX3fddXr++ed9dTkAAHzLxPfZ++ytdwUFBYqKivLV5QAA8CluvfPAiBEj3D4bhqG9e/dq48aNmjp1qs8CAwAAvuFxsrfZbG6fQ0JC1KNHD82aNUuDBw/2WWAAAMA3PEr2DodDo0ePVu/evdWuXbvWigkAAN8z8Wp8jxbohYaGavDgwbzdDgAQcMz8iluPV+Ofcsop+vbbb1sjFgAA0Ao8Tvb333+/Jk6cqBUrVmjv3r2qrq522wAAOGaZ8LY7yYM5+1mzZumuu+7SJZdcIkm6/PLL3R6baxiGLBaLHA6H76MEAMBbJp6zb3Gynzlzpm699Vb9+9//bs14AACAj7U42RvGoT9pzjvvvFYLBgCA1sJDdVrot952BwDAMY02fsucdNJJv5vw9+/f71VAAADAtzxK9jNnzjzsCXoAAAQC2vgtdM011ygpKam1YgEAoPWYuI3f4vvsma8HACAwebwaHwCAgGTiyr7Fyd7pdLZmHAAAtCrm7AEACHYmruw9fjY+AAAILFT2AABzMHFlT7IHAJiCmefsaeMDABD
kqOwBAOZAGx8AgOBGGx8AAAQtKnsAgDnQxgcAIMiZONnTxgcAIMhR2QMATMHy8+bN+YGKZA8AMAcTt/FJ9gAAU+DWOwAAELSo7AEA5kAbHwAAEwjghO0N2vgAAAQ5KnsAgCmYeYEeyR4AYA4mnrOnjQ8AQJCjsgcAmAJtfAAAgh1tfAAAEKyo7AEApmDmNj6VPQDAHAwfbB6YM2eOzjjjDMXHxyspKUnDhw9XUVGR25i6ujrl5OSoffv2iouL08iRI1VeXu42pri4WEOHDlVMTIySkpI0adIkNTU1eRQLyR4AYA5tnOzXrVunnJwcffTRR1q9erUaGxs1ePBg1dbWusaMHz9ey5cv1yuvvKJ169aptLRUI0aMcB13OBwaOnSoGhoatGHDBr3wwgvKz8/XtGnTPIqFNj4AAK1g1apVbp/z8/OVlJSkwsJCnXvuuaqqqtLChQu1ZMkSXXDBBZKkRYsWqVevXvroo480cOBAvfPOO/rqq6/07rvvKjk5Wf369dPs2bM1efJkzZgxQxERES2KhcoeAGAKzXP23mySVF1d7bbV19e36PurqqokSYmJiZKkwsJCNTY2KjMz0zWmZ8+e6tSpkwoKCiRJBQUF6t27t5KTk11jsrKyVF1drS+//LLFPzvJHgBgDj5q46elpclms7m2OXPm/O5XO51O3XnnnTrrrLN0yimnSJLKysoUERGhhIQEt7HJyckqKytzjfnPRN98vPlYS9HGBwDAAyUlJbJara7PkZGRv3tOTk6OvvjiC33wwQetGdqvItkDAEzBYhiyGEd//1zzuVar1S3Z/57c3FytWLFC69evV8eOHV37U1JS1NDQoMrKSrfqvry8XCkpKa4xn3zyidv1mlfrN49pCdr4AABzaOPV+IZhKDc3V6+//rree+89denSxe14//79FR4erjVr1rj2FRUVqbi4WHa7XZJkt9u1ZcsWVVRUuMasXr1aVqtVGRkZLY6Fyh4AgFaQk5OjJUuW6F//+pfi4+Ndc+w2m03R0dGy2WwaM2aMJkyYoMTERFmtVo0bN052u10DBw6UJA0ePFgZGRm6/vrrNXfuXJWVlem+++5TTk5Oi6YPmpHsAQCm0NZP0Hv66aclSeeff77b/kWLFunGG2+UJM2bN08hISEaOXKk6uvrlZWVpaeeeso1NjQ0VCtWrNDYsWNlt9sVGxur7OxszZo1y6NYSPYAAHNo4xfhGC1YHxAVFaW8vDzl5eX96pj09HStXLnSsy//L8zZAwAQ5KjsAQCmYOYX4ZDsAQDmYOL32ZPsAQCmYObKnjl7AACCHJU9AMAcaOMDABD8ArkV7w3a+AAABDkqewCAORjGoc2b8wMUyR4AYAqsxgcAAEGLyh4AYA6sxgcAILhZnIc2b84PVLTxAQAIclT2OMzQUeW69LoKJZ1QL0kq/iZaL84/QRvXJUiSbn9gp/qdVa32yQ36qTZUWzfFaeFDadr9bbQfowaObOmjHfXyvI5u+1JP/ElPrvvc9bmoME5L/pymbz6LU0iooc4nH9TUv29VZLShLzZYNf2qjCNe+88rtqhbv9pWjR8+RBsf+MX3ZRF6/s9p2rMrShaLocyR32v6X79R7qUn67tvYvTNF7F671/ttW9PpOITmnTdnXv04OIi3XhuXzmdFn+HDxwmrcdBTf/HVtfn0LBf/tUuKozT/df11BU5pRoze5dCwwzt+ipGIT/3PXucfkDPbSp0u95LD3fU/31o04l9SfSBhNX4frJ+/XpddtllSk1NlcVi0bJly/wZDn728Zp2+nRtgkp3RWnPzmi98Eia6g6GqOeph/5he+sfSfriE6vK90Rq+5exeuHRjko6oUHJHev9HDlwZKGhhtolNbo2a2KT69iiGem65KYyjcgtVaceP+mEE+t01mX7FR556F/28Aj3c+PbNemTdxJ1wVX7ZOFv28DSfJ+9N1uA8muyr62tVd++fZWXl+fPMPAbQkIMnXfpD4qMdmrrprjDjkd
GO3TRlfu0tzhS+/ZG+CFC4Pft3Rmlm/ufprF/6KfHc7tp355Dv6tV34fpm8/iZWvfqP8ddrJu6neapo7M0NZP4n/1Wp++0041P4bpgqv2tVX4gNf82sYfMmSIhgwZ0uLx9fX1qq//pXqsrq5ujbAgqXOPg5r32leKiHTqp4Ohmn1rdxVv/2VO/tLryjXmnhJFxzpVsiNK/3t9DzU1st4Tx57up9Yod94OpXat048V4XplXkfdN+JkPb7mc5V/FyVJWvpYR2VPLVbnk2u17tUOmnFNL8179/+U2rXusOuteamD+p5XqfapDW39o8BLtPEDxJw5c2Sz2VxbWlqav0MKWru/jdJtQ0/RHVecrDf/nqS7HvlWnbr95Dr+3r/aK+fSUzTx6p7aszNK//uX7QqPCOD7UhC0TrugUn+4dL86ZxzUqedX6d7F23SwOlQfLm8v58//eA++rkIXXL1PXU85qNEzvlNq15/03tIOh13rh9IIfb4uQRdeQ1UfkAwfbAEqoJL9lClTVFVV5dpKSkr8HVLQamoM0d7vorT9i1gtejhNO7fGaPjoMtfxgwfCVLorSl98YtX9t3VT2ol1OivrRz9GDLRMrM2h47vWqWxXlNolNUqSOnb/yW1Mx+51+n5P5GHnvvdyB8W1a9IZg/ldR2AJqNX4kZGRiow8/H+AaH2WEEPhEUf+s9ZikWQRlT0Cwk+1ISrfFaV2I75XUlq9EpMbVPptlNuYvd9G6dRBlW77DONQsj//yn0KCw/gEs/EzNzGD6hkj7YxelKJPl1n0749kYqOc2jQ5T+oz8ADujc7VSlpdTrv0v0qfN+mqv1hOi6lQVeP3auGOos+WZvg79CBw7wwu5NOz/xRHTo2aH95uJY+2lEhoYbOHv69LBZp2NhSLX20ozr3OqjOJ9dq7asdtGd7tCY+87XbdbZ8aFVFcZQuvLbCTz8JvMZb74BfJLRv1KRHv1W7Do06eCBUO7fF6N7sHvrsA5sSkxp08hkHNPymMsVZHar8PlxbPonXhCszVPVDuL9DBw7zw94IzcvtrgM/hsma2KheZx7QnDe+kK39odvvLr25TA11IVo0M101lWHqnHFQ0/6xVSmd3W8lXfOPJPU4/YA6djt80R5wrPNrsq+pqdH27dtdn3fu3KnNmzcrMTFRnTp18mNk5jbvnq6/emx/RYSm3dSjDaMBvDPhqe2/O2ZEbqlG5Jb+5pjxeb9/HRzbaOP7ycaNGzVo0CDX5wkTJkiSsrOzlZ+f76eoAABBicfl+sf5558vI4DnQAAACATM2QMATIE2PgAAwc5pyPUkpaM9P0CR7AEA5mDiOfuAeoIeAADwHJU9AMAULPJyzt5nkbQ9kj0AwBxM/AQ92vgAAAQ5KnsAgClw6x0AAMGO1fgAACBYUdkDAEzBYhiyeLHIzptz/Y1kDwAwB+fPmzfnByja+AAABDkqewCAKdDGBwAg2Jl4NT7JHgBgDjxBDwAABCsqewCAKfAEPQAAgh1tfAAAEKyo7AEApmBxHtq8OT9QkewBAOZAGx8AAAQrKnsAgDnwUB0AAIKbmR+XSxsfAIAgR2UPADAHEy/QI9kDAMzBkHfvpA/cXE8bHwBgDs1z9t5snli/fr0uu+wypaamymKxaNmyZW7HDcPQtGnTdPzxxys6OlqZmZn65ptv3Mbs379fo0aNktVqVUJCgsaMGaOamhqPf3aSPQAAraC2tlZ9+/ZVXl7eEY/PnTtX8+fP14IFC/Txxx8rNjZWWVlZqqurc40ZNWqUvvzyS61evVorVqzQ+vXr9ac//cnjWGjjAwDMwZCXc/aeDR8yZIiGDBly5EsZhh5//HHdd999GjZsmCRp8eLFSk5O1rJly3TNNddo69atWrVqlT799FOdfvrpkqQnn3xSl1xyiR555BGlpqa2OBYqewCAOTQv0PNmk1RdXe221dfXexzKzp07VVZWpszMTNc+m82mAQMGqKCgQJJUUFCghIQEV6KXpMzMTIW
EhOjjjz/26PtI9gAAeCAtLU02m821zZkzx+NrlJWVSZKSk5Pd9icnJ7uOlZWVKSkpye14WFiYEhMTXWNaijY+AMAcnJIsXp4vqaSkRFar1bU7MjLSq7DaApU9AMAUfLUa32q1um1Hk+xTUlIkSeXl5W77y8vLXcdSUlJUUVHhdrypqUn79+93jWkpkj0AAG2sS5cuSklJ0Zo1a1z7qqur9fHHH8tut0uS7Ha7KisrVVhY6Brz3nvvyel0asCAAR59H218AIA5tPET9GpqarR9+3bX5507d2rz5s1KTExUp06ddOedd+r+++9X9+7d1aVLF02dOlWpqakaPny4JKlXr166+OKLdcstt2jBggVqbGxUbm6urrnmGo9W4kskewCAWbRxst+4caMGDRrk+jxhwgRJUnZ2tvLz83X33XertrZWf/rTn1RZWamzzz5bq1atUlRUlOucF198Ubm5ubrwwgsVEhKikSNHav78+R6HTrIHAKAVnH/++TJ+4w8Ei8WiWbNmadasWb86JjExUUuWLPE6FpI9AMAceBEOAABBzke33gUikj0AwBSO5mU2/31+oOLWOwAAghyVPQDAHJizBwAgyDkNyeJFwnYGbrKnjQ8AQJCjsgcAmANtfAAAgp2XyV6Bm+xp4wMAEOSo7AEA5kAbHwCAIOc05FUrntX4AADgWEVlDwAwB8N5aPPm/ABFsgcAmANz9gAABDnm7AEAQLCisgcAmANtfAAAgpwhL5O9zyJpc7TxAQAIclT2AABzoI0PAECQczoleXGvvDNw77OnjQ8AQJCjsgcAmANtfAAAgpyJkz1tfAAAghyVPQDAHEz8uFySPQDAFAzDKcOLN9d5c66/kewBAOZgGN5V58zZAwCAYxWVPQDAHAwv5+wDuLIn2QMAzMHplCxezLsH8Jw9bXwAAIIclT0AwBxo4wMAENwMp1OGF238QL71jjY+AABBjsoeAGAOtPEBAAhyTkOymDPZ08YHACDIUdkDAMzBMCR5c5994Fb2JHsAgCkYTkOGF218g2QPAMAxznDKu8qeW+8AAMAxisoeAGAKtPEBAAh2Jm7jB3Syb/4rq8lo9HMkQOupPhC4/8AAv+dAzaHf77aompvU6NUzdZoUuLkmoJP9gQMHJEnvN7zu50iA1pPW098RAK3vwIEDstlsrXLtiIgIpaSk6IOylV5fKyUlRRERET6Iqm1ZjACehHA6nSotLVV8fLwsFou/wzGF6upqpaWlqaSkRFar1d/hAD7F73fbMwxDBw4cUGpqqkJCWm/NeF1dnRoaGry+TkREhKKionwQUdsK6Mo+JCREHTt29HcYpmS1WvnHEEGL3++21VoV/X+KiooKyCTtK9x6BwBAkCPZAwAQ5Ej28EhkZKSmT5+uyMhIf4cC+By/3whWAb1ADwAA/D4qewAAghzJHgCAIEeyBwAgyJHsAQAIciR7tFheXp46d+6sqKgoDRgwQJ988om/QwJ8Yv369brsssuUmpoqi8WiZcuW+TskwKdI9miRpUuXasKECZo+fbo2bdqkvn37KisrSxUVFf4ODfBabW2t+vbtq7y8PH+HArQKbr1DiwwYMEBnnHGG/vKXv0g69F6CtLQ0jRs3Tvfcc4+fowN8x2Kx6PXXX9fw4cP9HQrgM1T2+F0NDQ0qLCxUZmama19ISIgyMzNVUFDgx8gAAC1Bssfv+v777+VwOJScnOy2Pzk5WWVlZX6KCgDQUiR7AACCHMkev+u4445TaGioysvL3faXl5crJSXFT1EBAFqKZI/fFRERof79+2vNmjWufU6nU2vWrJHdbvdjZACAlgjzdwAIDBMmTFB2drZOP/10nXnmmXr88cdVW1ur0aNH+zs0wGs1NTXavn276/POnTu1efNmJSYmqlOnTn6MDPANbr1Di/3lL3/Rww8/rLKyMvXr10/z58/XgAED/B0W4LW1a9dq0KBBh+3Pzs5Wfn5+2wcE+BjJHgCAIMecPQAAQY5kDwBAkCP
ZAwAQ5Ej2AAAEOZI9AABBjmQPAECQI9kDABDkSPYAAAQ5kj3gpRtvvFHDhw93fT7//PN15513tnkca9eulcViUWVl5a+OsVgsWrZsWYuvOWPGDPXr18+ruHbt2iWLxaLNmzd7dR0AR49kj6B04403ymKxyGKxKCIiQt26ddOsWbPU1NTU6t/9z3/+U7Nnz27R2JYkaADwFi/CQdC6+OKLtWjRItXX12vlypXKyclReHi4pkyZctjYhoYGRURE+OR7ExMTfXIdAPAVKnsErcjISKWkpCg9PV1jx45VZmam3njjDUm/tN4feOABpaamqkePHpKkkpISXXXVVUpISFBiYqKGDRumXbt2ua7pcDg0YcIEJSQkqH379rr77rv136+X+O82fn19vSZPnqy0tDRFRkaqW7duWrhwoXbt2uV6+Uq7du1ksVh04403Sjr0CuE5c+aoS5cuio6OVt++ffXqq6+6fc/KlSt10kknKTo6WoMGDXKLs6UmT56sk046STExMerataumTp2qxsbGw8Y988wzSktLU0xMjK666ipVVVW5HX/uuefUq1cvRUVFqWfPnnrqqac8jgVA6yHZwzSio6PV0NDg+rxmzRoVFRVp9erVWrFihRobG5WVlaX4+Hi9//77+vDDDxUXF6eLL77Ydd6jjz6q/Px8Pf/88/rggw+0f/9+vf7667/5vTfccIP+8Y9/aP78+dq6daueeeYZxcXFKS0tTa+99pokqaioSHv37tUTTzwhSZozZ44WL16sBQsW6Msvv9T48eN13XXXad26dZIO/VEyYsQIXXbZZdq8ebNuvvlm3XPPPR7/3yQ+Pl75+fn66quv9MQTT+jZZ5/VvHnz3MZs375dL7/8spYvX65Vq1bps88+02233eY6/uKLL2ratGl64IEHtHXrVj344IOaOnWqXnjhBY/jAdBKDCAIZWdnG8OGDTMMwzCcTqexevVqIzIy0pg4caLreHJyslFfX+86529/+5vRo0cPw+l0uvbV19cb0dHRxttvv20YhmEcf/zxxty5c13HGxsbjY4dO7q+yzAM47zzzjPuuOMOwzAMo6ioyJBkrF69+ohx/vvf/zYkGT/++KNrX11dnRETE2Ns2LDBbeyYMWOMa6+91jAMw5gyZYqRkZHhdnzy5MmHXeu/STJef/31Xz3+8MMPG/3793d9nj59uhEaGmrs3r3bte+tt94yQkJCjL179xqGYRgnnniisWTJErfrzJ4927Db7YZhGMbOnTsNScZnn332q98LoHUxZ4+gtWLFCsXFxamxsVFOp1N//OMfNWPGDNfx3r17u83Tf/7559q+fbvi4+PdrlNXV6cdO3aoqqpKe/fu1YABA1zHwsLCdPrppx/Wym+2efNmhYaG6rzzzmtx3Nu3b9fBgwd10UUXue1vaGjQqaeeKknaunWrWxySZLfbW/wdzZYuXar58+drx44dqqmpUVNTk6xWq9uYTp066YQTTnD7HqfTqaKiIsXHx2vHjh0aM2aMbrnlFteYpqYm2Ww2j+MB0DpI9ghagwYN0tNPP62IiAilpqYqLMz91z02Ntbtc01Njfr3768XX3zxsGt16NDhqGKIjo72+JyamhpJ0ptvvumWZKVD6xB8paCgQKNGjdLMmTOVlZUlm82ml156SY8++qjHsT777LOH/fERGhrqs1gBeIdkj6AVGxurbt26tXj8aaedpqVLlyopKemw6rbZ8ccfr48//ljnnnuupEMVbGFhoU477bQjju/du7ecTqfWrVunzMzMw443dxYcDodrX0ZGhiIjI1VcXPyrHYFevXq5Fhs2++ijj37/h/wPGzZsUHp6uu69917Xvu++++6wccXFxSotLVVqaqrre0JCQtSjRw8lJycrNTVV3377rUaNGuXR9wNoOyzQA342atQoHXfccRo2bJjef/997dy5U2vXrtXtt9+u3bt3S5LuuOMOPfTQQ1q2bJm2bdum22677Tfvke/cubOys7N1000
3admyZa5rvvzyy5Kk9PR0WSwWrVixQvv27VNNTY3i4+M1ceJEjR8/Xi+88IJ27NihTZs26cknn3Qterv11lv1zTffaNKkSSoqKtKSJUuUn5/v0c/bvXt3FRcX66WXXtKOHTs0f/78Iy42jIqKUnZ2tj7//HO9//77uv3223XVVVcpJSVFkjRz5kzNmTNH8+fP19dff60tW7Zo0aJFeuyxxzyKB0DrIdkDP4uJidH69evVqVMnjRgxQr169dKYMWNUV1fnqvTvuusuXX/99crOzpbdbld8fLyuuOKK37zu008/rSuvvFK33XabevbsqVtuuUW1tbWSpBNOOEEzZ87UPffco+TkZOXm5kqSZs+eralTp2rOnDnq1auXLr74Yr355pvq0qWLpEPz6K+99pqWLVumvn37asGCBXrwwQc9+nkvv/xyjR8/Xrm5uerXr582bNigqVOnHjauW7duGjFihC655BINHjxYffr0cbu17uabb9Zzzz2nRYsWqXfv3jrvvPOUn5/vihWA/1mMX1tZBAAAggKVPQAAQY5kDwBAkCPZAwAQ5Ej2AAAEOZI9AABBjmQPAECQI9kDABDkSPYAAAQ5kj0AAEGOZA8AQJAj2QMAEOT+P30XGNiEmau3AAAAAElFTkSuQmCC", + "text/plain": [ + "<Figure size 640x480 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cm =confusion_matrix(y_test, my_pred)\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm,)\n", + "disp.plot()\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1> 6. 
Testing Data</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "predicted marker\n", + "1 53\n", + "0 47\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Predict from the DataFrame itself (not .values) so the column names match those seen during fit.\n", + "test_pred = knn_gscv.predict(mTest)\n", + "y_testpred = pd.DataFrame(test_pred, columns=['predicted marker'])\n", + "y_testpred.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the predictions; the default row index is written as the first CSV column.\n", + "y_testpred.to_csv('testresult.csv')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} -- GitLab