diff --git a/PartB_gradient_boosting_v2.ipynb b/PartB_gradient_boosting_v2.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..2fab2204d740d7176199d1cfe7eaebb4e3daab29 --- /dev/null +++ b/PartB_gradient_boosting_v2.ipynb @@ -0,0 +1,1632 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>1. Loading Datasets</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "[ TrainingDataMulti.csv info ]\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 6000 entries, 0 to 5999\n", + "Columns: 129 entries, R1-PA1:VH to marker\n", + "dtypes: float64(112), int64(17)\n", + "memory usage: 5.9 MB\n", + "\n", + "[ TestingDataMulti.csv info ]\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 100 entries, 0 to 99\n", + "Columns: 128 entries, R1-PA1:VH to snort_log4\n", + "dtypes: float64(104), int64(24)\n", + "memory usage: 100.1 KB\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "\n", + "mTrain = pd.read_csv(\"TrainingDataMulti.csv\")\n", + "mTest = pd.read_csv(\"TestingDataMulti.csv\")\n", + "\n", + "\n", + "print(\"\\n[ TrainingDataMulti.csv info ]\")\n", + "mTrain.info()\n", + "\n", + "print(\"\\n[ TestingDataMulti.csv info ]\")\n", + "mTest.info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>1.1 Analysing the Data</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "R1-PA1:VH float64\n", + "R1-PM1:V float64\n", + "R1-PA2:VH float64\n", + "R1-PM2:V float64\n", + "R1-PA3:VH float64\n", + " ... 
\n", + "snort_log1 int64\n", + "snort_log2 int64\n", + "snort_log3 int64\n", + "snort_log4 int64\n", + "marker int64\n", + "Length: 129, dtype: object" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "0 3000\n", + "2 1500\n", + "1 1500\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain['marker'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " <th>marker</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " 
<td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", 
+ " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " <td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 129 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 
-166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... \n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log4 relay1_log relay2_log relay3_log relay4_log \n", + "0 0 0 0 0 0 \\\n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + " snort_log1 snort_log2 snort_log3 snort_log4 marker \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + "[6000 rows x 129 columns]" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " <th>marker</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " <td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", 
+ " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 129 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... 
\n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log4 relay1_log relay2_log relay3_log relay4_log \n", + "0 0 0 0 0 0 \\\n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + " snort_log1 snort_log2 snort_log3 snort_log4 marker \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + "[6000 rows x 129 columns]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain.isnull().sum()\n", + "mTrain = mTrain.dropna()\n", + "mTrain\n" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log3</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", 
+ " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " 
<td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 128 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... 
\n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log3 control_panel_log4 relay1_log relay2_log \n", + "0 0 0 0 0 \\\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "... ... ... ... ... \n", + "5995 0 0 0 0 \n", + "5996 0 0 0 0 \n", + "5997 0 0 0 0 \n", + "5998 0 0 0 0 \n", + "5999 0 0 0 0 \n", + "\n", + " relay3_log relay4_log snort_log1 snort_log2 snort_log3 snort_log4 \n", + "0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... 
\n", + "5995 0 0 0 0 0 0 \n", + "5996 0 0 0 0 0 0 \n", + "5997 0 0 0 0 0 0 \n", + "5998 0 0 0 0 0 0 \n", + "5999 0 0 0 0 0 0 \n", + "\n", + "[6000 rows x 128 columns]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = mTrain.drop(columns = 'marker')\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "y = mTrain['marker']" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Stratified Train-Test Split\n", + "The train-test split is stratified so that each class makes up almost the same percentage of the training and test samples. This is desirable when the number of samples per class is imbalanced, as in this case. \n", + "\n", + "For such imbalanced datasets, stratified K-fold cross-validation is used instead of plain K-fold cross-validation." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=1, test_size=0.15, stratify=y)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "0 2550\n", + "2 1275\n", + "1 1275\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "0 450\n", + "2 225\n", + "1 225\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.value_counts()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + 
"attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>3. Choosing a Model: Gradient Boosting, training, and evaluation</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "from xgboost import XGBClassifier\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "\n", + "gb_clf = GradientBoostingClassifier(n_estimators=300, learning_rate=0.5, max_depth=8, random_state=10, subsample=1.0, max_features='log2')\n", + "gb_clf.fit(X_train, y_train)\n", + "y_pred = gb_clf.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9722222222222222" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gb_clf.score(X_test, y_test)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>4. Improving</h1>" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A hyperparameter search will be performed to find the optimal settings for the gradient boosting model. \n", + "\n", + "Afterwards, stratified K-fold cross-validation will be used, followed by a confusion matrix as an evaluation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "# from scipy.stats import loguniform\n", + "# from sklearn.model_selection import RandomizedSearchCV\n", + "# from sklearn.ensemble import GradientBoostingRegressor\n", + "\n", + "# param_distributions = {\n", + "# \"n_estimators\": [1, 2, 5, 10, 20, 50, 100, 200, 500],\n", + "# \"max_leaf_nodes\": [2, 5, 10, 20, 50, 100],\n", + "# \"learning_rate\": loguniform(0.01, 1),\n", + "# }\n", + "# search_cv = RandomizedSearchCV(\n", + "# GradientBoostingRegressor(), param_distributions=param_distributions,\n", + "# scoring=\"neg_mean_absolute_error\", n_iter=20, random_state=0, n_jobs=2\n", + "# )\n", + "# search_cv.fit(X_train, y_train)\n", + "\n", + "# columns = [f\"param_{name}\" for name in param_distributions.keys()]\n", + "# columns += [\"mean_test_error\", \"std_test_error\"]\n", + "# cv_results = pd.DataFrame(search_cv.cv_results_)\n", + "# cv_results[\"mean_test_error\"] = -cv_results[\"mean_test_score\"]\n", + "# cv_results[\"std_test_error\"] = cv_results[\"std_test_score\"]\n", + "# cv_results[columns].sort_values(by=\"mean_test_error\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1> 5. 
Metric Evaluation</h1>" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Confusion Matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[448, 2, 0],\n", + " [ 0, 218, 7],\n", + " [ 8, 8, 209]], dtype=int64)" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import ConfusionMatrixDisplay\n", + "confusion_matrix(y_test, y_pred)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.98 1.00 0.99 450\n", + " 1 0.96 0.97 0.96 225\n", + " 2 0.97 0.93 0.95 225\n", + "\n", + " accuracy 0.97 900\n", + " macro avg 0.97 0.96 0.97 900\n", + "weighted avg 0.97 0.97 0.97 900\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfsAAAGwCAYAAACuFMx9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABC7klEQVR4nO3deVxU5f4H8M+wDesMgsKIAqIkSuISGs6tTJPA5Zom3TZSMrNfBpaSZpa7GV2tNIu0WyZ2r1y1RUuuG1miJmqi5E6BFBgOqAQIyjZzfn9wmdsEJuNszDmf9+t1XjnPeZ5zvtMoX55lziMTBEEAERERiZaDrQMgIiIiy2KyJyIiEjkmeyIiIpFjsiciIhI5JnsiIiKRY7InIiISOSZ7IiIikXOydQCm0Ol0KCkpgZeXF2Qyma3DISIiIwmCgKtXryIgIAAODpbrf9bW1qK+vt7k67i4uMDV1dUMEVmXXSf7kpISBAYG2joMIiIyUXFxMbp27WqRa9fW1iIk2BOaMq3J11KpVCgsLLS7hG/Xyd7LywsA8MuxblB4ckZC7B4M62vrEMia+HBPSWhEAw5gu/7nuSXU19dDU6bFLzndoPC69VxRdVWH4MifUV9fz2RvTc1D9wpPB5M+QLIPTjJnW4dAVsVkLwn//ZitMRXr6SWDp9et30cH+50ututkT0RE1FZaQQetCb9DagWd+YKxMiZ7IiKSBB0E6EwYMTKlra1x7JuIiEjk2LMnIiJJ0EEHUwbiTWttW0z2REQkCVpBgNaEb3mY0tbWOIxPREQkcuzZExGRJEh5gR6TPRERSYIOArQSTfYcxiciIhI59uyJiEgSOIxPREQkclyNT0RERKLFnj0REUmC7r+HKe3tFZM9ERFJgtbE1fimtLU1JnsiIpIErQATd70zXyzWxjl7IiIikWPPnoiIJIFz9kRERCKngwxayExqb684jE9ERCRy7NkTEZEk6ISmw5T29orJnoiIJEFr4jC+KW1tjcP4REREIseePRERSYKUe/ZM9kREJAk6QQadYMJqfBPa2hqH8YmIiESOPXsiIpIEDuMTERGJnBYO0JowoK01YyzWxmRPRESSIJg4Zy9wzp6IiIjaK/bsiYhIEjhnT0REJHJawQFawYQ5ezt+XC6H8YmIiESOPXsiIpIEHWTQmdDH1cF+u/bs2RMRkSQ0z9mbctyqN954AzKZDNOnT9eX1dbWIjExEb6+vvD09ERcXBxKS0sN2hUVFWH06NFwd3eHn58fZs2ahcbGRqPvz2RPRERkQd9//z0++OAD9O3b16B8xowZ2LZtGz799FNkZWWhpKQE48eP15/XarUYPXo06uvrcfDgQaxfvx5paWmYP3++0TEw2RMRkSQ0L9Az5TBWdXU14uPj8eGHH6JDhw768srKSqxduxZvv/027rvvPkRGRmLdunU4ePAgDh06BADYvXs3zpw5g3/961/o378/Ro4ciSVLliA1NRX19fVGxcFkT0REktA0Z2/aAQBVVVUGR11d3Q3vmZiYiNGjRyM6OtqgPCcnBw0NDQblvXr1QlBQELKzswEA2dnZiIiIgL+/v75ObGwsqqqqcPr0aaPeO5M9ERGREQIDA6FUKvVHSkpKq/U2btyIY8eOtXpeo9HAxcUF3t7eBuX+/v7QaDT6Or9P9M3nm88Zg6vxiYhIEnQmPhu/eTV+cXExFAqFvlwul7eoW1xcjBdeeAGZmZlwdXW95XuaC3v2REQkCeaas1coFAZHa8k+JycHZWVluOOOO+Dk5AQnJydkZWVh1apVcHJygr+/P+rr61FRUWHQrrS0FCqVCgCgUqlarM5vft1cp62Y7ImISBJ0cDD5aKvhw4fj5MmTyM3N1R8DBw5EfHy8/s/Ozs7Ys2ePvk1eXh6KioqgVqsBAGq1GidPnkRZWZm+TmZmJhQKBcLDw4167xzGJyIiMjMvLy/06dPHoMzDwwO+vr768smTJyM5ORk+Pj5QKBSYNm0a1Go1Bg8
eDACIiYlBeHg4JkyYgGXLlkGj0WDu3LlITExsdTThzzDZExGRJGgFGbQmbFNrStvWrFixAg4ODoiLi0NdXR1iY2Px/vvv6887OjoiIyMDU6dOhVqthoeHBxISErB48WKj78VkT0REkqA1cYGe1sTH5e7du9fgtaurK1JTU5GamnrDNsHBwdi+fbtJ9wU4Z09ERCR67NkTEZEk6AQH6EzY4lYn2O9GOEz2REQkCbYexrclDuMTERGJHHv2REQkCTqYtqJeZ75QrI7JnoiIJMHYB+O01t5e2W/kRERE1Cbs2RMRkSTc6p70v29vr5jsiYhIEn6/J/2ttrdXTPZERCQJUu7Z22/kIrbpXT/EBvTH6vldWpwTBODV+O6IDeiPgzuUBufyct0w++EeGN8rAnG9++CVx7qj4LTt91Em4z2SVIpV/8nDlrwT2PTDKSxYex5de9TaOiyyoDFPXsb6w2ew7fwJvJPxE8L6X7N1SCQi7SLZp6amolu3bnB1dUVUVBSOHDli65BsJi/XDf/5ly9Cwq+3en7Lh50ga2Uk6XqNA16N74FOAfV4J+NHvLU1H26eOrz6eA80Nlg4aDK7voOrsW19R0wfcxvmPNYDjs7A6+kFkLtpbR0aWcC9D/yGZxaUYMPbKiTG9sT5M65Ymn4eSl/+4zWn5ofqmHLYK5tHvmnTJiQnJ2PBggU4duwY+vXrh9jYWIP9e6Xieo0D/p4UjOnLi+GlbPlDveCUGz7/oBOS3y5qca44X46rvzlh4iwNAkPr0C2sFk8ka/DbJWeUXnCxRvhkRq8+0QOZm33xy49uOH/GDW9ND4J/1wbc1rf1XwLJvo1/5jJ2pvtg9yYfFP3kilWzu6Luugyxj5XbOjRR0Qkykw97ZfNk//bbb2PKlCmYNGkSwsPDsWbNGri7u+Pjjz+2dWhW994rXXHn8CrcMaS6xbnaazK8kRiMxKUX4OPX2OJ81x51UHRoxK5/+6KhXoa66zLs/Lcvgm6rhSqw3hrhkwV5KJp++bta4WjjSMjcnJx1uK3vNRzb76UvEwQZju/3Qngkh/LJPGy6QK++vh45OTmYM2eOvszBwQHR0dHIzs5uUb+urg51dXX611VVVVaJ0xr2bvVG/kk3vLv9x1bPf7CwC8IH1uAvI1p/z+6eOiz/PB8LnwpB+kp/AEBASB1e/3cBHLkM067JZAKeXfQrTh3xwC95brYOh8xM4aOFoxNQccnwH+pvl50QGFp3g1Z0K3QmDsXzoTq36PLly9BqtfD39zco9/f3h0ajaVE/JSUFSqVSfwQGBlorVIsq+9UZq+d3wez3foGLa8uNFrJ3KZD7nReeXfzrDa9Rd12Gt18MxO2DarAy40e8/eVP6NarFvMmdEfddfsdeiIg6fULCA67jpTngm0dCpFda971zpTDXtlVn2/OnDlITk7Wv66qqhJFws8/4Y6Ky85IjA3Tl+m0Mpw85IGv1nXEXydexsWfXTC+V4RBuyVTuqFPVA2Wf56Pb7d0QGmxC1Zu+wkO//37+HLqL4jr3QfZu5QYOq7Ciu+IzCXxtQuIiq7Ci+NDcfki116IUVW5I7SNgHcnw+m5Dh0b8dslu/oRTe2YTf8mdezYEY6OjigtLTUoLy0thUqlalFfLpdDLpdbKzyr6X/PVXzwzTmDsrdmBCEwtBYPJ5ZB4dOI0ROuGJz/v/t64f8W/orBMU3D+nXXHeDgAIOV+g4OAmQyQGfPuzdIloDE137FX0ZUYtbfQlFaLL6/99SkscEBP51wx4C7ryJ7Z9PXaWUyAf3vrsZXab42jk5ctJBBa8KDcUxpa2s2TfYuLi6IjIzEnj17MG7cOACATqfDnj17kJSUZMvQrMrdU4duvQy/Q+3qroNXB62+vLVFeX5dGqAKalp8N2DIVXz4WgDee6Urxj51CTqdDJvf84OjE9DvrpYL/qh9S3r9AoaN+w0Ln+qO69UO6NCp6StYNVcdUV9rv0OJ1Lov/tERM1cW48cf3JF
33B0PTrkEV3cddm/0sXVoomLqUDyH8U2QnJyMhIQEDBw4EHfeeSdWrlyJmpoaTJo0ydah2ZWg2+qwKO08NrytwvQxPSFzEBDa5zqWbiiAr3/LXxSofRuT0DSS8+bn+Qblb84IROZm9vbEJuurDlD6ajFxlgYdOjXi/Gk3vBofgorLzrYOjUTC5sn+kUcewaVLlzB//nxoNBr0798fO3fubLFoT2qW/+GH/B/tKsltURZ5bzUi7/3zdmQfYrv0t3UIZGVfreuIr9Z1tHUYoqaFaUPx9vxIK5snewBISkqS1LA9ERFZH4fxiYiIRI4b4RAREZFosWdPRESSIJi4n73Ar94RERG1bxzGJyIiItFiz56IiCTB1G1q7XmLWyZ7IiKSBK2Ju96Z0tbW7DdyIiIiahMmeyIikoTmYXxTDmOsXr0affv2hUKhgEKhgFqtxo4dO/Tnhw4dCplMZnA8++yzBtcoKirC6NGj4e7uDj8/P8yaNQuNjcY/Ap3D+EREJAk6OEBnQh/X2LZdu3bFG2+8gdtuuw2CIGD9+vUYO3Ysjh8/jttvvx0AMGXKFCxevFjfxt3dXf9nrVaL0aNHQ6VS4eDBg7h48SImTpwIZ2dnvP7660bFwmRPRERkAWPGjDF4vXTpUqxevRqHDh3SJ3t3d/dWt3QHgN27d+PMmTP4+uuv4e/vj/79+2PJkiWYPXs2Fi5cCBcXlzbHwmF8IiKSBK0gM/kAgKqqKoOjrq7u5vfWarFx40bU1NRArVbryzds2ICOHTuiT58+mDNnDq5du6Y/l52djYiICION4WJjY1FVVYXTp08b9d7ZsyciIkkw11fvAgMDDcoXLFiAhQsXttrm5MmTUKvVqK2thaenJ7Zs2YLw8HAAwOOPP47g4GAEBATgxIkTmD17NvLy8vDFF18AADQaTYsdYJtfazQao2JnsiciIkkQTNz1Tvhv2+LiYigUCn25XC6/YZuwsDDk5uaisrISn332GRISEpCVlYXw8HA888wz+noRERHo3Lkzhg8fjoKCAvTo0eOW42wNh/GJiIiM0Ly6vvn4s2Tv4uKC0NBQREZGIiUlBf369cM777zTat2oqCgAQH5+PgBApVKhtLTUoE7z6xvN898Ikz0REUmCFjKTD1PpdLobzvHn5uYCADp37gwAUKvVOHnyJMrKyvR1MjMzoVAo9FMBbcVhfCIikgSdYNojb3WCcfXnzJmDkSNHIigoCFevXkV6ejr27t2LXbt2oaCgAOnp6Rg1ahR8fX1x4sQJzJgxA0OGDEHfvn0BADExMQgPD8eECROwbNkyaDQazJ07F4mJiX86mtAaJnsiIiILKCsrw8SJE3Hx4kUolUr07dsXu3btwv3334/i4mJ8/fXXWLlyJWpqahAYGIi4uDjMnTtX397R0REZGRmYOnUq1Go1PDw8kJCQYPC9/LZisiciIknQmbhAz9i2a9euveG5wMBAZGVl3fQawcHB2L59u1H3bQ2TPRERSYIOMuhMmHc3pa2tcYEeERGRyLFnT0REkvD7p+Ddant7xWRPRESSYO05+/bEfiMnIiKiNmHPnoiIJEEHE5+Nb8cL9JjsiYhIEgQTV+MLTPZERETtm7l2vbNHnLMnIiISOfbsiYhIEqS8Gp/JnoiIJIHD+ERERCRa7NkTEZEkSPnZ+Ez2REQkCRzGJyIiItFiz56IiCRByj17JnsiIpIEKSd7DuMTERGJHHv2REQkCVLu2TPZExGRJAgw7etzgvlCsTomeyIikgQp9+w5Z09ERCRy7NkTEZEkSLlnz2RPRESSIOVkz2F8IiIikWPPnoiIJEHKPXsmeyIikgRBkEEwIWGb0tbWOIxPREQkcuzZExGRJHA/eyIiIpGT8pw9h/GJiIhEjsmeiIgkoXmBnimHMVavXo2+fftCoVBAoVBArVZjx44d+vO1tbVITEyEr68vPD09ERcXh9LSUoNrFBUVYfTo0XB3d4efnx9mzZqFxsZGo987kz0REUl
C8zC+KYcxunbtijfeeAM5OTk4evQo7rvvPowdOxanT58GAMyYMQPbtm3Dp59+iqysLJSUlGD8+PH69lqtFqNHj0Z9fT0OHjyI9evXIy0tDfPnzzf6vcsEQbDbjXyqqqqgVCrx24/dofDi7y1iF9tlgK1DIGuy3x9NZIRGoQF78SUqKyuhUCgsco/mXBH5+Qw4echv+TqNNXXIiVuB4uJig1jlcjnk8rZd18fHB8uXL8dDDz2ETp06IT09HQ899BAA4Ny5c+jduzeys7MxePBg7NixA3/9619RUlICf39/AMCaNWswe/ZsXLp0CS4uLm2OnRmSiIjICIGBgVAqlfojJSXlpm20Wi02btyImpoaqNVq5OTkoKGhAdHR0fo6vXr1QlBQELKzswEA2dnZiIiI0Cd6AIiNjUVVVZV+dKCtRLEa/8GeEXCSOds6DLKwguWDbR0CWVHoKzm2DoGsQCbIgAbr3EswcTV+85x9az37Gzl58iTUajVqa2vh6emJLVu2IDw8HLm5uXBxcYG3t7dBfX9/f2g0GgCARqMxSPTN55vPGUMUyZ6IiOhmBJg2O9TctHnBXVuEhYUhNzcXlZWV+Oyzz5CQkICsrKxbD+IWMdkTERFZiIuLC0JDQwEAkZGR+P777/HOO+/gkUceQX19PSoqKgx696WlpVCpVAAAlUqFI0eOGFyvebV+c5224pw9ERFJQvMT9Ew5TI5Bp0NdXR0iIyPh7OyMPXv26M/l5eWhqKgIarUaAKBWq3Hy5EmUlZXp62RmZkKhUCA8PNyo+7JnT0REkmDtjXDmzJmDkSNHIigoCFevXkV6ejr27t2LXbt2QalUYvLkyUhOToaPjw8UCgWmTZsGtVqNwYOb1ifFxMQgPDwcEyZMwLJly6DRaDB37lwkJia2efV/MyZ7IiIiCygrK8PEiRNx8eJFKJVK9O3bF7t27cL9998PAFixYgUcHBwQFxeHuro6xMbG4v3339e3d3R0REZGBqZOnQq1Wg0PDw8kJCRg8eLFRsfCZE9ERJKgE2RNq/9NaG+MtWvX/ul5V1dXpKamIjU19YZ1goODsX37dqPu2xomeyIikgRBMHE1vh0/54kL9IiIiESOPXsiIpIEay/Qa0+Y7ImISBKY7ImIiETO2gv02hPO2RMREYkce/ZERCQJUl6Nz2RPRESS0JTsTZmzN2MwVsZhfCIiIpFjz56IiCSBq/GJiIhETsD/9qS/1fb2isP4REREIseePRERSQKH8YmIiMROwuP4TPZERCQNJvbsYcc9e87ZExERiRx79kREJAl8gh4REZHISXmBHofxiYiIRI49eyIikgZBZtoiOzvu2TPZExGRJEh5zp7D+ERERCLHnj0REUkDH6pDREQkblJejd+mZP/VV1+1+YIPPPDALQdDRERE5temZD9u3Lg2XUwmk0Gr1ZoSDxERkeXY8VC8KdqU7HU6naXjICIisigpD+ObtBq/trbWXHEQERFZlmCGw04Zney1Wi2WLFmCLl26wNPTE+fPnwcAzJs3D2vXrjV7gERERGQao5P90qVLkZaWhmXLlsHFxUVf3qdPH3z00UdmDY6IiMh8ZGY47JPRyf6TTz7BP/7xD8THx8PR0VFf3q9fP5w7d86swREREZmNlYfxU1JSMGjQIHh5ecHPzw/jxo1DXl6eQZ2hQ4dCJpMZHM8++6xBnaKiIowePRru7u7w8/PDrFmz0NjYaFQsRn/P/tdff0VoaGiLcp1Oh4aGBmMvR0REJEpZWVlITEzEoEGD0NjYiFdeeQUxMTE4c+YMPDw89PWmTJmCxYsX61+7u7vr/6zVajF69GioVCocPHgQFy9exMSJE+Hs7IzXX3+9zbEYnezDw8Oxf/9+BAcHG5R/9tlnGDBggLGXIyIisg4zPUGvqqrKoFgul0Mul7eovnPnToPXaWlp8PPzQ05ODoYMGaIvd3d3h0qlavWWu3fvxpkzZ/D111/D398f/fv3x5IlSzB79mwsXLj
QYDr9zxg9jD9//nwkJSXh73//O3Q6Hb744gtMmTIFS5cuxfz58429HBERkXU073pnygEgMDAQSqVSf6SkpLTp9pWVlQAAHx8fg/INGzagY8eO6NOnD+bMmYNr167pz2VnZyMiIgL+/v76stjYWFRVVeH06dNtfutG9+zHjh2Lbdu2YfHixfDw8MD8+fNxxx13YNu2bbj//vuNvRwREZFdKS4uhkKh0L9urVf/RzqdDtOnT8ddd92FPn366Msff/xxBAcHIyAgACdOnMDs2bORl5eHL774AgCg0WgMEj0A/WuNRtPmmG/p2fj33HMPMjMzb6UpERGRTZhri1uFQmGQ7NsiMTERp06dwoEDBwzKn3nmGf2fIyIi0LlzZwwfPhwFBQXo0aPHrQf7B7e8Ec7Ro0dx9uxZAE3z+JGRkWYLioiIyOxstOtdUlISMjIysG/fPnTt2vVP60ZFRQEA8vPz0aNHD6hUKhw5csSgTmlpKQDccJ6/NUYn+wsXLuCxxx7Dd999B29vbwBARUUF/vKXv2Djxo03fSNERERSIAgCpk2bhi1btmDv3r0ICQm5aZvc3FwAQOfOnQEAarUaS5cuRVlZGfz8/AAAmZmZUCgUCA8Pb3MsRi/Qe/rpp9HQ0ICzZ8+ivLwc5eXlOHv2LHQ6HZ5++mljL0dERGQdZlqg11aJiYn417/+hfT0dHh5eUGj0UCj0eD69esAgIKCAixZsgQ5OTn4+eef8dVXX2HixIkYMmQI+vbtCwCIiYlBeHg4JkyYgB9++AG7du3C3LlzkZiY2Ka1As2M7tlnZWXh4MGDCAsL05eFhYXh3XffxT333GPs5YiIiKxCJjQdprQ3xurVqwE0PTjn99atW4cnn3wSLi4u+Prrr7Fy5UrU1NQgMDAQcXFxmDt3rr6uo6MjMjIyMHXqVKjVanh4eCAhIcHge/ltYXSyDwwMbPXhOVqtFgEBAcZejoiIyDqsPGcv3GQ1YGBgILKysm56neDgYGzfvt24m/+B0cP4y5cvx7Rp03D06FF92dGjR/HCCy/gzTffNCkYIiIiMr829ew7dOgAmex/cxU1NTWIioqCk1NT88bGRjg5OeGpp57CuHHjLBIoERGRSW5h3r1FezvVpmS/cuVKC4dBRERkYTb66l170KZkn5CQYOk4iIiIyEJu+aE6AFBbW4v6+nqDMmOfKkRERGQVEu7ZG71Ar6amBklJSfDz84OHhwc6dOhgcBAREbVLVt7Pvj0xOtm/9NJL+Oabb7B69WrI5XJ89NFHWLRoEQICAvDJJ59YIkYiIiIygdHD+Nu2bcMnn3yCoUOHYtKkSbjnnnsQGhqK4OBgbNiwAfHx8ZaIk4iIyDQSXo1vdM++vLwc3bt3B9A0P19eXg4AuPvuu7Fv3z7zRkdERGQmzU/QM+WwV0b37Lt3747CwkIEBQWhV69e2Lx5M+68805s27ZNvzEOWc6YJy/joall8OnUiPNn3PD+3C7Iy3W3dVhkhGdvP4aYwEJ0V1SgTuuIY5dUWHZ8MAqveuvrPBJ6Bg90+wm3+1yGp3MDBmyehKsNhs/B7uZVgZcHHMIdnTRwcdTi3G++WHliEA6VdrHyOyJTrD/wA/wD61uUb/vED6nzgm0QEYmR0T37SZMm4YcffgAAvPzyy0hNTYWrqytmzJiBWbNmGXWtffv2YcyYMQgICIBMJsPWrVuNDUdS7n3gNzyzoAQb3lYhMbYnzp9xxdL081D6tnx8MbVfd/pdxL9+vB1/2/UgEvb8FU4OOqQNz4Cb4/8+RzfHRuwrCcLqU3fc8DofDt0BRwcdJuwZg7E74nCuwhf/GLoDHV2vWeNtkJk8/0A4HhvYX3/MebwnAGD/f7jg2ewkvEDP6J79jBkz9H+Ojo7GuXPnkJOTg9DQUP0uPW1VU1ODfv364amnnsL48eONDUVyxj9zGTvTfbB7kw8AYNXsrrhzeBViHyvH5vf8bRwdtdVT3442eD07exiOPLQefXwv4fu
ypv0l0vKa/i1F+f3a6jU6yK8jRFGJOYeGIq/CFwCw/HgUnuh5Gj29y3FZw9Eee1FZ7mzw+uGplSj5WY4Th7xsFBGJkUnfsweaHtAfHHxrQ00jR47EyJEjTQ1BEpycdbit7zVsfM9PXyYIMhzf74XwSPbk7JmXc9MQbkWda5vb/FbnioJKbzzYPQ+nyzuiXueIR287g8vX3XCqvJOlQiULc3LW4b4Hr+CLj/wB2O9isPZKBhN3vTNbJNbXpmS/atWqNl/w+eefv+Vgbqaurg51dXX611VVVRa7V3uj8NHC0QmouGT4kf122QmBoXU3aEXtnQwCXh34HY6WqfBTpY9RLRP2/BWr792FHx5ZC50gw5VaNzz17WhU1bd9j2tqX9QxFfBUNCLz0462DoVEpk3JfsWKFW26mEwms2iyT0lJwaJFiyx2fSJrWzhoP3oqy/Ho7nFGthSwcNABXKl1xaOZY1HX6ISHQ8/iH0N34MEd43Gp1sMS4ZKFjXjkEr7fq0R5mYutQxEnCX/1rk3JvrCw0NJxtMmcOXOQnJysf11VVYXAwEAbRmQ9VeWO0DYC3p0aDco7dGzEb5dMno0hG1gwcD/u6/ILHsscC811T6Paqv1/xbAuvyDy00mobmxKDAu+74S7VOkY3/1HfHBmgCVCJgvy61KH/ndXYcn/hdo6FPHi43Ltg1wuh0KhMDikorHBAT+dcMeAu6/qy2QyAf3vrsaZHC7Gsi8CFgzcj/sDC/HEnjG4UGP832M3p6Zf+nR/mEXUQQYHe/4ysITF/O0yKq8448g33rYOhUSIXUI78sU/OmLmymL8+IM78o6748Epl+DqrsPujcbM9ZKtLRq0H2O65ePZrBGoaXDRf1XuaoML6rRN/yQ7ul5DJ7drCPZqWpcS5l2OmkZnlNR4orLeFccv+6OyXo5l6m/w3slI1Gqd8EjoWXT1uIpvfw2y2XujWyOTCbj/b5eR+ZkvdFr7HSpu9yTcs7dpsq+urkZ+fr7+dWFhIXJzc+Hj44OgIP7A+qOsrzpA6avFxFkadOjUiPOn3fBqfAgqLjvfvDG1G/E9zwAA0u//yqD8peyh+OJ8LwDA47edxvN9c/TnNsZ8aVDntzo3PPXtKLzY7wj+Gb0Nzg46/FThg2f3jcC5Ci7usjcD7q6Cf9d67N7Mb1JYkqlPwbPnQTOZIAg2C3/v3r0YNmxYi/KEhASkpaXdtH1VVRWUSiWGYiycZEx4YlewXG3rEMiKQl/JuXklsnuNQgO+bfgUlZWVFpuabc4V3ZYuhYNr27/i+ke62lr8/OqrFo3VUmzasx86dChs+LsGERFJiYSH8W9pgd7+/fvxxBNPQK1W49dfm57w9c9//hMHDhwwa3BERERmI+HH5Rqd7D///HPExsbCzc0Nx48f1z/kprKyEq+//rrZAyQiIiLTGJ3sX3vtNaxZswYffvghnJ3/N09+11134dixY2YNjoiIyFy4xa0R8vLyMGTIkBblSqUSFRUV5oiJiIjI/CT8BD2je/Yqlcrg63LNDhw4gO7du5slKCIiIrPjnH3bTZkyBS+88AIOHz4MmUyGkpISbNiwATNnzsTUqVMtESMRERGZwOhh/Jdffhk6nQ7Dhw/HtWvXMGTIEMjlcsycORPTpk2zRIxEREQmk/JDdYxO9jKZDK+++ipmzZqF/Px8VFdXIzw8HJ6exm3kQUREZFUS/p79LT9Ux8XFBeHh4eaMhYiIiCzA6Dn7YcOG4b777rvhQURE1C6Z+rU7I3v2KSkpGDRoELy8vODn54dx48YhLy/PoE5tbS0SExPh6+sLT09PxMXFobS01KBOUVERRo8eDXd3d/j5+WHWrFlobDTc7vxmjE72/fv3R79+/fRHeHg46uvrcezYMURERBh7OSIiIuuw8mr8rKwsJCYm4tChQ8jMzERDQwNiYmJQU1OjrzNjxgxs27YNn376KbKyslBSUoLx48frz2u1WowePRr19fU4ePAg1q9fj7S0NMy
fP9+oWIwexl+xYkWr5QsXLkR1dbWxlyMiIhKlnTt3GrxOS0uDn58fcnJyMGTIEFRWVmLt2rVIT0/Xj4yvW7cOvXv3xqFDhzB48GDs3r0bZ86cwddffw1/f3/0798fS5YswezZs7Fw4UK4uLi0KZZbejZ+a5544gl8/PHH5rocERGReZmpZ19VVWVwND82/mYqKysBAD4+PgCAnJwcNDQ0IDo6Wl+nV69eCAoKQnZ2NgAgOzsbERER8Pf319eJjY1FVVUVTp8+3ea3brZkn52dDVcTtg4kIiKyJHM9LjcwMBBKpVJ/pKSk3PTeOp0O06dPx1133YU+ffoAADQaDVxcXODt7W1Q19/fHxqNRl/n94m++XzzubYyehj/93MJACAIAi5evIijR49i3rx5xl6OiIjIrhQXFxvsZy+Xy2/aJjExEadOnbLZ7rBGJ3ulUmnw2sHBAWFhYVi8eDFiYmLMFhgREVF7pFAoDJL9zSQlJSEjIwP79u1D165d9eUqlQr19fWoqKgw6N2XlpZCpVLp6xw5csTges2r9ZvrtIVRyV6r1WLSpEmIiIhAhw4djGlKRERkW1Z+qI4gCJg2bRq2bNmCvXv3IiQkxOB8ZGQknJ2dsWfPHsTFxQFo2myuqKgIarUaAKBWq7F06VKUlZXBz88PAJCZmQmFQmHUs26MSvaOjo6IiYnB2bNnmeyJiMiuWPtxuYmJiUhPT8eXX34JLy8v/Ry7UqmEm5sblEolJk+ejOTkZPj4+EChUGDatGlQq9UYPHgwACAmJgbh4eGYMGECli1bBo1Gg7lz5yIxMbFN0wfNjF6g16dPH5w/f97YZkRERJKyevVqVFZWYujQoejcubP+2LRpk77OihUr8Ne//hVxcXEYMmQIVCoVvvjiC/15R0dHZGRkwNHREWq1Gk888QQmTpyIxYsXGxWL0XP2r732GmbOnIklS5YgMjISHh4eBueNmccgIiKyKis+314Qbn4zV1dXpKamIjU19YZ1goODsX37dpNiaXOyX7x4MV588UWMGjUKAPDAAw9AJpPpzwuCAJlMBq1Wa1JAREREFsGNcG5u0aJFePbZZ/Htt99aMh4iIiIyszYn++bhiHvvvddiwRAREVkK97Nvo98P2xMREdkVDuO3Tc+ePW+a8MvLy00KiIiIiMzLqGS/aNGiFk/QIyIisgccxm+jRx99VP8EHyIiIrsi4WH8Nj9Uh/P1RERE9sno1fhERER2ScI9+zYne51OZ8k4iIiILIpz9kRERGIn4Z690RvhEBERkX1hz56IiKRBwj17JnsiIpIEKc/ZcxifiIhI5NizJyIiaeAwPhERkbhxGJ+IiIhEiz17IiKSBg7jExERiZyEkz2H8YmIiESOPXsiIpIE2X8PU9rbKyZ7IiKSBgkP4zPZExGRJPCrd0RERCRa7NkTEZE0cBifiIhIAuw4YZuCw/hEREQix549ERFJgpQX6DHZExGRNEh4zp7D+ERERBawb98+jBkzBgEBAZDJZNi6davB+SeffBIymczgGDFihEGd8vJyxMfHQ6FQwNvbG5MnT0Z1dbXRsTDZExGRJDQP45tyGKOmpgb9+vVDamrqDeuMGDECFy9e1B///ve/Dc7Hx8fj9OnTyMzMREZGBvbt24dnnnnG6PfOYXwiIpIGKw/jjxw5EiNHjvzTOnK5HCqVqtVzZ8+exc6dO/H9999j4MCBAIB3330Xo0aNwptvvomAgIA2x8KePRERkRGqqqoMjrq6ulu+1t69e+Hn54ewsDBMnToVV65c0Z/Lzs6Gt7e3PtEDQHR0NBwcHHD48GGj7iOKnr3M2QUymbOtwyALC30lx9YhkBWdXxxp6xDICnS1tcDCT61yL3Otxg8MDDQoX7BgARYuXGj09UaMGIHx48cjJCQEBQUFeOWVVzBy5EhkZ2fD0dERGo0Gfn5+Bm2cnJzg4+MDjUZj1L1EkeyJiIhuykzD+MXFxVAoFPpiuVx+S5d79NFH9X+OiIh
A37590aNHD+zduxfDhw83IdCWOIxPRETSIJjhAKBQKAyOW032f9S9e3d07NgR+fn5AACVSoWysjKDOo2NjSgvL7/hPP+NMNkTERG1AxcuXMCVK1fQuXNnAIBarUZFRQVycv43hfnNN99Ap9MhKirKqGtzGJ+IiCTB2k/Qq66u1vfSAaCwsBC5ubnw8fGBj48PFi1ahLi4OKhUKhQUFOCll15CaGgoYmNjAQC9e/fGiBEjMGXKFKxZswYNDQ1ISkrCo48+atRKfIA9eyIikgozDeO31dGjRzFgwAAMGDAAAJCcnIwBAwZg/vz5cHR0xIkTJ/DAAw+gZ8+emDx5MiIjI7F//36DaYENGzagV69eGD58OEaNGoW7774b//jHP4x+6+zZExERWcDQoUMhCDf+DWHXrl03vYaPjw/S09NNjoXJnoiIJEEmCJD9SfJtS3t7xWRPRETSwI1wiIiISKzYsyciIkngfvZERERix2F8IiIiEiv27ImISBI4jE9ERCR2Eh7GZ7InIiJJkHLPnnP2REREIseePRERSQOH8YmIiMTPnofiTcFhfCIiIpFjz56IiKRBEJoOU9rbKSZ7IiKSBK7GJyIiItFiz56IiKSBq/GJiIjETaZrOkxpb684jE9ERCRy7NkTEZE0cBifiIhI3KS8Gp/JnoiIpEHC37PnnD0REZHIsWdPRESSwGF8IiIisZPwAj0O4xMREYkce/ZERCQJHMYnIiISO67GJyIiIrFiz56IiCSBw/hERERix9X4REREZE779u3DmDFjEBAQAJlMhq1btxqcFwQB8+fPR+fOneHm5obo6Gj89NNPBnXKy8sRHx8PhUIBb29vTJ48GdXV1UbHwmRPRESS0DyMb8phjJqaGvTr1w+pqamtnl+2bBlWrVqFNWvW4PDhw/Dw8EBsbCxqa2v1deLj43H69GlkZmYiIyMD+/btwzPPPGP0e+cwPhERSYNOaDpMaQ+gqqrKoFgul0Mul7eoPnLkSIwcObLVSwmCgJUrV2Lu3LkYO3YsAOCTTz6Bv78/tm7dikcffRRnz57Fzp078f3332PgwIEAgHfffRejRo3Cm2++iYCAgDaHzp49ERFJg2CGA0BgYCCUSqX+SElJMTqUwsJCaDQaREdH68uUSiWioqKQnZ0NAMjOzoa3t7c+0QNAdHQ0HBwccPjwYaPux549ERGREYqLi6FQKPSvW+vV34xGowEA+Pv7G5T7+/vrz2k0Gvj5+Rmcd3Jygo+Pj75OWzHZExGRJMhg4lfv/vtfhUJhkOztAYfxiYhIGpqfoGfKYSYqlQoAUFpaalBeWlqqP6dSqVBWVmZwvrGxEeXl5fo6bcVkT0REZGUhISFQqVTYs2ePvqyqqgqHDx+GWq0GAKjValRUVCAnJ0df55tvvoFOp0NUVJRR9+MwPhERSYK1n6BXXV2N/Px8/evCwkLk5ubCx8cHQUFBmD59Ol577TXcdtttCAkJwbx58xAQEIBx48YBAHr37o0RI0ZgypQpWLNmDRoaGpCUlIRHH33UqJX4AJM9ERFJhZWfoHf06FEMGzZM/zo5ORkAkJCQgLS0NLz00kuoqanBM888g4qKCtx9993YuXMnXF1d9W02bNiApKQkDB8+HA4ODoiLi8OqVauMDp3JnoiIyAKGDh0K4U/m+WUyGRYvXozFixffsI6Pjw/S09NNjoXJnoiIJEEmCJCZsMjOlLa2xmRPRETSoPvvYUp7O8XV+ERERCLHnj0REUkCh/GJiIjETsL72TPZExGRNJj6FDw77tlzzp6IiEjk2LMnIiJJsPYT9NoTJns74eAg4IkZv+K+B6+gQ6cGXCl1wdefdUT6qs74315MJBb8vMXh//ocQ0xwIUKUFahrdMTxSyoszxmMwipvfR0Xh0bMGZSNUd3y4eKoxYGSQCw8dA+u1Lrr66hVF/DCgO/Rs0M5rjc6YUt+GFYcvxNagYOzRpHwMD6TvZ3429SLGP3EJbz1Ygh++dENt/W
tQfLyQtRUOeLLNP+bX4DsCj9vcRikuoh/nbsdJ6/4wUmmQ/IdR/Dx/RkY9eUjuN7oDAB45c6DGNqlCC9kxeBqvQvmRx3Ae8N24bEdDwIAenW4jA+jt2P1iTvw0oH74O9eg8WD98HRQcDfj6pt+fbIjtj018KUlBQMGjQIXl5e8PPzw7hx45CXl2fLkNqt8MhqHMr0xpFvvFF6QY4D231wbL8SYf2rbR0aWQA/b3F4+uvR2FLQC/kVPjj3W0fMPjAMXTyrcbvvJQCAp3MdHgo9h5SjahzSdMHp8k6Y891QRPqVol/Hpq1PR3UrQN5vvkg9MRBFV5X4vjQAy3IGIz7sFDyc6m359uyOTGf6Ya9smuyzsrKQmJiIQ4cOITMzEw0NDYiJiUFNTY0tw2qXzuR4ov9fqtAlpBYAENL7Gm4feBXf7/W2bWBkEfy8xcnLpSk5V9Y1bXTSx/cyXBx1OFjSVV/nfFUH/FrtiQF+GgCAi6MWdVpHg+vUaZ3g6qTV/9JAbdSO9rO3NpsO4+/cudPgdVpaGvz8/JCTk4MhQ4a0qF9XV4e6ujr966qqKovH2F5sfr8z3D21+PCbk9BpZXBwFLB+eRd8u9XX1qGRBfDzFh8ZBLw66DvklKrwU4UPAKCj2zXUax1wtUFuUPdKrRs6ul4HAOwvCURC75MYHfITdvzcA53criGxX9P+5p3cr1n3TZDdaldz9pWVlQCadvlpTUpKChYtWmTNkNqNIX8tx33jruDvz3fHLz+6oUf4NfzfgqKmhVufd7R1eGRm/LzFZ8Hg/bitQzke2zHOqHbflQRiWc5gLB68H8vv/gb1Wke8fyISg/wvQhC4WNMofKiO7el0OkyfPh133XUX+vTp02qdOXPm6PcDBpp69oGBgdYK0aaefqUYm1d3Rta2pp7dz3nu8Otaj0eeu8gf/iLEz1tc5kftx7CuvyB+51iUXvPUl1++7g4XRx28nOsMeve+rtdxudZN/3rdmX5Yd6Yv/NyuobJejq6eVzEz8jCKryqs+j7sHR+X2w4kJibi1KlTOHDgwA3ryOVyyOXyG54XM7mbDjqd4W/xOi0gc7Dfv3x0Y/y8xULA/KgDuD+oEE/sfAAXqg2T86krHVGvdYC686/YXdQdABCiqEAXz2ocL1P94VoylF33AACMDslHSbUnTpfzFz9qm3aR7JOSkpCRkYF9+/aha9euN28gQYe/9sajSSW4VOLSNKx7+zU8+HQpdm/mP3Yx4uctDgui9mNM93xM/WYEahpc0NG1aY79aoML6rROqG6Q47P8Xpgz6CAq6+WornfBvKgDOFbmjx8u/+8rlpNvz8X+XwOhgwwxQYV4ps9xTM+6Hzp+z944/J69bQiCgGnTpmHLli3Yu3cvQkJCbBlOu/b+gmBMfPFXJC75Bd4dmx6ysiO9Eza8E2Dr0MgC+HmLQ3yvMwCADSO+MiiffWAothT0AgC8fuQvEAbJ8O7Q3XBx+N9DdX5vSJciTO17DC4OWpz7zRfPfTsC+34Nss6bEBMBpu1Jb7+5HjJBsN2vKs899xzS09Px5ZdfIiwsTF+uVCrh5ub2Jy2bVFVVQalUYpjz3+Akc7ZkqERkZecXR9o6BLICXW0tChe+isrKSigUllmD0Jwr7hvwMpwcXW/5Oo3aWnxz/A2LxmopNh0DWr16NSorKzF06FB07txZf2zatMmWYREREYmKzYfxiYiIrEKAiXP2ZovE6trFAj0iIiKLk/ACPS7lJCIiEjn27ImISBp0MG2HaDveCIfJnoiIJEHKT9DjMD4REZHIsWdPRETSIOEFekz2REQkDRJO9hzGJyIiEjn27ImISBrYsyciIhI5nRkOIyxcuBAymczg6NWrl/58bW0tEhMT4evrC09PT8TFxaG0tNTEN9k6JnsiIpKE5q/emXIY6/bbb8fFixf1x4EDB/TnZsyYgW3btuHTTz9FVlYWSkpKMH78eHO+ZT0O4xMRERmhqqrK4LVcLodcLm+1rpO
TE1QqVYvyyspKrF27Funp6bjvvvsAAOvWrUPv3r1x6NAhDB482Kwxs2dPRETS0Dxnb8oBIDAwEEqlUn+kpKTc8JY//fQTAgIC0L17d8THx6OoqAgAkJOTg4aGBkRHR+vr9urVC0FBQcjOzjb7W2fPnoiIpEEnADITFtnpmtoWFxcb7Gd/o159VFQU0tLSEBYWhosXL2LRokW45557cOrUKWg0Gri4uMDb29ugjb+/PzQaza3HeANM9kREREZQKBQGyf5GRo4cqf9z3759ERUVheDgYGzevBlubm6WDLEFDuMTEZE0mGkY/1Z5e3ujZ8+eyM/Ph0qlQn19PSoqKgzqlJaWtjrHbyomeyIikghTE71pyb66uhoFBQXo3LkzIiMj4ezsjD179ujP5+XloaioCGq12sT32RKH8YmIiCxg5syZGDNmDIKDg1FSUoIFCxbA0dERjz32GJRKJSZPnozk5GT4+PhAoVBg2rRpUKvVZl+JDzDZExGRVFj5CXoXLlzAY489hitXrqBTp064++67cejQIXTq1AkAsGLFCjg4OCAuLg51dXWIjY3F+++/f+vx/QkmeyIikgadiUPxOuPabty48U/Pu7q6IjU1FampqbceUxtxzp6IiEjk2LMnIiJpEHRNhynt7RSTPRERSYOEd71jsiciImmw8px9e8I5eyIiIpFjz56IiKSBw/hEREQiJ8DEZG+2SKyOw/hEREQix549ERFJA4fxiYiIRE6nA2DCd+V19vs9ew7jExERiRx79kREJA0cxiciIhI5CSd7DuMTERGJHHv2REQkDRJ+XC6TPRERSYIg6CCYsHOdKW1tjcmeiIikQRBM651zzp6IiIjaK/bsiYhIGgQT5+ztuGfPZE9ERNKg0wEyE+bd7XjOnsP4REREIseePRERSQOH8YmIiMRN0OkgmDCMb89fveMwPhERkcixZ09ERNLAYXwiIiKR0wmATJrJnsP4REREIseePRERSYMgADDle/b227NnsiciIkkQdAIEE4bxBSZ7IiKidk7QwbSePb96R0RERK1ITU1Ft27d4OrqiqioKBw5csTqMTDZExGRJAg6weTDWJs2bUJycjIWLFiAY8eOoV+/foiNjUVZWZkF3uGNMdkTEZE0CDrTDyO9/fbbmDJlCiZNmoTw8HCsWbMG7u7u+Pjjjy3wBm/MrufsmxdLNAoNNo6EiMxNV1tr6xDICpo/Z2ssfmtEg0nP1GlEU66pqqoyKJfL5ZDL5S3q19fXIycnB3PmzNGXOTg4IDo6GtnZ2bceyC2w62R/9epVAMD+xq22DYSIzG/hp7aOgKzo6tWrUCqVFrm2i4sLVCoVDmi2m3wtT09PBAYGGpQtWLAACxcubFH38uXL0Gq18Pf3Nyj39/fHuXPnTI7FGHad7AMCAlBcXAwvLy/IZDJbh2M1VVVVCAwMRHFxMRQKha3DIQviZy0dUv2sBUHA1atXERAQYLF7uLq6orCwEPX19SZfSxCEFvmmtV59e2PXyd7BwQFdu3a1dRg2o1AoJPVDQcr4WUuHFD9rS/Xof8/V1RWurq4Wv8/vdezYEY6OjigtLTUoLy0thUqlsmosXKBHRERkAS4uLoiMjMSePXv0ZTqdDnv27IFarbZqLHbdsyciImrPkpOTkZCQgIEDB+LOO+/EypUrUVNTg0mTJlk1DiZ7OySXy7FgwQK7mCci0/Czlg5+1uL0yCOP4NKlS5g/fz40Gg369++PnTt3tli0Z2kywZ4f9ktEREQ3xTl7IiIikWOyJyIiEjkmeyIiIpFjsiciIhI5Jns70x62SiTL27dvH8aMGYOAgADIZDJs3brV1iGRhaSkpGDQoEHw8vKCn58fxo0bh7y8PFuHRSLDZG9H2stWiWR5NTU16NevH1JTU20dCllYVlYWEhMTcejQIWRmZqKhoQExMTGoqamxdWgkIvzqnR2JiorCoEGD8N577wFoehJTYGAgpk2bhpdfftnG0ZGlyGQybNmyBeP
GjbN1KGQFly5dgp+fH7KysjBkyBBbh0MiwZ69nWjeKjE6OlpfZqutEonIciorKwEAPj4+No6ExITJ3k782VaJGo3GRlERkTnpdDpMnz4dd911F/r06WPrcEhE+LhcIqJ2IjExEadOncKBAwdsHQqJDJO9nWhPWyUSkfklJSUhIyMD+/btk/TW3WQZHMa3E+1pq0QiMh9BEJCUlIQtW7bgm2++QUhIiK1DIhFiz96OtJetEsnyqqurkZ+fr39dWFiI3Nxc+Pj4ICgoyIaRkbklJiYiPT0dX375Jby8vPRrcJRKJdzc3GwcHYkFv3pnZ9577z0sX75cv1XiqlWrEBUVZeuwyMz27t2LYcOGtShPSEhAWlqa9QMii5HJZK2Wr1u3Dk8++aR1gyHRYrInIiISOc7ZExERiRyTPRERkcgx2RMREYkckz0REZHIMdkTERGJHJM9ERGRyDHZExERiRyTPRERkcgx2ROZ6Mknn8S4ceP0r4cOHYrp06dbPY69e/dCJpOhoqLihnVkMhm2bt3a5msuXLgQ/fv3Nymun3/+GTKZDLm5uSZdh4huHZM9idKTTz4JmUwGmUwGFxcXhIaGYvHixWhsbLT4vb/44gssWbKkTXXbkqCJiEzFjXBItEaMGIF169ahrq4O27dvR2JiIpydnTFnzpwWdevr6+Hi4mKW+/r4+JjlOkRE5sKePYmWXC6HSqVCcHAwpk6diujoaHz11VcA/jf0vnTpUgQEBCAsLAwAUFxcjIcffhje3t7w8fHB2LFj8fPPP+uvqdVqkZycDG9vb/j6+uKll17CH7eX+OMwfl1dHWbPno3AwEDI5XKEhoZi7dq1+Pnnn/Wb3XTo0AEymUy/8YlOp0NKSgpCQkLg5uaGfv364bPPPjO4z/bt29GzZ0+4ublh2LBhBnG21ezZs9GzZ0+4u7uje/fumDdvHhoaGlrU++CDDxAYGAh3d3c8/PDDqKysNDj/0UcfoXfv3nB1dUWvXr3w/vvvGx0LEVkOkz1JhpubG+rr6/Wv9+zZg7y8PGRmZiIjIwMNDQ2IjY2Fl5cX9u/fj++++w6enp4YMWKEvt1bb72FtLQ0fPzxxzhw4ADKy8uxZcuWP73vxIkT8e9//xurVq3C2bNn8cEHH8DT0xOBgYH4/PPPAQB5eXm4ePEi3nnnHQBASkoKPvnkE6xZswanT5/GjBkz8MQTTyArKwtA0y8l48ePx5gxY5Cbm4unn34aL7/8stH/T7y8vJCWloYzZ87gnXfewYcffogVK1YY1MnPz8fmzZuxbds27Ny5E8ePH8dzzz2nP79hwwbMnz8fS5cuxdmzZ/H6669j3rx5WL9+vdHxEJGFCEQilJCQIIwdO1YQBEHQ6XRCZmamIJfLhZkzZ+rP+/v7C3V1dfo2//znP4WwsDBBp9Ppy+rq6gQ3Nzdh165dgiAIQufOnYVly5bpzzc0NAhdu3bV30sQBOHee+8VXnjhBUEQBCEvL08AIGRmZrYa57fffisAEH777Td9WW1treDu7i4cPHjQoO7kyZOFxx57TBAEQZgzZ44QHh5ucH727NktrvVHAIQtW7bc8Pzy5cuFyMhI/esFCxYIjo6OwoULF/RlO3bsEBwcHISLFy8KgiAIPXr0ENLT0w2us2TJEkGtVguCIAiFhYUCAOH48eM3vC8RWRbn7Em0MjIy4OnpiYaGBuh0Ojz++ONYuHCh/nxERITBPP0PP/yA/Px8eHl5GVyntrYWBQUFqKysxMWLFxEVFaU/5+TkhIEDB7YYym+Wm5sLR0dH3HvvvW2OOz8/H9euXcP9999vUF5fX48BAwYAAM6ePWsQBwCo1eo236PZpk2bsGrVKhQUFKC6uhqNjY1QKBQGdYKCgtClSxeD++h0OuTl5cHLywsFBQWYPHkypkyZoq/T2NgIpVJpdDxEZBlM9iRaw4YNw+rVq+Hi4oKAgAA4ORn+dffw8DB4XV1djcjISGzYsKHFtTp16nRLMbi5uRn
dprq6GgDwn//8xyDJAk3rEMwlOzsb8fHxWLRoEWJjY6FUKrFx40a89dZbRsf64Ycftvjlw9HR0WyxEpFpmOxJtDw8PBAaGtrm+nfccQc2bdoEPz+/Fr3bZp07d8bhw4cxZMgQAE092JycHNxxxx2t1o+IiIBOp0NWVhaio6NbnG8eWdBqtfqy8PBwyOVyFBUV3XBEoHfv3vrFhs0OHTp08zf5OwcPHkRwcDBeffVVfdkvv/zSol5RURFKSkoQEBCgv4+DgwPCwsLg7++PgIAAnD9/HvHx8Ubdn4ishwv0iP4rPj4eHTt2xNixY7F//34UFhZi7969eP7553HhwgUAwAsvvIA33ngDW7duxblz5/Dcc8/96Xfku3XrhoSEBDz11FPYunWr/pqbN28GAAQHB0MmkyEjIwOXLl1CdXU1vLy8MHPmTMyYMQPr169HQUEBjh07hnfffVe/6O3ZZ5/FTz/9hFmzZiEvLw/p6elIS0sz6v3edtttKCoqwsaNG1FQUIBVq1a1utjQ1dUVCQkJ+OGHH7B//348//zzePjhh6FSqQAAixYtQkpKClatWoUff/wRJ0+exLp16/D2228bFQ8RWQ6TPdF/ubu7Y9++fQgKCsL48ePRu3dvTJ48GbW1tfqe/osvvogJEyYgISEBarUaXl5eePDBB//0uqtXr8ZDDz2E5557Dr169cKUKVNQU1MDAOjSpQsWLVqEl19+Gf7+/khKSgIALFmyBPPmzUNKSgp69+6NESNG4D//+Q9CQkIANM2jf/7559i6dSv69euHNWvW4PXXXzfq/T7wwAOYMWMGkpKS0L9/fxw8eBDz5s1rUS80NBTjx4/HqFGjEBMTg759+xp8te7pp5/GRx99hHXr1iEiIgL33nsv0tLS9LESke3JhButLCIiIiJRYM+eiIhI5JjsiYiIRI7JnoiISOSY7ImIiESOyZ6IiEjkmOyJiIhEjsmeiIhI5JjsiYiIRI7JnoiISOSY7ImIiESOyZ6IiEjk/h9+C3f4P1oUswAAAABJRU5ErkJggg==", + "text/plain": [ + "<Figure size 640x480 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cm =confusion_matrix(y_test, y_pred)\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm)\n", + "disp.plot()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.94703762 0.96512283 0.96383989 0.95301525 0.94614962]\n", + " StratifiedKFold Cross-Validation Accuracy: 95.50% | Standard Deviation: 0.81%\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.model_selection import StratifiedKFold\n", + "\n", + "skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)\n", + "cvScore = cross_val_score(gb_clf, X_train, y_train, cv=skf, scoring='f1_macro')\n", + "print (cvScore)\n", + "print (\" StratifiedKFold 
Cross-Validation Accuracy: %0.2f%% | Standard Deviation: %0.2f%%\" % (100*cvScore.mean(), 100*cvScore.std()))\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1> 6. Testing Data</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\60172\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\sklearn\\base.py:439: UserWarning: X does not have valid feature names, but GradientBoostingClassifier was fitted with feature names\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "predicted marker\n", + "1 47\n", + "0 31\n", + "2 22\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_testpred = gb_clf.predict(mTest.values)\n", + "y_testpred = pd.DataFrame(y_testpred, columns=['predicted marker'])\n", + "y_testpred.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "y_testpred.to_csv('testresult.csv')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}