diff --git a/Part_B_gradient_boosting.ipynb b/Part_B_gradient_boosting.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..3793d6154e67dac89f1181c079115fe9ac6387c1 --- /dev/null +++ b/Part_B_gradient_boosting.ipynb @@ -0,0 +1,1876 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>1. Loading Datasets</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "[ TrainingDataMulti.csv info ]\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 6000 entries, 0 to 5999\n", + "Columns: 129 entries, R1-PA1:VH to marker\n", + "dtypes: float64(112), int64(17)\n", + "memory usage: 5.9 MB\n", + "\n", + "[ TestingDataMulti.csv info ]\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 100 entries, 0 to 99\n", + "Columns: 128 entries, R1-PA1:VH to snort_log4\n", + "dtypes: float64(104), int64(24)\n", + "memory usage: 100.1 KB\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "\n", + "mTrain = pd.read_csv(\"TrainingDataMulti.csv\")\n", + "mTest = pd.read_csv(\"TestingDataMulti.csv\")\n", + "\n", + "\n", + "print(\"\\n[ TrainingDataMulti.csv info ]\")\n", + "mTrain.info()\n", + "\n", + "print(\"\\n[ TestingDataMulti.csv info ]\")\n", + "mTest.info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>1.1 Analysing the Data</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "R1-PA1:VH float64\n", + "R1-PM1:V float64\n", + "R1-PA2:VH float64\n", + "R1-PM2:V float64\n", + "R1-PA3:VH float64\n", + " ... \n", + "snort_log1 int64\n", + "snort_log2 int64\n", + "snort_log3 int64\n", + "snort_log4 int64\n", + "marker int64\n", + "Length: 129, dtype: object" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "0 3000\n", + "2 1500\n", + "1 1500\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain['marker'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " <th>marker</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " <td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 129 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... \n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log4 relay1_log relay2_log relay3_log relay4_log \n", + "0 0 0 0 0 0 \\\n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + " snort_log1 snort_log2 snort_log3 snort_log4 marker \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + "[6000 rows x 129 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " <th>marker</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " <td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 129 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... \n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log4 relay1_log relay2_log relay3_log relay4_log \n", + "0 0 0 0 0 0 \\\n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + " snort_log1 snort_log2 snort_log3 snort_log4 marker \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + "[6000 rows x 129 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain.isnull().sum()\n", + "mTrain = mTrain.dropna()\n", + "mTrain\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log3</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " <td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 128 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... \n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log3 control_panel_log4 relay1_log relay2_log \n", + "0 0 0 0 0 \\\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "... ... ... ... ... \n", + "5995 0 0 0 0 \n", + "5996 0 0 0 0 \n", + "5997 0 0 0 0 \n", + "5998 0 0 0 0 \n", + "5999 0 0 0 0 \n", + "\n", + " relay3_log relay4_log snort_log1 snort_log2 snort_log3 snort_log4 \n", + "0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... \n", + "5995 0 0 0 0 0 0 \n", + "5996 0 0 0 0 0 0 \n", + "5997 0 0 0 0 0 0 \n", + "5998 0 0 0 0 0 0 \n", + "5999 0 0 0 0 0 0 \n", + "\n", + "[6000 rows x 128 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = mTrain.drop(columns = 'marker')\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "y = mTrain['marker']" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Stratified Train-Test Split\n", + "The train-test split is stratified to ensure that the train and test samples from each class are almost the same percentage. This may be desirable for imbalanced number of samples as in this case. \n", + "\n", + "In such imbalanced datasets, the stratified K fold cross validation is used instead of the K-fold cross validation" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=1, test_size=0.2, stratify=y)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "0 2400\n", + "1 1200\n", + "2 1200\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "0 600\n", + "1 300\n", + "2 300\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.value_counts()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>3. Choosing a Model: KNN , training, and evaluation</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "from xgboost import XGBClassifier\n", + "from sklearn.ensemble import GradientBoostingClassifier\n", + "\n", + "gb_clf = GradientBoostingClassifier(n_estimators=300, learning_rate=0.5, max_depth=8, random_state=10, subsample=1.0, max_features='log2')\n", + "gb_clf.fit(X_train, y_train)\n", + "y_pred = gb_clf.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8708333333333333" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gb_clf.score(X_test, y_test)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>4. Improving</h1>" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A grid search will be performed to find the optimal value of K. \n", + "\n", + "Afterwards, the stratified K fold cross validation will be used, followed by a confusion metric as an evaluation." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>param_n_estimators</th>\n", + " <th>param_max_leaf_nodes</th>\n", + " <th>param_learning_rate</th>\n", + " <th>mean_test_error</th>\n", + " <th>std_test_error</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>500</td>\n", + " <td>100</td>\n", + " <td>0.709894</td>\n", + " <td>0.257099</td>\n", + " <td>0.012799</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>500</td>\n", + " <td>5</td>\n", + " <td>0.771785</td>\n", + " <td>0.269003</td>\n", + " <td>0.009152</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>200</td>\n", + " <td>20</td>\n", + " <td>0.160519</td>\n", + " <td>0.293038</td>\n", + " <td>0.010013</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>200</td>\n", + " <td>50</td>\n", + " <td>0.110585</td>\n", + " <td>0.321914</td>\n", + " <td>0.007619</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>200</td>\n", + " <td>20</td>\n", + " <td>0.109889</td>\n", + " <td>0.322681</td>\n", + " <td>0.008026</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>10</td>\n", + " <td>5</td>\n", + " <td>0.637819</td>\n", + " <td>0.432431</td>\n", + " <td>0.011625</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>500</td>\n", + " <td>2</td>\n", + " <td>0.07502</td>\n", + " <td>0.469790</td>\n", + " <td>0.011961</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>100</td>\n", + " <td>5</td>\n", + " <td>0.0351</td>\n", + " <td>0.477876</td>\n", + " <td>0.011579</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>5</td>\n", + " <td>20</td>\n", + " <td>0.202432</td>\n", + " <td>0.559196</td>\n", + " <td>0.017641</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>10</td>\n", + " <td>5</td>\n", + " <td>0.088556</td>\n", + " <td>0.587139</td>\n", + " <td>0.017203</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>5</td>\n", + " <td>2</td>\n", + " <td>0.462636</td>\n", + " <td>0.603163</td>\n", + " <td>0.014503</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>50</td>\n", + " <td>100</td>\n", + " <td>0.010904</td>\n", + " <td>0.611627</td>\n", + " <td>0.016837</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>5</td>\n", + " <td>100</td>\n", + " <td>0.070357</td>\n", + " <td>0.650331</td>\n", + " <td>0.017045</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>2</td>\n", + " <td>50</td>\n", + " <td>0.167568</td>\n", + " <td>0.650802</td>\n", + " <td>0.017337</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>0.421054</td>\n", + " <td>0.659051</td>\n", + " <td>0.017061</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>0.190477</td>\n", + " <td>0.696568</td>\n", + " <td>0.016156</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>5</td>\n", + " <td>20</td>\n", + " <td>0.033815</td>\n", + " <td>0.698969</td>\n", + " <td>0.016434</td>\n", + " </tr>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>100</td>\n", + " <td>0.125207</td>\n", + " <td>0.709805</td>\n", + " <td>0.016143</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>1</td>\n", + " <td>10</td>\n", + " <td>0.081715</td>\n", + " <td>0.723857</td>\n", + " <td>0.015875</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>1</td>\n", + " <td>20</td>\n", + " <td>0.014937</td>\n", + " <td>0.745432</td>\n", + " <td>0.015467</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " param_n_estimators param_max_leaf_nodes param_learning_rate \n", + "6 500 100 0.709894 \\\n", + "17 500 5 0.771785 \n", + "1 200 20 0.160519 \n", + "12 200 50 0.110585 \n", + "10 200 20 0.109889 \n", + "18 10 5 0.637819 \n", + "3 500 2 0.07502 \n", + "4 100 5 0.0351 \n", + "19 5 20 0.202432 \n", + "9 10 5 0.088556 \n", + "8 5 2 0.462636 \n", + "15 50 100 0.010904 \n", + "2 5 100 0.070357 \n", + "16 2 50 0.167568 \n", + "5 2 2 0.421054 \n", + "11 1 5 0.190477 \n", + "13 5 20 0.033815 \n", + "0 1 100 0.125207 \n", + "14 1 10 0.081715 \n", + "7 1 20 0.014937 \n", + "\n", + " mean_test_error std_test_error \n", + "6 0.257099 0.012799 \n", + "17 0.269003 0.009152 \n", + "1 0.293038 0.010013 \n", + "12 0.321914 0.007619 \n", + "10 0.322681 0.008026 \n", + "18 0.432431 0.011625 \n", + "3 0.469790 0.011961 \n", + "4 0.477876 0.011579 \n", + "19 0.559196 0.017641 \n", + "9 0.587139 0.017203 \n", + "8 0.603163 0.014503 \n", + "15 0.611627 0.016837 \n", + "2 0.650331 0.017045 \n", + "16 0.650802 0.017337 \n", + "5 0.659051 0.017061 \n", + "11 0.696568 0.016156 \n", + "13 0.698969 0.016434 \n", + "0 0.709805 0.016143 \n", + "14 0.723857 0.015875 \n", + "7 0.745432 0.015467 " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# from scipy.stats import loguniform\n", + "# from sklearn.model_selection import RandomizedSearchCV\n", + "# from sklearn.ensemble import GradientBoostingRegressor\n", + "\n", + "# param_distributions = {\n", + "# \"n_estimators\": [1, 2, 5, 10, 20, 50, 100, 200, 500],\n", + "# \"max_leaf_nodes\": [2, 5, 10, 20, 50, 100],\n", + "# \"learning_rate\": loguniform(0.01, 1),\n", + "# }\n", + "# search_cv = RandomizedSearchCV(\n", + "# GradientBoostingRegressor(), param_distributions=param_distributions,\n", + "# scoring=\"neg_mean_absolute_error\", n_iter=20, random_state=0, n_jobs=2\n", + "# )\n", + "# search_cv.fit(X_train, y_train)\n", + "\n", + "# columns = [f\"param_{name}\" for name in param_distributions.keys()]\n", + "# columns += [\"mean_test_error\", \"std_test_error\"]\n", + "# cv_results = pd.DataFrame(search_cv.cv_results_)\n", + "# cv_results[\"mean_test_error\"] = -cv_results[\"mean_test_score\"]\n", + "# cv_results[\"std_test_error\"] = cv_results[\"std_test_score\"]\n", + "# cv_results[columns].sort_values(by=\"mean_test_error\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1> 5. Metric Evaluation</h1>" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Confusion Matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[597, 3, 0],\n", + " [ 1, 290, 9],\n", + " [ 6, 15, 279]], dtype=int64)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import ConfusionMatrixDisplay\n", + "confusion_matrix(y_test, y_pred)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.99 0.99 0.99 600\n", + " 1 0.94 0.97 0.95 300\n", + " 2 0.97 0.93 0.95 300\n", + "\n", + " accuracy 0.97 1200\n", + " macro avg 0.97 0.96 0.96 1200\n", + "weighted avg 0.97 0.97 0.97 1200\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfsAAAGwCAYAAACuFMx9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA8WElEQVR4nO3deXxU1fnH8e9k3wMBkxAJm8gS2RQQUldsJFKqIvizVdSIaFsMKFCoUmVXY7FuKIpVJNhK3VFJFY0IASSgBGNZIyCYQEgAAwmJZJu5vz9oxk4BzTCTTGbu5/163ZfMvefceYZgnnnOOfdei2EYhgAAgM/y83QAAACgaZHsAQDwcSR7AAB8HMkeAAAfR7IHAMDHkewBAPBxJHsAAHxcgKcDcIXNZlNxcbEiIyNlsVg8HQ4AwEmGYej48eNKSEiQn1/T1Z/V1dWqra11+TxBQUEKCQlxQ0TNy6uTfXFxsRITEz0dBgDARUVFRWrfvn2TnLu6ulqdO0ao5JDV5XPFx8dr7969XpfwvTrZR0ZGSpK+29xJURHMSPi6G7r38XQIaE7c3NMU6lWndfrQ/vu8KdTW1qrkkFXf5XVSVOTZ54qK4zZ17L9PtbW1JPvm1DB0HxXh59IPEN4hwBLo6RDQrEj2pvCfH3NzTMVGRFoUEXn272OT904Xe3WyBwCgsayGTVYXvkNaDZv7gmlmJHsAgCnYZMjmwoiRK309jbFvAAB8HJU9AMAUbLLJlYF413p7FskeAGAKVsOQ1YWrPFzp62kM4wMA4OOo7AEApmDmBXokewCAKdhkyGrSZM8wPgAAPo7KHgBgCgzjAwDg41iNDwAAfBaVPQDAFGz/2Vzp761I9gAAU7C6uBrflb6eRrIHAJiC1ZCLT71zXyzNjTl7AAB8HJU9AMAUmLMHAMDH2WSRVRaX+nsrhvEBAPBxVPYAAFOwGSc3V/p7K5I9AMAUrC4O47vS19MYxgcAwMdR2QMATMHMlT3JHgBgCjbDIpvhwmp8F/p6GsP4AAD4OCp7AIApMIwPAICPs8pPVhcGtK1ujKW5kewBAKZguDhnbzBnDwAAWioqewCAKTBnDwCAj7MafrIaLszZe/HtchnGBwDAx1HZAwBMwSaLbC7UuDZ5b2lPsgcAmIKZ5+wZxgcAwMdR2QMATMH1BXoM4wMA0KKdnLN34UE4DOMDAICWisoeAGAKNhfvjc9qfAAAWjjm7AEA8HE2+Zn2Onvm7AEA8HFU9gAAU7AaFlldeEytK309jWQPADAFq4sL9KwM4wMAgJaKyh4AYAo2w082F1bj21iNDwBAy8YwPgAA8FlU9gAAU7DJtRX1NveF0uxI9gAAU3D9pjreOxjuvZEDAIBGobIHAJiC6/fG9976mGQPADAFMz/PnmQPADAFM1f23hu5j/n7X+OVmtDPYRt7WQ/78eJ9QZp9Zyfd1KuXbujWWw//vqOOHv7xu9rX6yNO6d+wFeSHeuIjwQW/vv2IXsjeqXd3/lvv7vy3nvrgGw0YUuHpsNCErr3jiJZs3K7l3/5bz2TtUvd+P3g6JLho1qxZslgsDluPHj/+Xq+urlZ6erratGmjiIgIjRo1SqWlpQ7nKCws1PDhwxUWFqbY2FhNnTpV9fX1TsfSIir7BQsW6PHHH1dJSYn69u2rZ599VhdffLGnw2p2Hbuf0GNv7LG/9vc/eQOH6h/89Oebz1OXpBP6y1u7JUlL5rXTjLTOeiZrl/z8pKQBVfpn/laH8y2Z10756yLUre+J5vsQcIvDBwP1SkaCDuwNlsVi6Or/O6pZr+xVemo3ffcNX958zRXXHdXvZhbr2Qfaa+fmMN1w92E9svRbjb2su8q/D/R0eD7D9ZvqON/3ggsu0Keffmp/HRDwY9qdNGmS/vWvf+mtt95SdHS0xo8fr5EjR+rzzz8/+X5Wq4YPH674+HitX79eBw8e1O23367AwEA9+uijTsXh8cr+jTfe0OTJkzVz5kxt3rxZffv2VWpqqg4dOuTp0Jqdv78UE1tv36LbWCVJ274IV2lRkP74dKE696xW557VmvrMd9r1dZjy10VIkgKDDIe+Ua3rlftxlIb+pkwW751mMq2N2dH68rMoFe8N1oFvQ5T5l3aqrvJTj4uo9nzRyN8d0YqlMfrkjRgV7grR/Pvbq+aERak3l3k6NJ9iMywub5JUUVHhsNXU1JzxPQMCAhQfH2/f2rZtK0kqLy/XokWL9OSTT+qqq65S//79tXjxYq1fv14bNmyQJH3yySfavn27/vGPf6hfv34aNmyY5s6dqwULFqi2ttapz+7xZP/kk0/q7rvv1pgxY5SUlKSFCxcqLCxMr7zyiqdDa3YH9gbp5gsvUNrgnnosvYMO7T/5jb6u1iJZTib0BoHBhix+0rYvIk57rtxPonX8aICG/oZfFt7Oz8/QFdcdVXCYTTvywj0dDtwsINCm8/v8oM1rI+37DMOir9ZGKqk/X+5aosTEREVHR9u3jIyMM7bdtWuXEhIS1KVLF40ePVqFhYWSpLy8PNXV1SklJcXetkePHurQoYNyc3MlSbm5uerdu7fi4uLsbVJTU1VRUaFt27Y5FbNHh/Fra2uVl5enadOm2ff5+fkpJSXF/mH/W01NjcM3qIoK35nD7HFRlaY8fULtz6tR2aFA/eOJeP3xhvP14qqd6tG/SiFhNi16JEFjHiiWZNGiR9rJZrWo7NDpf4Qf/7ON+l95XOck1DXvB4HbdOpxQk9/sEtBwTadqPLTnLs6q3BXiKfDgptFxVjlHyAdO+z4//LRIwFK7HrmihHOs7k4jN9wU52ioiJFRUXZ9wcHB5+2/aBBg5SZmanu3bvr4MGDmj17ti677DJt3bpVJSUlCgoKUqtWrRz6xMXFqaSkRJJUUlLikOgbjjccc4ZHk/2RI0dktVpP+2F27tx5SvuMjAzNnj27ucJrVgOvOm7/c5ekavW48AfddnGS1nzQStfcUqaHXtynZ6e11/uL2sriJw0ZcVRde/8gy2n+3R4uDlTe6kj9+cV9zfcB4Hb79wTrnqHdFRZp1WXDj2nK099p6qjzSfjAWXL9qXcn+0ZFRTkk+zMZNmyY/c99+vTRoEGD1LFjR7355psKDW3etTceH8Z3xrRp01ReXm7fioqKPB1Sk4mItqp9lxoV7zv5jbH/lceVmbtDb/x7q97aulV/erZQ35cEql2HU7/5f/JGjCJb1yt5aHlzhw03qq/zU/G+YO3eEqbFjyVo7/ZQjbjrsKfDgptVlPnLWi+1OsdxhXXrtvUOV9zA+7Vq1UrdunXT7t27FR8fr9raWh07dsyhTWlpqeLj4yVJ8fHxp6zOb3jd0KaxPJrs27ZtK39//9N+mNN9kODgYPs3qsZ+s/JWJ6r8VPxdkGJiHYfho9tYFRFtVf66CB07EqDBQx2nMgzjZLJPufGoAljE61MsflJgkDc/igOnU1/np13/DtOFl/44umexGOp3aaW254V5MDLfY5XF5c0VlZWV2rNnj9q1a6f+/fsrMDBQK1eutB8vKChQYWGhkpOTJUnJycnasmWLw4L17OxsRUVFKSkpyan39miyDwoKUv/+/R0+rM1m08qVK+0f1iz+NjtB/84NV0lRkLZ9GabZd3aWv5905Q1HJUkfvx6jHXlhKt4XpJXvtNbDv++kG353+JQ5vfx1ESopDNY1t3zviY8BNxnzQLF6DapUXPsadepxQmMeKFaf5EqtejfG06GhCbz7t7YadkuZUv6vTIldqzXhsf0KCbPpk9f5ebtTwzC+K5szpkyZopycHO3bt0/r16/XDTfcIH9/f918882Kjo7W2LFjNXnyZK1atUp5eXkaM2aMkpOTNXjwYEnS0KFDlZSUpNtuu01ff/21Pv74Yz300ENKT08/4zqBM/H4GNHkyZOVlpamAQMG6OKLL9bTTz+tqqoqjRkzxtOhNasjBwOVcU8nHT/qr+g29bpgYJWezvpGrf5z+d3+PcFanNFOx4/5Ky6xVjffW6qRvzt1SHfFP9soaUClOpzPwh5v1qptvaY+851iYuv1w3F/7d0RogdvOc9hxTZ8R84HrRXdxqrbp5ao9Tn1+nZbqB4c3VnHjjA8583279+vm2++Wd9//73OOeccXXrppdqwYYPOOeccSdJTTz0lPz8/jRo1SjU1NUpNTdXzzz9v7+/v76+srCyNGzdOycnJCg8PV1pamubMmeN0LBbDMIyfb9a0nnvuOftNdfr166f58+dr0KBBP9uvoqJC0dHROvpNF0VFetXyA5yF1HMv9HQIaE6e/9WEZlBv1Gm13ld5eXmTTc025IoZG1MUEnH2X6CqK+s0Z9CnTRprU/F4ZS9J48eP1/jx4z0dBgDAh7lrNb43ahHJHgCApsaDcAAAgM+isgcAmILh4vPsDZ5nDwBAy8YwPgAA8FlU9gAAU/jvx9SebX9vRbIHAJiC1cWn3rnS19O8N3IAANAoVPYAAFNgGB8AAB9nk59sLgxou9LX07w3cgAA0ChU9gAAU7AaFlldGIp3pa+nkewBAKbAnD0AAD7OcPGpdwZ30AMAAC0VlT0AwBSsssjqwsNsXOnraSR7AIAp2AzX5t1thhuDaWYM4wMA4OOo7AEApmBzcYGeK309jWQPADAFmyyyuTDv7kpfT/PerykAAKBRqOwBAKbAHfQAAPBxZp6z997IAQBAo1DZAwBMwSYX743vxQv0SPYAAFMwXFyNb5DsAQBo2cz81Dvm7AEA8HFU9gAAUzDzanySPQDAFBjGBwAAPovKHgBgCma+Nz7JHgBgCgzjAwAAn0VlDwAwBTNX9iR7AIApmDnZM4wPAICPo7IHAJiCmSt7kj0AwBQMuXb5nOG+UJodyR4AYApmruyZswcAwMdR2QMATMHMlT3JHgBgCmZO9gzjAwDg46jsAQCmYObKnmQPADAFw7DIcCFhu9LX0xjGBwDAx1HZAwBMgefZAwDg48w8Z88wPgAATeyxxx6TxWLRxIkT7fuqq6uVnp6uNm3aKCIiQqNGjVJpaalDv8LCQg0fPlxhYWGKjY3V1KlTVV9f7/T7k+wBAKbQsEDPle1sfPnll3rxxRfVp08fh/2TJk3S8uXL9dZbbyknJ0fFxcUaOXKk/bjVatXw4cNVW1ur9evXa8mSJcrMzNSMGTOcjoFkDwAwhYZhfFc2Z1VWVmr06NF66aWX1Lp1a/v+8vJyLVq0SE8++aSuuuoq9e/fX4sXL9b69eu1YcMGSdInn3yi7du36x//+If69eunYcOGae7cuVqwYIFqa2udioNkDwAwBXdV9hUVFQ5bTU3NGd8zPT1dw4cPV0pKisP+vLw81dXVOezv0aOHOnTooNzcXElSbm6uevfurbi4OHub1NRUVVRUaNu2bU59dpI9AABOSExMVHR0tH3LyMg4bbvXX39dmzdvPu3xkpISBQUFqVWrVg774+LiVFJSYm/z34m+4XjDMWf4xGr8G7r1VoAl0NNhoInteXywp0NAM+o67UtPh4BmYDEMyfn1ZmfFcHE1fkNlX1RUpKioKPv+4ODgU9oWFRXpvvvuU3Z2tkJCQs76Pd2Fyh4AYAqGJMNwYfvPeaKiohy20yX7vLw8HTp0SBdddJECAgIUEBCgnJwczZ8/XwEBAYqLi1Ntba2OHTvm0K+0tFTx8fGSpPj4+FNW5ze8bmjTWCR7AADc7Je//KW2bNmi/Px8+zZgwACNHj3a/ufAwECtXLnS3qegoECFhYVKTk6WJCUnJ2vLli06dOiQvU12draioqKUlJTkVDw+MYwPAMDPsckiSzPdQS8yMlK9evVy2BceHq42bdrY948dO1aTJ09WTEyMoqKiNGHCBCUnJ2vw4JNTlkOHDlVSUpJuu+02zZs3TyUlJXrooYeUnp5+2tGEn0KyBwCYQkt7EM5TTz0lPz8/jRo1SjU1NUpNTdXzzz9vP+7v76+srCyNGzdOycnJCg8PV1pamubMmeP0e5HsAQBoBqtXr3Z4HRISogULFmjBggVn7NOxY0d9+OGHLr83yR4AYAo2wyKLSe+NT7IHAJhCw6p6V/p7K1bjAwDg46jsAQCm0NIW6DUnkj0AwBRI9gAA+DgzL9Bjzh4AAB9HZQ8AMAUzr8Yn2QMATOFksndlzt6NwTQzhvEBAPBxVPYAAFNgNT4AAD7O0I/PpD/b/t6KYXwAAHwclT0AwBQYxgcAwNeZeByfZA8AMAcXK3t5cWXPnD0AAD6Oyh4AYArcQQ8AAB9n5gV6DOMDAODjqOwBAOZgWFxbZOfFlT3JHgBgCmaes2cYHwAAH0dlDwAwB26qAwCAbzPzavxGJfsPPvig0Se87rrrzjoYAADgfo1K9iNGjGjUySwWi6xWqyvxAADQdLx4KN4VjUr2NputqeMAAKBJmXkY36XV+NXV1e6KAwCApmW4YfNSTid7q9WquXPn6txzz1VERIS+/fZbSdL06dO1aNEitwcIAABc43Syf+SRR5SZmal58+YpKCjIvr9Xr156+eWX3RocAADuY3HD5p2cTvavvvqq/va3v2n06NHy9/e37+/bt6927tzp1uAAAHAbhvEb78CBA+ratesp+202m+rq6twSFAAAcB+nk31SUpLWrl17yv63335bF154oVuCAgDA7Uxc2Tt9B70ZM2YoLS1NBw4ckM1m07vvvquCggK9+uqrysrKaooYAQBwnYmfeud0ZX/99ddr+fLl+vTTTxUeHq4ZM2Zox44dWr58ua6++uqmiBEAALjgrO6Nf9lllyk7O9vdsQAA0GTM/Ijbs34QzqZNm7Rjxw5JJ+fx+/fv77agAABwO55613j79+/XzTffrM8//1ytWrWSJB07dky/+MUv9Prrr6t9+/bujhEAALjA6Tn7u+66S3V1ddqxY4fKyspUVlamHTt2yGaz6a677mqKGAEAcF3DAj1XNi/ldGWfk5Oj9evXq3v37vZ93bt317PPPqvLLrvMrcEBAOAuFuPk5kp/b+V0sk9MTDztzXOsVqsSEhLcEhQAAG5n4jl7p4fxH3/8cU2YMEGbNm2y79u0aZPuu+8+/fWvf3VrcAAAwHWNquxbt24ti+XHuYqqqioNGjRIAQEnu9fX1ysgIEB33nmnRowY0SSBAgDgEhPfVKdRyf7pp59u4jAAAGhiJh7Gb1SyT0tLa+o4AABAEznrm+pIUnV1tWprax32RUVFuRQQAABNwsSVvdML9KqqqjR+/HjFxsYqPDxcrVu3dtgAAGiRTPzUO6eT/Z/+9Cd99tlneuGFFxQcHKyXX35Zs2fPVkJCgl599dWmiBEAALjA6WH85cuX69VXX9WVV16pMWPG6LLLLlPXrl3VsWNHvfbaaxo9enRTxAkAgGtMvBrf6cq+rKxMXbp0kXRyfr6srEySdOmll2rNmjXujQ4AADdpuIOeK5u3cjrZd+nSRXv37pUk9ejRQ2+++aakkxV/w4Nx0DR6DarU7CV7tXTzNn1c/LWSryn3dEg4C3+4YLPeveYd5d+0SBtHZeqFy1eoc+QxhzYdIsr1/OUr9MWoTOXftEjzL/1EbUJ+cGgTHVStJ37xqfJvWqTN//eKMgatVljAqXe3RMsXGm7V72cWacn6LXr/m8168t2d6tanytNhwUUvvPCC+vTpo6ioKEVFRSk5OVkfffSR/Xh1dbXS09PVpk0bRUREaNSoUSotLXU4R2FhoYYPH66wsDDFxsZq6tSpqq+vdzoWp5P9mDFj9PXXX0uSHnjgAS1YsEAhISGaNGmSpk6d6tS51qxZo2uvvVYJCQmyWCx67733nA3HVELCbPp2W4ie+zNPFvRmF8ce1D++uUD/9/ENSlv5awX42ZT5yyyF+p9M1KH+dcq86l8yDItuXXmtbvpkhAL9bPrbFR/J8l8rhJ68ZKXOjz6qtJW/1t2rh2lgbLEeHpTjqY8FF0yc950uuqxCj0/spD9cnaTNa6OUsfQbtYmr/fnOaLxmXqDXvn17PfbYY8rLy9OmTZt01VVX6frrr9e2bdskSZMmTdLy5cv11ltvKScnR8XFxRo5cqS9v9Vq1fDhw1VbW6v169dryZIlyszM1IwZM5z+6E7P2U+aNMn+55SUFO3cuVN5eXnq2rWr+vTp49S5qqqq1LdvX915550OHxCnt2lVlDat4tJGb3fnquEOr+/PHaIvblyiXm0O68tDCep/TonODT+u6z68UZX1QZKkqblDtPn/Fis5/oDWl7TXeVFHdUVCkUZ8NFJby2IlSXM2XaqXh3yoxzYn69CJ8Gb/XDg7QcE2XTrsqGbf1VVbv4iUJP3jqQQNSinXr287rCV/PdfDEeJsXXvttQ6vH3nkEb3wwgvasGGD2rdvr0WLFmnp0qW66qqrJEmLFy9Wz549tWHDBg0ePFiffPKJtm/frk8//VRxcXHq16+f5s6dq/vvv1+zZs1SUFBQo2Nx6Tp7SerYsaM6dux4Vn2HDRumYcOGuRoC4NUiA09Wb8dqQiRJQf5WGZJqbf72NrXWANkMiwacc1DrS9rrwralKq8Jsid6Sfq8pL1shkV92xxS9v7OzfoZcPb8Awz5B0i1NY6Lv2qrLbpgYKWHovJNFrn41Lv//LeiosJhf3BwsIKDg3+yr9Vq1VtvvaWqqiolJycrLy9PdXV1SklJsbfp0aOHOnTooNzcXA0ePFi5ubnq3bu34uLi7G1SU1M1btw4bdu2TRdeeGGjY29Usp8/f36jT3jvvfc2uq2zampqVFNTY3/9v3/hgLexyNCDAz7XpkPx2lUeI0nKPxKnE/WBmnrhBj2Rf7EskqZeuFEBfobOCT05b39O6A/6vibU4VxWw0/ltcH2NvAOJ6r8tX1TuG6596AKd4fo2OFAXXl9mXpcVKWD+346gcAzEhMTHV7PnDlTs2bNOm3bLVu2KDk5WdXV1YqIiNCyZcuUlJSk/Px8BQUFnbLWLS4uTiUlJZKkkpISh0TfcLzhmDMaleyfeuqpRp3MYrE0abLPyMjQ7Nmzm+z8QHObNXCtukWX6befjLDvK6sJ1YS1V2vOxWuV1n2LbIZFWd911dbv28rmxZf+4Mwen9RZkx7fp6VfbpG1Xtq9NUw578eoa2++uLmVmy69Kyoqcrhb7E9V9d27d1d+fr7Ky8v19ttvKy0tTTk5zb+2plHJvmH1vadNmzZNkydPtr+uqKg45RsW4C1mDlirq879TjdnX6+SExEOx9aVJOqqD25R6+ATqrf56XhdsHJHLlHRdyd/wRw+EaY2wScc+vhbbIoOqtHhE2HN9hngHge/C9afbuqu4FCrwiNtKjsUqGkLvlVJYePnZNEIbrpdbsPq+sYICgpS165dJUn9+/fXl19+qWeeeUa/+c1vVFtbq2PHjjlU96WlpYqPj5ckxcfH64svvnA4X8Nq/YY2jeX0anxPCg4Otv8lO/OXDbQshmYOWKurE/fq1pXXan/Vmf8dH60J1fG6YA2OO6A2ISe0cn8nSdJXR+IUHVyrC2IO29smxx2Qn8XQ19/HnuFsaOlqTvir7FCgIqLr1f/yCuVmt/J0SHAzm82mmpoa9e/fX4GBgVq5cqX9WEFBgQoLC5WcnCxJSk5O1pYtW3To0CF7m+zsbEVFRSkpKcmp93V5gR6aT0iYVQmdf7wUJz6xVl0uOKHjx/x1+AAVgLeYPXCtru20W3/IuUZVdUFq+5/r54/XBanGevJ/yVFddmpPeWuV1YTowralemjA51q8s4/2Hm8lSdpT0Vo5xYl6dFCOpn9xmQL8bJo5cJ2yvuvKSnwv1P/ycski7f82RAmdanTXn/eraE+IPnmzradD8y3N/CCcadOmadiwYerQoYOOHz+upUuXavXq1fr4448VHR2tsWPHavLkyYqJiVFUVJQmTJig5ORkDR48WJI0dOhQJSUl6bbbbtO8efNUUlKihx56SOnp6T+7IPB/eTTZV1ZWavfu3fbXe/fuVX5+vmJiYtShQwcPRtYydet7Qo+/s8f++g+ziyVJn7zRWk9M4u/LW4zutl2StPTqDxz2/yn3Sr37bQ9JUpeoY5rSb6Oig2p0oCpSL2y9SK/sdLy0dfLnv9TMgev06i+zZBgWrSjqrLmbLm2eDwG3Couyasz9B9Q2vk6V5f5a92FrZT5+rqz1rNFwJ1fvguds30OHDun222/XwYMHFR0drT59+ujjjz/W1VdfLenkejg/Pz+NGjVKNTU1Sk1N1fPPP2/v7+/vr6ysLI0bN07JyckKDw9XWlqa5syZcxaxG4bHbgC4evVqDRky5JT9aWlpyszM/Nn+FRUVio6O1pW6XgGWwCaIEC3JnseTPR0CmlHXaV96OgQ0g3qjTqvq31F5eXmTTc025IpOjzwiv5CQsz6Prbpa+x58sEljbSoereyvvPJKefC7BgDATHievXPWrl2rW2+9VcnJyTpw4IAk6e9//7vWrVvn1uAAAHAbnmffeO+8845SU1MVGhqqr776yn6Tm/Lycj366KNuDxAAALjG6WT/8MMPa+HChXrppZcUGPjjPPkll1yizZs3uzU4AADcxcyPuHV6zr6goECXX375Kfujo6N17Ngxd8QEAID7uekOet7I6co+Pj7e4XK5BuvWrVOXLl3cEhQAAG7HnH3j3X333brvvvu0ceNGWSwWFRcX67XXXtOUKVM0bty4pogRAAC4wOlh/AceeEA2m02//OUv9cMPP+jyyy9XcHCwpkyZogkTJjRFjAAAuKy5b6rTkjid7C0Wix588EFNnTpVu3fvVmVlpZKSkhQREfHznQEA8BQTX2d/1jfVCQoKcvpG/AAAoPk5neyHDBkii+XMKxI/++wzlwICAKBJuHr5nJkq+379+jm8rqurU35+vrZu3aq0tDR3xQUAgHsxjN94Tz311Gn3z5o1S5WVlS4HBAAA3Ous7o1/OrfeeqteeeUVd50OAAD3MvF19m576l1ubq5CXHh0IAAATYlL75wwcuRIh9eGYejgwYPatGmTpk+f7rbAAACAezid7KOjox1e+/n5qXv37pozZ46GDh3qtsAAAIB7OJXsrVarxowZo969e6t169ZNFRMAAO5n4tX4Ti3Q8/f319ChQ3m6HQDA65j5EbdOr8bv1auXvv3226aIBQAANAGnk/3DDz+sKVOmKCsrSwcPHlRFRYXDBgBAi2XCy+4kJ+bs58yZoz/+8Y/61a9+JUm67rrrHG6baxiGLBaLrFar+6MEAMBVJp6zb3Synz17tv7whz9o1apVTRkPAABws0Yne8M4+ZXmiiuuaLJgAABoKtxUp5F+6ml3AAC0aAzjN063bt1+NuGXlZW5FBAAAHAvp5L97NmzT7mDHgAA3oBh/Eb67W9/q9jY2KaKBQCApmPiYfxGX2fPfD0AAN7J6dX4AAB4JRNX9o1O9jabrSnjAACgSTFnDwCArzNxZe/0vfEBAIB3obIHAJiDiSt7kj0AwBTMPGfPMD4AAD6Oyh4AYA4M4wMA4NsYxgcAAD6Lyh4AYA4M4wMA4ONMnOwZxgcAwMdR2QMATMHyn82V/t6KZA8AMAcTD+OT7AEApsCldwAAwGdR2QMAzIFhfAAATMCLE7YrGMYHAMDHUdkDAEzBzAv0SPYAAHMw8Zw9w/gAADSBjIwMDRw4UJGRkYqNjdWIESNUUFDg0Ka6ulrp6elq06aNIiIiNGrUKJWWljq0KSws1PDhwxUWFqbY2FhNnTpV9fX1TsVCsgcAmELDML4rmzNycnKUnp6uDRs2KDs7W3V1dRo6dKiqqqrsbSZNmqTly5frrbfeUk5OjoqLizVy5Ej7cavVquHDh6u2tlbr16/XkiVLlJmZqRkzZjgVC8P4AABzaOZh/BUrVji8zszMVGxsrPLy8nT55ZervLxcixYt0tKlS3XVVVdJkhYvXqyePXtqw4YNGjx4sD755BNt375dn376qeLi4tSvXz/NnTtX999/v2bNmqWgoKBGxUJlDwCAEyoqKhy2mpqaRvUrLy+XJMXExEiS8vLyVFdXp5SUFHubHj16qEOHDsrNzZUk5ebmqnfv3oqLi7O3SU1NVUVFhbZt29bomH2jsvfzlyz+no4CTez86V95OgQ0oz1zB3o6BDQDW3W1NOudZnkvd63GT0xMdNg/c+ZMzZo16yf72mw2TZw4UZdccol69eolSSopKVFQUJBatWrl0DYuLk4lJSX2Nv+d6BuONxxrLN9I9gAA/Bw3DeMXFRUpKirKvjs4OPhnu6anp2vr1q1at26dCwGcPYbxAQDmYLhhkxQVFeWw/VyyHz9+vLKysrRq1Sq1b9/evj8+Pl61tbU6duyYQ/vS0lLFx8fb2/zv6vyG1w1tGoNkDwBAEzAMQ+PHj9eyZcv02WefqXPnzg7H+/fvr8DAQK1cudK+r6CgQIWFhUpOTpYkJScna8uWLTp06JC9TXZ2tqKiopSUlNToWBjGBwCYQnPfQS89PV1Lly7V+++/r8jISPsce3R0tEJDQxUdHa2xY8dq8uTJiomJUVRUlCZMmKDk5GQNHjxYkjR06FAlJSXptttu07x581RSUqKHHnpI6enpjZo+aECyBwCYQzNfevfCCy9Ikq688kqH/YsXL9Ydd9whSXrqqafk5+enUaNGqaamRqmpqXr++eftbf39/ZWVlaVx48YpOTlZ4eHhSktL05w5c5yKhWQPAEATMIyf/3YQEhKiBQsWaMGCBWds07FjR3344YcuxUKyBwCYgsUwZGlEAv6p/t6KZA8AMAcehAMAAHwVlT0AwBR4nj0AAL6OYXwAAOCrqOwBAKbAMD4AAL7OxMP4JHsAgCmYubJnzh4AAB9HZQ8AMAeG8QEA8H3ePBTvCobxAQDwcVT2AABzMIyTmyv9vRTJHgBgCqzGBwAAPovKHgBgDqzGBwDAt1lsJzdX+nsrhvEBAPBxVPYAAHNgGB8AAN9m5tX4JHsAgDmY+Dp75uwBAPBxVPYAAFNgGB8AAF9n4gV6DOMDAODjqOwBAKbAMD4AAL6O1fgAAMBXUdkDAEyBYXwAAHwdq/EBAICvorIHAJgCw/gAAPg6m3Fyc6W/lyLZAwDMgTl7AADgq6jsAQCmYJGLc/Zui6T5kewBAObAHfQAAICvorIHAJgCl94BAODrWI0PAAB8FZU9AMAULIYhiwuL7Fzp62kkewCAOdj+s7nS30sxjA8AgI+jsgcAmALD+AAA+DoTr8Yn2QMAzIE76AEAAF9FZQ8AMAXuoAev0Ca+VmP/fEADh1QoONSm4n3BemJyR+36d7inQ4OLeg2s0I2/O6iuvarUJq5Oc35/vnKzY+zHJ8/bo6tvPOLQZ1NOtKaP6dHcocIJv++1WUM77lXn6GOqqffXV4fj9XjeYO2taCVJOje8QqtuXHravveuvlorvjtPkpQcv1/3XfilurUu04n6AC3b3V1PfXWxrAaDs05hGB8tXUR0vZ5c9o2sdRY9dFtX3T0kSX+b016V5Xxf8wUhYTZ9uyNMz8/sdMY2X66O1i0XX2jf/nJf1+YLEGdlYPxB/WPnBbrpwxs0JvvXCvCz6ZWrsxQaUCdJOvhDhH7xxu0O2zNfDVBlXaDWHOggSerR+oheSvlQaw8kasTyGzUx52r9MnGfpvTf6MmPhkZYs2aNrr32WiUkJMhisei9995zOG4YhmbMmKF27dopNDRUKSkp2rVrl0ObsrIyjR49WlFRUWrVqpXGjh2ryspKp2PxaLLPyMjQwIEDFRkZqdjYWI0YMUIFBQWeDKnFuumeUh0pDtQTf+ykgvxwlRYFa/OaKB38LtjTocENNuW00qtPJmr9JzFnbFNX66ejR4LsW2UFX/Raurs+Ha5le3po97EY7TzaVvevG6JzIyp1QZvDkiSb4acj1WEO29Ud9uqjfefph/pASdKvOu1RwdE2WvDvASo8Hq0vSxM0L2+wRnffqvCAWk9+PK9jsbm+OaOqqkp9+/bVggULTnt83rx5mj9/vhYuXKiNGzcqPDxcqampqq6utrcZPXq0tm3bpuzsbGVlZWnNmjX63e9+5/Rn92iyz8nJUXp6ujZs2KDs7GzV1dVp6NChqqqq8mRYLdLgq8v1zb/D9eDCb/VG/r+1YMUODbvlyM93hM/oM7hC//wiTy99+rXGz92ryFZ1ng4JTooMOpmcy2tCTnv8gpjDSmrzvd7e9eP0TJC/VTVWf4d2NdYAhQRY7V8a0EgNw/iubE4YNmyYHn74Yd1www2nCcXQ008/rYceekjXX3+9+vTpo1dffVXFxcX2EYAdO3ZoxYoVevnllzVo0CBdeumlevbZZ/X666+ruLjYqVg8muxXrFihO+64QxdccIH69u2rzMxMFRYWKi8v77Tta2pqVFFR4bCZRbsONfr1bYdVvDdYfx7dVVl/P0fj5hQp5cbvPR0amkHemlb66x/P07TbeuqVvySq98UVmru4QH5+3juHaDYWGXpw4OfKK43XrmOnH8G58fwd2n2stb46HG/ft7Y4UReeU6rhnXfJz2JTXFil0vue/B15TtgPzRI7HP1vHqqpqXH6HHv37lVJSYlSUlLs+6KjozVo0CDl5uZKknJzc9WqVSsNGDDA3iYlJUV+fn7auNG5aZwWNWdfXl4uSYqJOf3/CBkZGYqOjrZviYmJzRmeR1n8pN1bw7T4L+dqz7YwffRaW320tK2G30Z1bwY5WW20cWVr7SsIU252jGbe1V3d+1apz2DzfOH1djMHr9X5rcs0cU3KaY8H+9fr2i67Hap6Sfq8OFHz8gZrzuC12nrrS/p4xOvK2X9yPt8wLE0et08x3LBJSkxMdMhFGRkZTodSUlIiSYqLi3PYHxcXZz9WUlKi2NhYh+MBAQGKiYmxt2msFjPpZ7PZNHHiRF1yySXq1avXadtMmzZNkydPtr+uqKgwTcIvOxSo73Y5Dv0V7QrRpb865pmA4FElRSEq/z5A7TpWK399tKfDwc+YMWithrT/TqNXXK/SHyJO2+aajt8qxL9ey/Z0O+XY4u19tXh7H8WG/qDy2mC1jziuKf03quh4VFOH7lPcdbvcoqIiRUX9+HcfHNzy1061mGSfnp6urVu3at26dWdsExwc7BV/qU1h+6ZwJXapdth3bpcaHdof5KGI4Elt42sU2bpeZYf4+bdshmYMWqerO+zVrSuu0/7KMyfnG8/foc+KOuloTegZWlh06MTJy2yHd96t4soIbStr2wQx4+dERUU5JPuzER9/cqqmtLRU7dq1s+8vLS1Vv3797G0OHTrk0K++vl5lZWX2/o3VIobxx48fr6ysLK1atUrt27f3dDgt0rsvxarHRVX67fgSJXSq1pARZfrV6CP6YMk5ng4NbhASZlWXnlXq0vPk4tS4xBp16VmlcxJqFBJm1dgHCtWj33HFnlujfr8o14wXv1HxdyHavJaqviWbOWitruuyS5PXpKiqLkhtQ35Q25AfFOxf79CuQ2S5BsYd1Fu7Tn/fhLEX5Ktbq+/VtVWZ7umTp9/1+koPf3GJbFxn75xmXqD3Uzp37qz4+HitXLnSvq+iokIbN25UcnKyJCk5OVnHjh1zWMf22WefyWazadCgQU69n0cre8MwNGHCBC1btkyrV69W586dPRlOi/bN1+Gac9d5GjPtgEZPPKiSoiAtnNVeq5ad+VIteI/ze1dp3j932F///qFCSVL222313PTO6tzjB6WMPKzwKKvKDgVq89povfpUoupq+WXfko3usV2S9No1Hzjsv3/dlVq258fEfmPXnSqpitC64tNPS15+bqHG9dmsID+rdh5to3tWXWO/Dh9OMOTaM+mdzPWVlZXavXu3/fXevXuVn5+vmJgYdejQQRMnTtTDDz+s888/X507d9b06dOVkJCgESNGSJJ69uypa665RnfffbcWLlyouro6jR8/Xr/97W+VkJDgVCwWw/DcLYHuueceLV26VO+//766d+9u3x8dHa3Q0DMNZf2ooqJC0dHRutJvpAIsgU0ZKloAvyB+xmayZ+aFng4BzcBWXa29sx5UeXm5y0PjZ9KQK6668AEF+J/+ssfGqLdW67OvHmt0rKtXr9aQIUNO2Z+WlqbMzEwZhqGZM2fqb3/7m44dO6ZLL71Uzz//vLp1+3HdRllZmcaPH6/ly5fLz89Po0aN0vz58xURcfq1H2fi0WRvsZx+JenixYt1xx13/Gx/kr25kOzNhWRvDr6c7FsSjw/jAwDQLAy5eG98t0XS7FrManwAAJoUD8IBAAC+isoeAGAONkmu3HTQlZX8HkayBwCYgrvuoOeNGMYHAMDHUdkDAMzBxAv0SPYAAHMwcbJnGB8AAB9HZQ8AMAcTV/YkewCAOXDpHQAAvo1L7wAAgM+isgcAmANz9gAA+DibIVlcSNg27032DOMDAODjqOwBAObAMD4AAL7OxWQv7032DOMDAODjqOwBAObAMD4AAD7OZsiloXhW4wMAgJaKyh4AYA6G7eTmSn8vRbIHAJgDc/YAAPg45uwBAICvorIHAJgDw/gAAPg4Qy4me7dF0uwYxgcAwMdR2QMAzIFhfAAAfJzNJsmFa+Vt3nudPcP4AAD4OCp7AIA5MIwPAICPM3GyZxgfAAAfR2UPADAHE98ul2QPADAFw7DJcOHJda709TSSPQDAHAzDteqcOXsAANBSUdkDAMzBcHHO3osre5I9AMAcbDbJ4sK8uxfP2TOMDwCAj6OyBwCYA8P4AAD4NsNmk+HCML43X3rHMD4AAD6Oyh4AYA4M4wMA4ONshmQxZ7JnGB8AAB9HZQ8AMAfDkOTKdfbeW9mT7AEApmDYDBkuDOMbJHsAAFo4wybXKnsuvQMAAKexYMECderUSSEhIRo0aJC++OKLZo+BZA8AMAXDZri8OeuNN97Q5MmTNXPmTG3evFl9+/ZVamqqDh061ASf8MxI9gAAczBsrm9OevLJJ3X33XdrzJgxSkpK0sKFCxUWFqZXXnmlCT7gmXn1nH3DYol6o87DkaA5+Hnx4hg4z1Zd7ekQ0Awafs7NsfitXnUu3VOnXidzTUVFhcP+4OBgBQcHn9K+trZWeXl5mjZtmn2fn5+fUlJSlJube/aBnAWvTvbHjx+XJK0zlrv0A4SX4He/ucx609MRoBkdP35c0dHRTXLuoKAgxcfHa13Jhy6fKyIiQomJiQ77Zs6cqVmzZp3S9siRI7JarYqLi3PYHxcXp507d7ocizO8OtknJCSoqKhIkZGRslgsng6n2VRUVCgxMVFFRUWKiorydDhoQvyszcOsP2vDMHT8+HElJCQ02XuEhIRo7969qq2tdflchmGckm9OV9W3NF6d7P38/NS+fXtPh+ExUVFRpvqlYGb8rM3DjD/rpqro/1tISIhCQkKa/H3+W9u2beXv76/S0lKH/aWlpYqPj2/WWFigBwBAEwgKClL//v21cuVK+z6bzaaVK1cqOTm5WWPx6soeAICWbPLkyUpLS9OAAQN08cUX6+mnn1ZVVZXGjBnTrHGQ7L1QcHCwZs6c6RXzRHANP2vz4Gftm37zm9/o8OHDmjFjhkpKStSvXz+tWLHilEV7Tc1iePPNfgEAwM9izh4AAB9HsgcAwMeR7AEA8HEkewAAfBzJ3su0hEcloumtWbNG1157rRISEmSxWPTee+95OiQ0kYyMDA0cOFCRkZGKjY3ViBEjVFBQ4Omw4GNI9l6kpTwqEU2vqqpKffv21YIFCzwdCppYTk6O0tPTtWHDBmVnZ6uurk5Dhw5VVVWVp0ODD+HSOy8yaNAgDRw4UM8995ykk3diSkxM1IQJE/TAAw94ODo0FYvFomXLlmnEiBGeDgXN4PDhw4qNjVVOTo4uv/xyT4cDH0Fl7yUaHpWYkpJi3+epRyUCaDrl5eWSpJiYGA9HAl9CsvcSP/WoxJKSEg9FBcCdbDabJk6cqEsuuUS9evXydDjwIdwuFwBaiPT0dG3dulXr1q3zdCjwMSR7L9GSHpUIwP3Gjx+vrKwsrVmzxtSP7kbTYBjfS7SkRyUCcB/DMDR+/HgtW7ZMn332mTp37uzpkOCDqOy9SEt5VCKaXmVlpXbv3m1/vXfvXuXn5ysmJkYdOnTwYGRwt/T0dC1dulTvv/++IiMj7WtwoqOjFRoa6uHo4Cu49M7LPPfcc3r88cftj0qcP3++Bg0a5Omw4GarV6/WkCFDTtmflpamzMzM5g8ITcZisZx2/+LFi3XHHXc0bzDwWSR7AAB8HHP2AAD4OJI9AAA+jmQPAICPI9kDAODjSPYAAPg4kj0AAD6OZA8AgI8j2QMA4ONI9oCL7rjjDo0YMcL++sorr9TEiRObPY7Vq1fLYrHo2LFjZ2xjsVj03nvvNfqcs2bNUr9+/VyKa9++fbJYLMrPz3fpPADOHskePumOO+6QxWKRxWJRUFCQunbtqjlz5qi+vr7J3/vdd9/V3LlzG9W2MQkaAFzFg3Dgs6655hotXrxYNTU1+vDDD5Wenq7AwEBNmzbtlLa1tbUKCgpyy/vGxMS45TwA4C5U9vBZwcHBio+PV8eOHTVu3DilpKTogw8+kPTj0PsjjzyihIQEde/eXZJUVFSkm266Sa1atVJMTIyuv/567du3z35Oq9WqyZMnq1WrVmrTpo3+9Kc/6X8fL/G/w/g1NTW6//77lZiYqODgYHXt2lWLFi3Svn377A+7ad26tSwWi/3BJzabTRkZGercubNCQ0PVt29fvf322w7v8+GHH6pbt24KDQ3VkCFDHOJsrPvvv1/dunVTWFiYunTpounTp6uuru6Udi+++KISExMVFhamm266SeXl5Q7HX375ZfXs2VMhISHq0aOHnn/+eadjAdB0SPYwjdDQUNXW1tpfr1y5UgUFBcrOzlZWVpbq6uqUmpqqyMhIrV27Vp9//rkiIiJ0zTXX2Ps98cQTyszM1CuvvKJ169aprKxMy5Yt+8n3vf322/XPf/5T8+fP144dO/Tiiy8qIiJCiYmJeueddyRJBQUFOnjwoJ555hlJUkZGhl599VUtXLhQ27Zt06RJk3TrrbcqJydH0skvJSNHjtS1116r/Px83XXXXXrggQec/juJjIxUZmamtm/frmeeeUYvvfSSnnrqKYc2u3fv1ptvvqnly5drxYoV+uqrr3TPPffYj7/22muaMWOGHnnkEe3YsUOPPvqopk+friVLljgdD4AmYgA+KC0tzbj++usNwzAMm81mZGdnG8HBwcaUKVPsx+Pi4oyamhp7n7///e9G9+7dDZvNZt9XU1NjhIaGGh9//LFhGIbRrl07Y968efbjdXV1Rvv27e3vZRiGccUVVxj33XefYRiGUVBQYEgysrOzTxvnqlWrDEnG0aNH7fuqq6uNsLAwY/369Q5tx44da9x8882GYRjGtGnTjKSkJIfj999//ynn+l+SjGXLlp3x+OOPP27079/f/nrmzJmGv7+/sX//fvu+jz76yPDz8zMOHjxoGIZhnHfeecbSpUsdzjN37lwjOTnZMAzD2Lt3ryHJ+Oqrr874vgCaFnP28FlZWVmKiIhQXV2dbDabbrnlFs2aNct+vHfv3g7z9F9//bV2796tyMhIh/NUV1drz549Ki8v18GDBzVo0CD7sYCAAA0YMOCUofwG+fn58vf31xVXXNHouHfv3q0ffvhBV199tcP+2tpaXXjhhZKkHTt2OMQhScnJyY1+jwZvvPGG5s+frz179qiyslL19fWKiopyaNOhQwede+65Du9js9lUUFCgyMhI7dmzR2PHjtXdd99tb1NfX6/o6Gin4wHQNEj28FlDhgzRCy+8oKCgICUkJCggwPGfe3h4uMPryspK9e/fX6+99top5zrnnHPOKobQ0FCn+1RWVkqS/vWvfzkkWenkOgR3yc3N1ejRozV79mylpqYqOjpar7/+up544gmnY33ppZdO+fLh7+/vtlgBuIZkD58VHh6url27Nrr9RRddpDfeeEOxsbGnVLcN2rVrp40bN+ryyy+XdLKCzcvL00UXXXTa9r1795bNZlNOTo5SUlJOOd4wsmC1Wu37kpKSFBwcrMLCwjOOCPTs2dO+2LDBhg0bfv5D/pf169erY8eOevDBB+37vvvuu1PaFRYWqri4WAkJCfb38fPzU/fu3RUXF6eEhAR9++23Gj16tFPvD6D5sEAP+I/Ro0erbdu2uv7667V27Vrt3btXq1ev1r333qv9+/dLku677z499thjeu+997Rz507dc889P3mNfKdOnZSWlqY777xT7733nv2cb775piSpY8eOslgsysrK0uHDh1VZWanIyEhNmTJFkyZN0pIlS7Rnzx5t3rxZzz77rH3R2x/+8Aft2rVLU6dOVUFBgZYuXarMzEynPu/555+vwsJCvf7669qzZ4/mz59/2sWGISEhSktL09dff621a9fq3nvv1U033aT4+HhJ0uzZs5WRkaH58+frm2++0ZYtW7R48WI9+eSTTsUDoOmQ7IH/CAsL05o1a9ShQweNHDlSPXv21NixY1VdXW2v9P/4xz/qtttuU1pampKTkxUZGakbbrjhJ8/7wgsv6MYbb9Q999yjHj166O6771ZVVZUk6dxzz9Xs2bP1wAMPKC4uTuPHj5ckzZ07V9OnT1dGRoZ69uypa665Rv/617/UuXNnSSfn0d955x2999576tu3rxYuXKhHH33Uqc973XXXadKkSRo/frz69eun9evXa/r06ae069q1q0aOHKlf/epXGjp0qPr06eNwad1dd92ll19+WYsXL1bv3r11xRVXKDMz0x4rAM+zGGdaWQQAAHwClT0AAD6OZA8AgI8j2QMA4ONI9gAA+DiSPQAAPo5kDwCAjyPZAwDg40j2AAD4OJI9AAA+jmQPAICPI9kDAODj/h8AY3qrAINnEgAAAABJRU5ErkJggg==", + "text/plain": [ + "<Figure size 640x480 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cm =confusion_matrix(y_test, y_pred)\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm)\n", + "disp.plot()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.95784204 0.9481439 0.93891587 0.95358077 0.96490525]\n", + " StratifiedKFold Cross-Validation Accuracy: 95.27% | Standard Deviation: 0.88%\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.model_selection import StratifiedKFold\n", + "\n", + "skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)\n", + "cvScore = cross_val_score(gb_clf, X_train, y_train, cv=skf, scoring='f1_macro')\n", + "print (cvScore)\n", + "print (\" StratifiedKFold Cross-Validation Accuracy: %0.2f%% | Standard Deviation: %0.2f%%\" % (100*cvScore.mean(), 100*cvScore.std()))\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1> 6. Testing Data</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\60172\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\\sklearn\\base.py:439: UserWarning: X does not have valid feature names, but GradientBoostingClassifier was fitted with feature names\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "predicted marker\n", + "1 42\n", + "0 31\n", + "2 27\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_testpred = gb_clf.predict(mTest.values)\n", + "y_testpred = pd.DataFrame(y_testpred, columns=['predicted marker'])\n", + "y_testpred.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_testpred.to_csv('testresult.csv')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}