diff --git a/PartB_xgb_placing_the_best_param_-final.ipynb b/PartB_xgb_placing_the_best_param_-final.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..69d0efd3815c790cef9c7969e1ffe3de1713da67 --- /dev/null +++ b/PartB_xgb_placing_the_best_param_-final.ipynb @@ -0,0 +1,1730 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>1. Loading Datasets</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "[ TrainingDataMulti.csv info ]\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 6000 entries, 0 to 5999\n", + "Columns: 129 entries, R1-PA1:VH to marker\n", + "dtypes: float64(112), int64(17)\n", + "memory usage: 5.9 MB\n", + "\n", + "[ TestingDataMulti.csv info ]\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 100 entries, 0 to 99\n", + "Columns: 128 entries, R1-PA1:VH to snort_log4\n", + "dtypes: float64(104), int64(24)\n", + "memory usage: 100.1 KB\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "\n", + "mTrain = pd.read_csv(\"TrainingDataMulti.csv\")\n", + "mTest = pd.read_csv(\"TestingDataMulti.csv\")\n", + "\n", + "\n", + "print(\"\\n[ TrainingDataMulti.csv info ]\")\n", + "mTrain.info()\n", + "\n", + "print(\"\\n[ TestingDataMulti.csv info ]\")\n", + "mTest.info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>1.1 Analysing the Data</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "R1-PA1:VH float64\n", + "R1-PM1:V float64\n", + "R1-PA2:VH float64\n", + "R1-PM2:V float64\n", + "R1-PA3:VH float64\n", + " ... 
\n", + "snort_log1 int64\n", + "snort_log2 int64\n", + "snort_log3 int64\n", + "snort_log4 int64\n", + "marker int64\n", + "Length: 129, dtype: object" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "0 3000\n", + "2 1500\n", + "1 1500\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain['marker'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " <th>marker</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", 
+ " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " 
<td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", 
+ " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " <td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 129 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 
73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... \n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log4 relay1_log relay2_log relay3_log relay4_log \n", + "0 0 0 0 0 0 \\\n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + " snort_log1 snort_log2 snort_log3 snort_log4 marker \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + "[6000 rows x 129 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " <th>marker</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " <td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", 
+ " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 129 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... 
\n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log4 relay1_log relay2_log relay3_log relay4_log \n", + "0 0 0 0 0 0 \\\n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + " snort_log1 snort_log2 snort_log3 snort_log4 marker \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "... ... ... ... ... ... 
\n", + "5995 0 0 0 0 0 \n", + "5996 0 0 0 0 0 \n", + "5997 0 0 0 0 0 \n", + "5998 0 0 0 0 0 \n", + "5999 0 0 0 0 0 \n", + "\n", + "[6000 rows x 129 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mTrain.isnull().sum()\n", + "mTrain = mTrain.dropna()\n", + "mTrain\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>R1-PA1:VH</th>\n", + " <th>R1-PM1:V</th>\n", + " <th>R1-PA2:VH</th>\n", + " <th>R1-PM2:V</th>\n", + " <th>R1-PA3:VH</th>\n", + " <th>R1-PM3:V</th>\n", + " <th>R1-PA4:IH</th>\n", + " <th>R1-PM4:I</th>\n", + " <th>R1-PA5:IH</th>\n", + " <th>R1-PM5:I</th>\n", + " <th>...</th>\n", + " <th>control_panel_log3</th>\n", + " <th>control_panel_log4</th>\n", + " <th>relay1_log</th>\n", + " <th>relay2_log</th>\n", + " <th>relay3_log</th>\n", + " <th>relay4_log</th>\n", + " <th>snort_log1</th>\n", + " <th>snort_log2</th>\n", + " <th>snort_log3</th>\n", + " <th>snort_log4</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>70.399324</td>\n", + " <td>127673.0908</td>\n", + " <td>-49.572308</td>\n", + " <td>127648.0176</td>\n", + " <td>-169.578319</td>\n", + " <td>127723.2374</td>\n", + " <td>65.689611</td>\n", + " <td>605.91099</td>\n", + " <td>-57.003571</td>\n", + " <td>626.78553</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + 
" <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>73.688102</td>\n", + " <td>130280.7109</td>\n", + " <td>-46.300719</td>\n", + " <td>130255.6377</td>\n", + " <td>-166.278082</td>\n", + " <td>130355.9307</td>\n", + " <td>71.831719</td>\n", + " <td>483.59351</td>\n", + " <td>-50.947407</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>73.733939</td>\n", + " <td>130305.7842</td>\n", + " <td>-46.254883</td>\n", + " <td>130280.7109</td>\n", + " <td>-166.232245</td>\n", + " <td>130381.0040</td>\n", + " <td>71.808800</td>\n", + " <td>483.59351</td>\n", + " <td>-50.913030</td>\n", + " <td>500.98896</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>74.083443</td>\n", + " <td>130581.5902</td>\n", + " <td>-45.899649</td>\n", + " <td>130556.5169</td>\n", + " <td>-165.882741</td>\n", + " <td>130656.8100</td>\n", + " <td>72.152575</td>\n", + " <td>482.86107</td>\n", + " <td>-50.437475</td>\n", + " <td>499.15786</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>74.553268</td>\n", + " <td>131083.0556</td>\n", + " <td>-45.424094</td>\n", + " <td>131057.9823</td>\n", + " <td>-165.424375</td>\n", + " <td>131158.2754</td>\n", + " <td>72.118198</td>\n", + " <td>484.50906</td>\n", + " <td>-50.013486</td>\n", + " <td>497.69298</td>\n", + " <td>...</td>\n", + " 
<td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5995</th>\n", + " <td>116.889120</td>\n", + " <td>131860.3269</td>\n", + " <td>-3.076783</td>\n", + " <td>131810.1804</td>\n", + " <td>-123.094253</td>\n", + " <td>131910.4735</td>\n", + " <td>114.780635</td>\n", + " <td>376.10794</td>\n", + " <td>-5.254023</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5996</th>\n", + " <td>116.849013</td>\n", + " <td>131810.1804</td>\n", + " <td>-3.116890</td>\n", + " <td>131760.0339</td>\n", + " <td>-123.128630</td>\n", + " <td>131885.4002</td>\n", + " <td>114.769176</td>\n", + " <td>376.29105</td>\n", + " <td>-5.322778</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5997</th>\n", + " <td>116.384917</td>\n", + " <td>131734.9606</td>\n", + " <td>-3.586716</td>\n", + " <td>131684.8140</td>\n", + " <td>-123.586996</td>\n", + " <td>131785.1071</td>\n", + " 
<td>114.299351</td>\n", + " <td>376.47416</td>\n", + " <td>-5.849899</td>\n", + " <td>374.82617</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5998</th>\n", + " <td>111.125164</td>\n", + " <td>130506.3704</td>\n", + " <td>-8.846468</td>\n", + " <td>130456.2238</td>\n", + " <td>-128.858208</td>\n", + " <td>130556.5169</td>\n", + " <td>106.667553</td>\n", + " <td>478.83265</td>\n", + " <td>-13.464508</td>\n", + " <td>477.73399</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5999</th>\n", + " <td>110.878793</td>\n", + " <td>130481.2971</td>\n", + " <td>-9.092840</td>\n", + " <td>130456.2238</td>\n", + " <td>-129.104580</td>\n", + " <td>130556.5169</td>\n", + " <td>106.392533</td>\n", + " <td>478.83265</td>\n", + " <td>-13.750987</td>\n", + " <td>477.91710</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>6000 rows × 128 columns</p>\n", + "</div>" + ], + "text/plain": [ + " R1-PA1:VH R1-PM1:V R1-PA2:VH R1-PM2:V R1-PA3:VH \n", + "0 70.399324 127673.0908 -49.572308 127648.0176 -169.578319 \\\n", + "1 73.688102 130280.7109 -46.300719 130255.6377 -166.278082 \n", + "2 73.733939 130305.7842 -46.254883 130280.7109 -166.232245 \n", + "3 74.083443 130581.5902 -45.899649 130556.5169 -165.882741 \n", + "4 74.553268 131083.0556 -45.424094 131057.9823 -165.424375 \n", + "... ... ... ... ... ... 
\n", + "5995 116.889120 131860.3269 -3.076783 131810.1804 -123.094253 \n", + "5996 116.849013 131810.1804 -3.116890 131760.0339 -123.128630 \n", + "5997 116.384917 131734.9606 -3.586716 131684.8140 -123.586996 \n", + "5998 111.125164 130506.3704 -8.846468 130456.2238 -128.858208 \n", + "5999 110.878793 130481.2971 -9.092840 130456.2238 -129.104580 \n", + "\n", + " R1-PM3:V R1-PA4:IH R1-PM4:I R1-PA5:IH R1-PM5:I ... \n", + "0 127723.2374 65.689611 605.91099 -57.003571 626.78553 ... \\\n", + "1 130355.9307 71.831719 483.59351 -50.947407 500.98896 ... \n", + "2 130381.0040 71.808800 483.59351 -50.913030 500.98896 ... \n", + "3 130656.8100 72.152575 482.86107 -50.437475 499.15786 ... \n", + "4 131158.2754 72.118198 484.50906 -50.013486 497.69298 ... \n", + "... ... ... ... ... ... ... \n", + "5995 131910.4735 114.780635 376.10794 -5.254023 374.82617 ... \n", + "5996 131885.4002 114.769176 376.29105 -5.322778 374.82617 ... \n", + "5997 131785.1071 114.299351 376.47416 -5.849899 374.82617 ... \n", + "5998 130556.5169 106.667553 478.83265 -13.464508 477.73399 ... \n", + "5999 130556.5169 106.392533 478.83265 -13.750987 477.91710 ... \n", + "\n", + " control_panel_log3 control_panel_log4 relay1_log relay2_log \n", + "0 0 0 0 0 \\\n", + "1 0 0 0 0 \n", + "2 0 0 0 0 \n", + "3 0 0 0 0 \n", + "4 0 0 0 0 \n", + "... ... ... ... ... \n", + "5995 0 0 0 0 \n", + "5996 0 0 0 0 \n", + "5997 0 0 0 0 \n", + "5998 0 0 0 0 \n", + "5999 0 0 0 0 \n", + "\n", + " relay3_log relay4_log snort_log1 snort_log2 snort_log3 snort_log4 \n", + "0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... 
\n", + "5995 0 0 0 0 0 0 \n", + "5996 0 0 0 0 0 0 \n", + "5997 0 0 0 0 0 0 \n", + "5998 0 0 0 0 0 0 \n", + "5999 0 0 0 0 0 0 \n", + "\n", + "[6000 rows x 128 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = mTrain.drop(columns = 'marker')\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "y = mTrain['marker']" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Stratified Train-Test Split\n", + "The train-test split is stratified so that each class makes up almost the same percentage of the training and test samples. This is desirable when the number of samples per class is imbalanced, as in this case. \n", + "\n", + "For such imbalanced datasets, stratified K-fold cross-validation is used instead of plain K-fold cross-validation" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=1, test_size=0.15, stratify=y)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "0 2550\n", + "2 1275\n", + "1 1275\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "marker\n", + "0 450\n", + "2 225\n", + "1 225\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.value_counts()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + 
"attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>3. Choosing a Model: XGBoost, training, and evaluation</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9722222222222222\n" + ] + } + ], + "source": [ + "from xgboost import XGBClassifier\n", + "xgb_clf = XGBClassifier(colsample_bytree= 0.3, learning_rate= 0.05, max_depth= 10, n_estimators= 1000)\n", + "xgb_clf.fit(X_train, y_train)\n", + "score = xgb_clf.score(X_test, y_test)\n", + "print(score)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1>4. Improving</h1>" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Edit: ***To save time, the parameters obtained from the grid search are hardcoded directly into section 3. The full code is in `xgb with hyperparameter tuning.ipynb`" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Before tuning, xgb_clf in section 3 was: xgb_clf = XGBClassifier()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# params = { 'max_depth': [3,6,10],\n", + "# 'learning_rate': [0.01, 0.05, 0.1],\n", + "# 'n_estimators': [100, 500, 1000],\n", + "# 'colsample_bytree': [0.3, 0.7]}\n", + "\n", + "# from sklearn.model_selection import GridSearchCV\n", + "# im_xgb = GridSearchCV(estimator=xgb_clf, \n", + "# param_grid=params,\n", + "# scoring='neg_mean_squared_error', \n", + "# verbose=1)\n", + "# im_xgb.fit(X_train, y_train)\n", + "# print(\"Best parameters:\", im_xgb.best_params_)\n", + "# print(\"Lowest RMSE: \", (-im_xgb.best_score_)**(1/2.0))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A grid search was performed to find the optimal XGBoost hyperparameters. 
\n", + "\n", + "Afterwards, the stratified K fold cross validation will be used, followed by a confusion metric as an evaluation." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1> 5. Metric Evaluation</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([2, 0, 2, 0, 0, 2, 1, 1, 0, 0, 0, 0, 1, 2, 1, 0, 0, 2, 1, 0, 1, 0,\n", + " 0, 0, 0, 2, 0, 2, 0, 2, 1, 1, 1, 1, 1, 1, 0, 2, 1, 0, 1, 0, 1, 0,\n", + " 0, 0, 0, 1, 0, 2, 0, 2, 2, 0, 1, 0, 0, 2, 0, 2, 0, 0, 2, 0, 0, 1,\n", + " 0, 2, 0, 1, 0, 0, 2, 0, 0, 2, 1, 1, 2, 2, 2, 2, 0, 0, 1, 1, 0, 1,\n", + " 1, 0, 2, 0, 0, 0, 0, 2, 2, 2, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0,\n", + " 2, 1, 1, 2, 0, 0, 2, 1, 0, 1, 2, 0, 0, 2, 1, 0, 0, 2, 2, 2, 1, 1,\n", + " 1, 1, 2, 2, 0, 0, 1, 1, 2, 1, 0, 2, 1, 2, 0, 0, 0, 1, 0, 1, 1, 0,\n", + " 0, 0, 1, 2, 1, 1, 0, 0, 2, 0, 0, 2, 1, 1, 0, 0, 2, 0, 1, 0, 0, 0,\n", + " 0, 0, 2, 0, 0, 0, 1, 1, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 2, 0,\n", + " 0, 2, 1, 2, 0, 0, 1, 2, 0, 2, 1, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 1,\n", + " 0, 1, 1, 0, 0, 2, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1,\n", + " 0, 1, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 1, 2, 0, 0, 1, 0, 2, 0,\n", + " 0, 0, 0, 0, 1, 0, 2, 2, 0, 0, 1, 1, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2,\n", + " 2, 2, 2, 2, 1, 1, 0, 2, 2, 0, 0, 2, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 2, 0, 2, 0, 1, 0, 1, 1, 0, 2, 1,\n", + " 0, 1, 1, 2, 0, 1, 0, 2, 2, 0, 0, 1, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0,\n", + " 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 0, 1, 0, 2, 1, 1, 1, 1, 0,\n", + " 0, 0, 1, 2, 0, 0, 0, 0, 0, 2, 2, 2, 0, 1, 0, 0, 1, 2, 0, 0, 0, 2,\n", + " 0, 1, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 2, 2, 2, 1, 0, 1, 0, 0, 2, 0,\n", + " 2, 0, 2, 2, 2, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 0, 1, 2, 1, 0, 2, 0,\n", + " 0, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 0, 2, 0, 1,\n", + " 2, 1, 0, 0, 0, 1, 0, 2, 2, 1, 0, 0, 2, 0, 0, 0, 
0, 2, 0, 2, 0, 2,\n", + " 1, 1, 0, 0, 0, 0, 0, 2, 1, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 2, 2, 0,\n", + " 2, 0, 1, 0, 1, 0, 1, 1, 2, 1, 2, 0, 0, 0, 0, 2, 2, 0, 2, 0, 1, 0,\n", + " 0, 1, 0, 0, 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 2, 1, 1,\n", + " 1, 2, 0, 0, 2, 0, 0, 1, 2, 1, 0, 0, 1, 0, 2, 2, 1, 1, 0, 1, 0, 2,\n", + " 2, 0, 0, 2, 0, 2, 0, 1, 1, 1, 0, 0, 2, 2, 1, 0, 0, 1, 0, 0, 1, 2,\n", + " 0, 2, 0, 2, 0, 2, 1, 0, 0, 0, 0, 2, 1, 1, 2, 0, 1, 1, 0, 0, 0, 1,\n", + " 0, 0, 1, 1, 0, 0, 2, 2, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0, 2, 1, 2, 0,\n", + " 2, 1, 0, 0, 1, 2, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1, 0, 2, 2, 2, 2, 0,\n", + " 1, 1, 2, 2, 0, 0, 1, 0, 0, 2, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 2, 0,\n", + " 1, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 1, 0, 0, 0, 2,\n", + " 1, 2, 0, 2, 0, 0, 2, 0, 0, 0, 1, 0, 0, 1, 0, 2, 0, 2, 0, 0, 0, 2,\n", + " 0, 1, 0, 0, 1, 2, 0, 2, 2, 1, 0, 1, 1, 0, 0, 1, 2, 2, 2, 0, 1, 0,\n", + " 0, 2, 0, 2, 0, 0, 1, 1, 1, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,\n", + " 0, 2, 0, 0, 2, 0, 2, 1, 1, 2, 0, 1, 0, 2, 2, 0, 0, 1, 0, 2, 0, 2,\n", + " 0, 2, 2, 0, 0, 0, 1, 0, 0, 2, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 0,\n", + " 1, 0, 2, 1, 0, 0, 1, 2, 0, 0, 1, 0, 1, 0, 2, 2, 2, 2, 2, 0, 1, 1,\n", + " 1, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 1, 2, 0, 1, 1, 0, 1, 1, 0, 0, 1,\n", + " 0, 0, 1, 0, 0, 2, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0,\n", + " 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 0, 2, 0, 1, 1, 1, 1, 1, 0],\n", + " dtype=int64)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_pred = xgb_clf.predict(X_test)\n", + "my_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9722222222222222" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "score = xgb_clf.score(X_test, y_test)\n", + "score" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "Confusion Matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[448, 2, 0],\n", + " [ 0, 217, 8],\n", + " [ 7, 8, 210]], dtype=int64)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import ConfusionMatrixDisplay\n", + "confusion_matrix(y_test, my_pred)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.98 1.00 0.99 450\n", + " 1 0.96 0.96 0.96 225\n", + " 2 0.96 0.93 0.95 225\n", + "\n", + " accuracy 0.97 900\n", + " macro avg 0.97 0.96 0.97 900\n", + "weighted avg 0.97 0.97 0.97 900\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_test, my_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAfsAAAGwCAYAAACuFMx9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABC2ElEQVR4nO3deVxU5f4H8M+wDesMIsKAAqEkSuISGs6tzIVAM9Ok20ZJZvbLwFLSzHI3o6uVZpF2y8TuldQWLclUskRN1MQotyiQAoMBjQDB2GbO7w8uUxOYDLMx53zer9d5Xec5z3POdy7ml2c555EJgiCAiIiIRMvB1gEQERGRZTHZExERiRyTPRERkcgx2RMREYkckz0REZHIMdkTERGJHJM9ERGRyDnZOgBT6HQ6lJaWwsvLCzKZzNbhEBGRkQRBwKVLlxAYGAgHB8v1P+vr69HY2GjydVxcXODq6mqGiKzLrpN9aWkpgoKCbB0GERGZqKSkBL169bLItevr6xEa4glNhdbka6lUKhQVFdldwrfrZO/l5QUA+PnENVB4ckZC7O4MH2jrEMia+HJPSWhGEw5hl/7fc0tobGyEpkKLn3OvgcKr87mi5pIOIVE/obGxkcnemlqH7hWeDib9AMk+OMmcbR0CWRWTvST878dsjalYTy8ZPL06fx8d7He62K6TPRERUUdpBR20JvwOqRV05gvGypjsiYhIEnQQoDNhxMiUtrbGsW8iIiKRY8+eiIgkQQcdTBmIN621bTHZExGRJGgFAVoTnvIwpa2tcRifiIhI5NizJyIiSZDyAj0meyIikgQdBGglmuw5jE9ERCRy7NkTEZEkcBifiIhI5Lgan4iIiESLPXsiIpIE3f8OU9rbKyZ7IiKSBK2Jq/FNaWtrTPZERCQJWgEm7npnvlisjXP2REREIseePRERSQLn7ImIiEROBxm0kJnU3l5xGJ+IiEjk2LMnIiJJ0Akthynt7RWTPRERSYLWxGF8U9raGofxiYiIRI49eyIikgQp9+yZ7ImISBJ0ggw6wYTV+Ca0tTUO4xMREYkce/ZERCQJHMYnIiISOS0coDVhQFtrxlisjcmeiIgkQTBxzl7gnD0RERF1VezZExGRJHDOnoiISOS0ggO0gglz9nb8ulwO4xMREYkce/ZERCQJOsigM6GPq4P9du3ZsyciIklonbM35eisF198ETKZDLNmzdKX1dfXIykpCd27d4enpyfi4+NRXl5u0K64uBjjx4+Hu7s7/Pz8MHfuXDQ3Nxt9fyZ7IiIiC/r666/x5ptvYuDAgQbls2fPxs6dO/H+++8jOzsbpaWlmDx5sv68VqvF+PHj0djYiMOHD2PTpk1IT0/HokWLjI6ByZ6IiCShdYGeKYexamtrkZCQgLfeegvdunXTl1dXV2PDhg145ZVXMHr0aERFRWHjxo04fPgwjhw5AgDYu3cvzpw5g//+978YPHgwxo0bh+XLlyMtLQ2NjY1GxcFkT0REktAyZ2/aAQA1NTUGR0NDwxXvmZSUhPHjxyMmJsagPDc3F01NTQbl/fr1Q3BwMHJycgAAOTk5iIyMhL+/v75OXFwcampqcPr0aaO+O5M9ERGREYKCgqBUKvVHampqu/W2bNmCEydOtHteo9HAxcUF3t7eBuX+/v7QaDT6On9O9K3nW88Zg6vxiYhIEnQmvhu/dTV+SUkJFAqFvlwul7epW1JSgieffBJZWVlwdXXt9D3NhT17IiKSBHPN2SsUCoOjvWSfm5uLiooKXH/99XBycoKTkxOys7Oxdu1aODk5wd/fH42NjaiqqjJoV15eDpVKBQBQqVRtVue3fm6t01FM9kREJAk6OJh8dNSYMWNw8uRJ5OXl6Y+hQ4ciISFB/2dnZ2fs27dP3yY/Px/FxcVQq9UAALVajZMnT6KiokJfJysrCwqFAhEREUZ9dw7jExERmZmXlxcGDBhgUObh4YHu3bvry6dNm4aUlBT4+PhAoVBg5syZUKvVGD5
8OAAgNjYWERERePDBB7Fy5UpoNBosWLAASUlJ7Y4m/B0meyIikgStIIPWhG1qTWnbntWrV8PBwQHx8fFoaGhAXFwc3njjDf15R0dHZGZmYsaMGVCr1fDw8EBiYiKWLVtm9L2Y7ImISBK0Ji7Q05r4utz9+/cbfHZ1dUVaWhrS0tKu2CYkJAS7du0y6b4A5+yJiIhEjz17IiKSBJ3gAJ0JW9zqBPvdCIfJnoiIJMHWw/i2xGF8IiIikWPPnoiIJEEH01bU68wXitUx2RMRkSQY+2Kc9trbK/uNnIiIiDqEPXsiIpKEzu5J/+f29orJnoiIJOHPe9J3tr29YrInIiJJkHLP3n4jF7Gtr/khLnAw1i3q2eacIADPJfRGXOBgHP5MaXAuP88N8+7ug8n9IhHffwCeva83Ck/bfh9lMt49yeVY+2k+tud/h63fnsLiDefQq0+9rcMiC5rw0EVsOnoGO899h1czf0T44Mu2DolEpEsk+7S0NFxzzTVwdXVFdHQ0jh07ZuuQbCY/zw2f/rc7QiN+b/f89rd6QNbOSNLvdQ54LqEPegQ24tXMH/DyjgK4eerw3P190Nxk4aDJ7AYOr8XOTb6YNeFazL+vDxydgRcyCiF309o6NLKAW+74DY8uLsXmV1RIiuuLc2dcsSLjHJTd+R+vObW+VMeUw17ZPPKtW7ciJSUFixcvxokTJzBo0CDExcUZ7N8rFb/XOeBfySGYtaoEXsq2/6gXnnLDh2/2QMorxW3OlRTIcek3J0yZq0FQWAOuCa/HAyka/HbBGeXnXawRPpnRcw/0Qda27vj5BzecO+OGl2cFw79XE64d2P4vgWTfJj96EbszfLB3qw+Kf3TF2nm90PC7DHH3Vdo6NFHRCTKTD3tl82T/yiuvYPr06Zg6dSoiIiKwfv16uLu745133rF1aFb3+rO9cMOYGlw/orbNufrLMryYFIKkFefh49fc5nyvPg1QdGvGnve6o6lRhobfZdj9XncEX1sPVVCjNcInC/JQtPzyd6nK0caRkLk5Oetw7cDLOHHQS18mCDJ8c9ALEVEcyifzsOkCvcbGRuTm5mL+/Pn6MgcHB8TExCAnJ6dN/YaGBjQ0NOg/19TUWCVOa9i/wxsFJ93w2q4f2j3/5pKeiBhah3+Mbf87u3vqsOrDAix5OBQZa/wBAIGhDXjhvUI4chmmXZPJBDy29BecOuaBn/PdbB0OmZnCRwtHJ6DqguF/qL9ddEJQWMMVWlFn6EwciudLdTrp4sWL0Gq18Pf3Nyj39/eHRqNpUz81NRVKpVJ/BAUFWStUi6r4xRnrFvXEvNd/hotr240WcvYokPeVFx5b9ssVr9HwuwyvPBWE64bVYU3mD3jl4x9xTb96LHywNxp+t9+hJwKSXziPkPDfkfp4iK1DIbJrrbvemXLYK7vq882fPx8pKSn6zzU1NaJI+AXfuaPqojOS4sL1ZTqtDCePeOCTjb64fcpFlP3kgsn9Ig3aLZ9+DQZE12HVhwX4cns3lJe4YM3OH+Hwv7+Pz6T9jPj+A5CzR4mRk6qs+I3IXJKeP4/omBo8NTkMF8u49kKMaiodoW0GvHsYTs91823Gbxfs6p9o6sJs+jfJ19cXjo6OKC8vNygvLy+HSqVqU18ul0Mul1srPKsZfPMlvPnF9wZlL88ORlBYPe5OqoDCpxnjH/zV4Pz/je6H/1vyC4bHtgzrN/zuAAcHGKzUd3AQIJMBOnvevUGyBCQ9/wv+MbYac/8ZhvIS8f29pxbNTQ748Tt3DLnpEnJ2tzxOK5MJGHxTLT5J727j6MRFCxm0JrwYx5S2tmbTZO/i4oKoqCjs27cPkyZNAgDodDrs27cPycnJtgzNqtw9dbimn+Ez1K7uOnh10+rL21uU59ezCarglsV3Q0ZcwlvPB+L1Z3th4sMXoNPJsO11Pzg6AYNubLvgj7q25BfOY9Sk37Dk4d74vdYB3Xq0PIJVd8kRjfX2O5RI7fvo376Ys6YEP3z
rjvxv3HHn9Atwdddh7xYfW4cmKqYOxXMY3wQpKSlITEzE0KFDccMNN2DNmjWoq6vD1KlTbR2aXQm+tgFL089h8ysqzJrQFzIHAWEDfseKzYXo7t/2FwXq2iYktozkvPRhgUH5S7ODkLWNvT2xyf6kG5TdtZgyV4NuPZpx7rQbnksIRdVFZ1uHRiJh82R/zz334MKFC1i0aBE0Gg0GDx6M3bt3t1m0JzWr/vKP/F/tKc1rUxZ1Sy2ibvn7dmQf4noOtnUIZGWfbPTFJxt9bR2GqGlh2lC8Pb/SyubJHgCSk5MlNWxPRETWx2F8IiIikeNGOERERCRa7NkTEZEkCCbuZy/w0TsiIqKujcP4REREJFrs2RMRkSSYuk2tPW9xy2RPRESSoDVx1ztT2tqa/UZOREREHcJkT0REktA6jG/KYYx169Zh4MCBUCgUUCgUUKvV+Oyzz/TnR44cCZlMZnA89thjBtcoLi7G+PHj4e7uDj8/P8ydOxfNzca/Ap3D+EREJAk6OEBnQh/X2La9evXCiy++iGuvvRaCIGDTpk2YOHEivvnmG1x33XUAgOnTp2PZsmX6Nu7u7vo/a7VajB8/HiqVCocPH0ZZWRmmTJkCZ2dnvPDCC0bFwmRPRERkARMmTDD4vGLFCqxbtw5HjhzRJ3t3d/d2t3QHgL179+LMmTP4/PPP4e/vj8GDB2P58uWYN28elixZAhcXlw7HwmF8IiKSBK0gM/kAgJqaGoOjoaHh6vfWarFlyxbU1dVBrVbryzdv3gxfX18MGDAA8+fPx+XLl/XncnJyEBkZabAxXFxcHGpqanD69Gmjvjt79kREJAnmevQuKCjIoHzx4sVYsmRJu21OnjwJtVqN+vp6eHp6Yvv27YiIiAAA3H///QgJCUFgYCC+++47zJs3D/n5+fjoo48AABqNps0OsK2fNRqNUbEz2RMRkSQIJu56J/yvbUlJCRQKhb5cLpdfsU14eDjy8vJQXV2NDz74AImJicjOzkZERAQeffRRfb3IyEgEBARgzJgxKCwsRJ8+fTodZ3s4jE9ERGSE1tX1rcffJXsXFxeEhYUhKioKqampGDRoEF599dV260ZHRwMACgoKAAAqlQrl5eUGdVo/X2me/0qY7ImISBK0kJl8mEqn011xjj8vLw8AEBAQAABQq9U4efIkKioq9HWysrKgUCj0UwEdxWF8IiKSBJ1g2itvdYJx9efPn49x48YhODgYly5dQkZGBvbv3489e/agsLAQGRkZuO2229C9e3d89913mD17NkaMGIGBAwcCAGJjYxEREYEHH3wQK1euhEajwYIFC5CUlPS3owntYbInIiKygIqKCkyZMgVlZWVQKpUYOHAg9uzZg1tvvRUlJSX4/PPPsWbNGtTV1SEoKAjx8fFYsGCBvr2joyMyMzMxY8YMqNVqeHh4IDEx0eC5/I5isiciIknQmbhAz9i2GzZsuOK5oKAgZGdnX/UaISEh2LVrl1H3bQ+TPRERSYIOMuhMmHc3pa2tcYEeERGRyLFnT0REkvDnt+B1tr29YrInIiJJsPacfVdiv5ETERFRh7BnT0REkqCDie/Gt+MFekz2REQkCYKJq/EFJnsiIqKuzVy73tkjztkTERGJHHv2REQkCVJejc9kT0REksBhfCIiIhIt9uyJiEgSpPxufCZ7IiKSBA7jExERkWixZ09ERJIg5Z49kz0REUmClJM9h/GJiIhEjj17IiKSBCn37JnsiYhIEgSY9vicYL5QrI7JnoiIJEHKPXvO2RMREYkce/ZERCQJUu7ZM9kTEZEkSDnZcxifiIhI5NizJyIiSZByz57JnoiIJEEQZBBMSNimtLU1DuMTERGJHHv2REQkCdzPnoiISOSkPGfPYXwiIiKRY7InIiJJaF2gZ8phjHXr1mHgwIFQKBRQKBRQq9X47LPP9Ofr6+uRlJSE7t27w9PTE/Hx8SgvLze4RnFxMcaPHw93d3f4+flh7ty5aG5uNvq7M9k
TEZEktA7jm3IYo1evXnjxxReRm5uL48ePY/To0Zg4cSJOnz4NAJg9ezZ27tyJ999/H9nZ2SgtLcXkyZP17bVaLcaPH4/GxkYcPnwYmzZtQnp6OhYtWmT0d5cJgmC3G/nU1NRAqVTitx96Q+HF31vELq7nEFuHQNZkv/80kRGahSbsx8eorq6GQqGwyD1ac0XUh7Ph5CHv9HWa6xqQG78aJSUlBrHK5XLI5R27ro+PD1atWoW77roLPXr0QEZGBu666y4AwPfff4/+/fsjJycHw4cPx2effYbbb78dpaWl8Pf3BwCsX78e8+bNw4ULF+Di4tLh2JkhiYiIjBAUFASlUqk/UlNTr9pGq9Viy5YtqKurg1qtRm5uLpqamhATE6Ov069fPwQHByMnJwcAkJOTg8jISH2iB4C4uDjU1NToRwc6ShSr8e/sGwknmbOtwyALK1w13NYhkBWFPZtr6xDICmSCDGiyzr0EE1fjt87Zt9ezv5KTJ09CrVajvr4enp6e2L59OyIiIpCXlwcXFxd4e3sb1Pf394dGowEAaDQag0Tfer71nDFEkeyJiIiuRoBps0OtTVsX3HVEeHg48vLyUF1djQ8++ACJiYnIzs7ufBCdxGRPRERkIS4uLggLCwMAREVF4euvv8arr76Ke+65B42NjaiqqjLo3ZeXl0OlUgEAVCoVjh07ZnC91tX6rXU6inP2REQkCa1v0DPlMDkGnQ4NDQ2IioqCs7Mz9u3bpz+Xn5+P4uJiqNVqAIBarcbJkydRUVGhr5OVlQWFQoGIiAij7suePRERSYK1N8KZP38+xo0bh+DgYFy6dAkZGRnYv38/9uzZA6VSiWnTpiElJQU+Pj5QKBSYOXMm1Go1hg9vWZ8UGxuLiIgIPPjgg1i5ciU0Gg0WLFiApKSkDq/+b8VkT0REZAEVFRWYMmUKysrKoFQqMXDgQOzZswe33norAGD16tVwcHBAfHw8GhoaEBcXhzfeeEPf3tHREZmZmZgxYwbUajU8PDyQmJiIZcuWGR0Lkz0REUmCTpC1rP43ob0xNmzY8LfnXV1dkZaWhrS0tCvWCQkJwa5du4y6b3uY7ImISBIEwcTV+Hb8nicu0CMiIhI59uyJiEgSrL1ArythsiciIklgsiciIhI5ay/Q60o4Z09ERCRy7NkTEZEkSHk1PpM9ERFJQkuyN2XO3ozBWBmH8YmIiESOPXsiIpIErsYnIiISOQF/7Enf2fb2isP4REREIseePRERSQKH8YmIiMROwuP4TPZERCQNJvbsYcc9e87ZExERiRx79kREJAl8gx4REZHISXmBHofxiYiIRI49eyIikgZBZtoiOzvu2TPZExGRJEh5zp7D+ERERCLHnj0REUkDX6pDREQkblJejd+hZP/JJ590+IJ33HFHp4MhIiIi8+tQsp80aVKHLiaTyaDVak2Jh4iIyHLseCjeFB1K9jqdztJxEBERWZSUh/FNWo1fX19vrjiIiIgsSzDDYaeMTvZarRbLly9Hz5494enpiXPnzgEAFi5ciA0bNpg9QCIiIjKN0cl+xYoVSE9Px8qVK+Hi4qIvHzBgAN5++22zBkdERGQ+MjMc9snoZP/uu+/i3//+NxISEuDo6KgvHzRoEL7//nuzBkdERGQ2Vh7GT01NxbBhw+Dl5QU/Pz9MmjQJ+fn5BnVGjhwJmUxmcDz22GMGdYqLizF+/Hi4u7vDz88Pc+fORXNzs1GxGP2c/S+//IKwsLA25TqdDk1NTcZejoiISJSys7ORlJSEYcOGobm5Gc8++yxiY2Nx5swZeHh46OtNnz4dy5Yt0392d3fX/1mr1WL8+PFQqVQ4fPgwysrKMGXKFDg7O+OFF17ocCxGJ/uIiAgcPHgQISEhBuUffPABhgwZYuzliIiIrMNMb9CrqakxKJbL5ZDL5W2q79692+Bzeno6/Pz8kJubixEjRujL3d3doVKp2r3l3r17cebMGXz++efw9/fH4MGDsXz5csybNw9
LliwxmE7/O0YP4y9atAjJycn417/+BZ1Oh48++gjTp0/HihUrsGjRImMvR0REZB2tu96ZcgAICgqCUqnUH6mpqR26fXV1NQDAx8fHoHzz5s3w9fXFgAEDMH/+fFy+fFl/LicnB5GRkfD399eXxcXFoaamBqdPn+7wVze6Zz9x4kTs3LkTy5Ytg4eHBxYtWoTrr78eO3fuxK233mrs5YiIiOxKSUkJFAqF/nN7vfq/0ul0mDVrFm688UYMGDBAX37//fcjJCQEgYGB+O677zBv3jzk5+fjo48+AgBoNBqDRA9A/1mj0XQ45k69G//mm29GVlZWZ5oSERHZhLm2uFUoFAbJviOSkpJw6tQpHDp0yKD80Ucf1f85MjISAQEBGDNmDAoLC9GnT5/OB/sXnd4I5/jx4zh79iyAlnn8qKgoswVFRERkdjba9S45ORmZmZk4cOAAevXq9bd1o6OjAQAFBQXo06cPVCoVjh07ZlCnvLwcAK44z98eo5P9+fPncd999+Grr76Ct7c3AKCqqgr/+Mc/sGXLlqt+ESIiIikQBAEzZ87E9u3bsX//foSGhl61TV5eHgAgICAAAKBWq7FixQpUVFTAz88PAJCVlQWFQoGIiIgOx2L0Ar1HHnkETU1NOHv2LCorK1FZWYmzZ89Cp9PhkUceMfZyRERE1mGmBXodlZSUhP/+97/IyMiAl5cXNBoNNBoNfv/9dwBAYWEhli9fjtzcXPz000/45JNPMGXKFIwYMQIDBw4EAMTGxiIiIgIPPvggvv32W+zZswcLFixAUlJSh9YKtDK6Z5+dnY3Dhw8jPDxcXxYeHo7XXnsNN998s7GXIyIisgqZ0HKY0t4Y69atA9Dy4pw/27hxIx566CG4uLjg888/x5o1a1BXV4egoCDEx8djwYIF+rqOjo7IzMzEjBkzoFar4eHhgcTERIPn8jvC6GQfFBTU7stztFotAgMDjb0cERGRdVh5zl64ymrAoKAgZGdnX/U6ISEh2LVrl3E3/wujh/FXrVqFmTNn4vjx4/qy48eP48knn8RLL71kUjBERERkfh3q2Xfr1g0y2R9zFXV1dYiOjoaTU0vz5uZmODk54eGHH8akSZMsEigREZFJOjHv3qa9nepQsl+zZo2FwyAiIrIwGz161xV0KNknJiZaOg4iIiKykE6/VAcA6uvr0djYaFBm7FuFiIiIrELCPXujF+jV1dUhOTkZfn5+8PDwQLdu3QwOIiKiLsnK+9l3JUYn+6effhpffPEF1q1bB7lcjrfffhtLly5FYGAg3n33XUvESERERCYwehh/586dePfddzFy5EhMnToVN998M8LCwhASEoLNmzcjISHBEnESERGZRsKr8Y3u2VdWVqJ3794AWubnKysrAQA33XQTDhw4YN7oiIiIzKT1DXqmHPbK6J597969UVRUhODgYPTr1w/btm3DDTfcgJ07d+o3xiHLmfDQRdw1owI+PZpx7owb3ljQE/l57rYOi4zw2HUnEBtUhN6KKjRoHXHiggorvxmOokve+jr3hJ3BHdf8iOt8LsLTuQlDtk3FpaY/3oMd7fcLNt+6s93r3/nZZJys9LP01yAzcXAQ8MDsXzD6zl/RrUcTfi13wecf+CJjbQAA++1JUtdidM9+6tSp+PbbbwEAzzzzDNLS0uDq6orZs2dj7ty5Rl3rwIEDmDBhAgIDAyGTybBjxw5jw5GUW+74DY8uLsXmV1RIiuuLc2dcsSLjHJTd276+mLquG/zK8N8frsM/99yJxH23w8lBh/QxmXBz/OPn6ObYjAOlwVh36vp2r3HiogrDP5xicGwt6IfiS144WdnDWl+FzOCfM8ow/oELeGNRCB4dE4l3XuyFu/6vDBMfqrB1aOIj4QV6RvfsZ8+erf9zTEwMvv/+e+Tm5iIsLEy/S09H1dXVYdCgQXj44YcxefJkY0ORnMmPXsTuDB/s3eoDAFg7rxduGFODuPsqse11fxtHRx318JfjDT7Pyxm
FY3dtwoDuF/B1Rcv+Eun5Lf8tRfv90u41mnSOuFj/x4iOk0yLmF4/4d38SLA3aF8iompxJMsbx77wBgCUn5dj5B2VCB9cC4D/XZN5mPScPdDygv6QkJBOtR03bhzGjRtnagiS4OSsw7UDL2PL638MzwqCDN8c9EJE1GUbRkam8nJueVdFVYNrp68xptfP8HZpwIeF4VevTF3KmVxP3HbfBfQMrccvRa4I7X8Z1w29hH8/H2zr0ERHBhN3vTNbJNbXoWS/du3aDl/wiSee6HQwV9PQ0ICGhgb955qaGovdq6tR+Gjh6ARUXTD8kf120QlBYQ1XaEVdnQwCnhv6FY5XqPBjtU+nr/PPPmdxsKwXNL97mjE6soZtbwTA3VOLt744CZ1WBgdHAZtW9cSXO7rbOjQSkQ4l+9WrV3foYjKZzKLJPjU1FUuXLrXY9Ymsbcmwg+irrMS9eyd1+hoqt1rcHHAeTxy61XyBkdWMuL0Soyf9in890Rs//+CGPhGX8X+Li1sW6n3oa+vwxEXCj951KNkXFRVZOo4OmT9/PlJSUvSfa2pqEBQUZMOIrKem0hHaZsC7R7NBeTffZvx2weTZGLKBxUMPYnTPn3Ff1kSTeuTxffJR1SjHvvOdm04j23rk2RJsWxeA7J0tPfmf8t3h16sR9zxexmRvbnxdrn2Qy+VQKBQGh1Q0Nzngx+/cMeSmS/oymUzA4JtqcSaXj97ZFwGLhx7ErUFFeGDfBJyvM+XvsYD43t9j+7lwNAuOZouQrEfupoNOZ9hj1GkBmYMdZxbqctgltCMf/dsXc9aU4Idv3ZH/jTvunH4Bru467N3S+blesr6lww5iwjUFeCx7LOqaXODr2rLA8lKTCxq0Lf9J+rpeRg+3ywjxalmXEu5dibpmZ5TWeaK68Y+FfGr/XxDsdQnbCvtZ/4uQWRz93Bv3JpfiQqlLyzD+dZdx5yPl2LuNvXqzk3DP3qbJvra2FgUFBfrPRUVFyMvLg4+PD4KDuRL1r7I/6QZldy2mzNWgW49mnDvthucSQlF10dnWoZEREvqeAQBk3PqJQfnTOSPx0bmWpH3/tafxxMBc/bktsR+3qQMA/wz7HrkX/HGuhptQ2as3FodgylO/IGn5z/D2bXmpzmcZPbD51UBbhyY6pr4Fz57foCcTBMFm4e/fvx+jRo1qU56YmIj09PSrtq+pqYFSqcRITISTjAlP7ApXqW0dAllR2LO5V69Edq9ZaMKXTe+jurraYlOzrbnimhUr4ODa+UdcdfX1+Om55ywaq6XYtGc/cuRI2PB3DSIikhIJD+N3aoHewYMH8cADD0CtVuOXX1re8PWf//wHhw4dMmtwREREZiPh1+Uanew//PBDxMXFwc3NDd98843+JTfV1dV44YUXzB4gERERmcboZP/8889j/fr1eOutt+Ds/Mc8+Y033ogTJ06YNTgiIiJz4Ra3RsjPz8eIESPalCuVSlRVVZkjJiIiIvOT8Bv0jO7Zq1Qqg8flWh06dAi9e/c2S1BERERmxzn7jps+fTqefPJJHD16FDKZDKWlpdi8eTPmzJmDGTNmWCJGIiIiMoHRw/jPPPMMdDodxowZg8uXL2PEiBGQy+WYM2cOZs6caYkYiYiITCbll+oYnexlMhmee+45zJ07FwUFBaitrUVERAQ8Pbm1JhERdWESfs6+0y/VcXFxQUREhDljISIiIgswes5+1KhRGD169BUPIiKiLsnUx+6M7NmnpqZi2LBh8PLygp+fHyZNmoT8/HyDOvX19UhKSkL37t3h6emJ+Ph4lJeXG9QpLi7G+PHj4e7uDj8/P8ydOxfNzYbbnV+N0cl+8ODBGDRokP6IiIhAY2MjTpw4gcjISGMvR0REZB1WXo2fnZ2NpKQkHDlyBFlZWWhqakJsbCzq6ur0dWbPno2dO3fi/fffR3Z2NkpLSzF58mT9ea1Wi/Hjx6OxsRGHDx/Gpk2bkJ6ejkWLFhkVi9HD+KtXr263fMmSJai
trTX2ckRERKK0e/dug8/p6enw8/NDbm4uRowYgerqamzYsAEZGRn6kfGNGzeif//+OHLkCIYPH469e/fizJkz+Pzzz+Hv74/Bgwdj+fLlmDdvHpYsWQIXF5cOxdKpd+O354EHHsA777xjrssRERGZl5l69jU1NQZH62vjr6a6uhoA4OPjAwDIzc1FU1MTYmJi9HX69euH4OBg5OTkAABycnIQGRkJf39/fZ24uDjU1NTg9OnTHf7qZkv2OTk5cDVh60AiIiJLMtfrcoOCgqBUKvVHamrqVe+t0+kwa9Ys3HjjjRgwYAAAQKPRwMXFBd7e3gZ1/f39odFo9HX+nOhbz7ee6yijh/H/PJcAAIIgoKysDMePH8fChQuNvRwREZFdKSkpMdjPXi6XX7VNUlISTp06ZbPdYY1O9kql0uCzg4MDwsPDsWzZMsTGxpotMCIioq5IoVAYJPurSU5ORmZmJg4cOIBevXrpy1UqFRobG1FVVWXQuy8vL4dKpdLXOXbsmMH1Wlfrt9bpCKOSvVarxdSpUxEZGYlu3boZ05SIiMi2rPxSHUEQMHPmTGzfvh379+9HaGiowfmoqCg4Oztj3759iI+PB9Cy2VxxcTHUajUAQK1WY8WKFaioqICfnx8AICsrCwqFwqh33RiV7B0dHREbG4uzZ88y2RMRkV2x9utyk5KSkJGRgY8//hheXl76OXalUgk3NzcolUpMmzYNKSkp8PHxgUKhwMyZM6FWqzF8+HAAQGxsLCIiIvDggw9i5cqV0Gg0WLBgAZKSkjo0fdDK6AV6AwYMwLlz54xtRkREJCnr1q1DdXU1Ro4ciYCAAP2xdetWfZ3Vq1fj9ttvR3x8PEaMGAGVSoWPPvpIf97R0RGZmZlwdHSEWq3GAw88gClTpmDZsmVGxWL0nP3zzz+POXPmYPny5YiKioKHh4fBeWPmMYiIiKzKiu+3F4Sr38zV1RVpaWlIS0u7Yp2QkBDs2rXLpFg6nOyXLVuGp556CrfddhsA4I477oBMJtOfFwQBMpkMWq3WpICIiIgsghvhXN3SpUvx2GOP4csvv7RkPERERGRmHU72rcMRt9xyi8WCISIishTuZ99Bfx62JyIisiscxu+Yvn37XjXhV1ZWmhQQERERmZdRyX7p0qVt3qBHRERkDziM30H33nuv/g0+REREdkXCw/gdfqkO5+uJiIjsk9Gr8YmIiOyShHv2HU72Op3OknEQERFZFOfsiYiIxE7CPXujN8IhIiIi+8KePRERSYOEe/ZM9kREJAlSnrPnMD4REZHIsWdPRETSwGF8IiIiceMwPhEREYkWe/ZERCQNHMYnIiISOQknew7jExERiRx79kREJAmy/x2mtLdXTPZERCQNEh7GZ7InIiJJ4KN3REREJFrs2RMRkTRwGJ+IiEgC7Dhhm4LD+ERERCLHnj0REUmClBfoMdkTEZE0SHjOnsP4REREFnDgwAFMmDABgYGBkMlk2LFjh8H5hx56CDKZzOAYO3asQZ3KykokJCRAoVDA29sb06ZNQ21trdGxMNkTEZEktA7jm3IYo66uDoMGDUJaWtoV64wdOxZlZWX647333jM4n5CQgNOnTyMrKwuZmZk4cOAAHn30UaO/O4fxiYhIGqw8jD9u3DiMGzfub+vI5XKoVKp2z509exa7d+/G119/jaFDhwIAXnvtNdx222146aWXEBgY2OFY2LMnIiIyQk1NjcHR0NDQ6Wvt378ffn5+CA8Px4wZM/Drr7/qz+Xk5MDb21uf6AEgJiYGDg4OOHr0qFH3EUXPXubsApnM2dZhkIWFPZtr6xDIis4tj7J1CGQFuvp6YPH7VrmXuVbjBwUFGZQvXrwYS5YsMfp6Y8eOxeTJkxEaGorCwkI8++yzGDduHHJycuDo6AiNRgM/Pz+DNk5OTvDx8YFGozHqXqJI9kRERFdlpmH8kpISKBQKfbFcLu/U5e699179nyMjIzFw4ED06dMH+/fvx5gxY0wItC0
O4xMRkTQIZjgAKBQKg6Ozyf6vevfuDV9fXxQUFAAAVCoVKioqDOo0NzejsrLyivP8V8JkT0RE1AWcP38ev/76KwICAgAAarUaVVVVyM39Ywrziy++gE6nQ3R0tFHX5jA+ERFJgrXfoFdbW6vvpQNAUVER8vLy4OPjAx8fHyxduhTx8fFQqVQoLCzE008/jbCwMMTFxQEA+vfvj7Fjx2L69OlYv349mpqakJycjHvvvdeolfgAe/ZERCQVZhrG76jjx49jyJAhGDJkCAAgJSUFQ4YMwaJFi+Do6IjvvvsOd9xxB/r27Ytp06YhKioKBw8eNJgW2Lx5M/r164cxY8bgtttuw0033YR///vfRn919uyJiIgsYOTIkRCEK/+GsGfPnqtew8fHBxkZGSbHwmRPRESSIBMEyP4m+Xakvb1isiciImngRjhEREQkVuzZExGRJHA/eyIiIrHjMD4RERGJFXv2REQkCRzGJyIiEjsJD+Mz2RMRkSRIuWfPOXsiIiKRY8+eiIikgcP4RERE4mfPQ/Gm4DA+ERGRyLFnT0RE0iAILYcp7e0Ukz0REUkCV+MTERGRaLFnT0RE0sDV+EREROIm07UcprS3VxzGJyIiEjn27ImISBo4jE9ERCRuUl6Nz2RPRETSIOHn7DlnT0REJHLs2RMRkSRwGJ+IiEjsJLxAj8P4REREIseePRERSQKH8YmIiMSOq/GJiIhIrNizJyIiSeAwPhERkdhxNT4RERGZ04EDBzBhwgQEBgZCJpNhx44dBucFQcCiRYsQEBAANzc3xMTE4McffzSoU1lZiYSEBCgUCnh7e2PatGmora01OhYmeyIikoTWYXxTDmPU1dVh0KBBSEtLa/f8ypUrsXbtWqxfvx5Hjx6Fh4cH4uLiUF9fr6+TkJCA06dPIysrC5mZmThw4AAeffRRo787h/GJiEgadELLYUp7ADU1NQbFcrkccrm8TfVx48Zh3Lhx7V5KEASsWbMGCxYswMSJEwEA7777Lvz9/bFjxw7ce++9OHv2LHbv3o2vv/4aQ4cOBQC89tpruO222/DSSy8hMDCww6GzZ09ERNIgmOEAEBQUBKVSqT9SU1ONDqWoqAgajQYxMTH6MqVSiejoaOTk5AAAcnJy4O3trU/0ABATEwMHBwccPXrUqPuxZ09ERGSEkpISKBQK/ef2evVXo9FoAAD+/v4G5f7+/vpzGo0Gfn5+BuednJzg4+Ojr9NRTPZERCQJMpj46N3//lehUBgke3vAYXwiIpKG1jfomXKYiUqlAgCUl5cblJeXl+vPqVQqVFRUGJxvbm5GZWWlvk5HMdkTERFZWWhoKFQqFfbt26cvq6mpwdGjR6FWqwEAarUaVVVVyM3N1df54osvoNPpEB0dbdT9OIxPRESSYO036NXW1qKgoED/uaioCHl5efDx8UFwcDBmzZqF559/Htdeey1CQ0OxcOFCBAYGYtKkSQCA/v37Y+zYsZg+fTrWr1+PpqYmJCcn49577zVqJT7AZE9ERFJh5TfoHT9+HKNGjdJ/TklJAQAkJiYiPT0dTz/9NOrq6vDoo4+iqqoKN910E3bv3g1XV1d9m82bNyM5ORljxoyBg4MD4uPjsXbtWqNDZ7InIiKygJEjR0L4m3l+mUyGZcuWYdmyZVes4+Pjg4yMDJNjYbInIiJJkAkCZCYssjOlra0x2RMRkTTo/neY0t5OcTU+ERGRyLFnT0REksBhfCIiIrGT8H72TPZERCQNpr4Fz4579pyzJyIiEjn27ImISBKs/Qa9roTJ3o5sOvQt/IMa25TvfNcPaQtDbBARWYqDg4AHZv+C0Xf+im49mvBruQs+/8AXGWsD8MfeW9TV/d+AE4gNLkKosgoNzY745oIKq04MR1GNt77OPdeewe2hP+I6n4vwdGlC1HtTcanJcMtUpUs9Ft5wCKN7/QwdZNjzc2+s+PpGXG52tvI3snMSHsZnsrcjT9wRAQfHPz5f0/cyUjN+wMFPu9k
uKLKIf84ow/gHLuDlp0Lx8w9uuHZgHVJWFaGuxhEfp/tf/QLUJQzzL8N/86/DyYt+cHLQIWXIMbwTk4nbPrkHv/8vUbs6NeNgaTAOlgZjzvVH273OyzfvQw+3y3jo89vh7KBD6j++xHJ1Np46GGPNr0N2zKZz9qmpqRg2bBi8vLzg5+eHSZMmIT8/35YhdWnVlc747cIfxw1jqlH6kxzfHfGydWhkZhFRtTiS5Y1jX3ij/Lwch3b54MRBJcIH19o6NDLCI/vGY3thPxRU++D733wx76tR6OlZi+t8LujrbDo7EP8+NQR5F/zavUYf5W8Y0bMEz+Xcgu8u+iO3IgDLj92E8dcUwM+tzlpfRRRkOtMPe2XTZJ+dnY2kpCQcOXIEWVlZaGpqQmxsLOrq+Bf4apycdRh956/Ys80XHNYVnzO5nhj8jxr0DK0HAIT2v4zrhl7C1/u9bRsYmcTLpWUarrrR9So1/zC4RzmqG1xw6tc/fhk4XNYLOkGGQb4Vf9OS2uhC+9lbm02H8Xfv3m3wOT09HX5+fsjNzcWIESPa1G9oaEBDQ4P+c01NjcVj7KrUsVXwVDQj631fW4dCFrDtjQC4e2rx1hcnodPK4OAoYNOqnvhyR3dbh0adJIOA54Z9hdwKFX6s8ulwux6ul/FrvZtBmVZwQHWDHL5ul80dJolUl5qzr66uBtCyy097UlNTsXTpUmuG1GWNvecCvt6vRGWFi61DIQsYcXslRk/6Ff96ojd+/sENfSIu4/8WF7cs1PuQv+DZo8XRB3GtdyXu2z3J1qFIl4RfqtNlnrPX6XSYNWsWbrzxRgwYMKDdOvPnz0d1dbX+KCkpsXKUXYNfzwYMvqkGu7f0sHUoZCGPPFuCbesCkL2zO37Kd8e+7b7YvkGFex4vs3Vo1AmLbjiIUb1+xpS9d6D8sqdRbS/Uu6O76+8GZY4yHZTyBlz83d2cYYpe6+tyTTnsVZfp2SclJeHUqVM4dOjQFevI5XLI5fIrnpeK2H9eRPWvzjj2hbetQyELkbvpoNMZrsXQaQGZg/3+YyNNAhbdcAi3BhfhgT134Hytwugr5F3wh1LeiOt8LuB0Zcsv+MNVv8BBJuDbi+0v6iP6qy6R7JOTk5GZmYkDBw6gV69etg6nS5PJBNz6z4vI+qA7dFouzBOro597497kUlwodWkZxr/uMu58pBx7t3EI354sjj6ICaEFmPHlWNQ1ucDXtWWO/VKTCxq0Lf/8+rpeRg+3ywjxalmDFN6tEnVNziit80R1oysKq7vhwC9BeF6djcVHboaTgw6Log/h05/CUPG7h82+m13ic/a2IQgCZs6cie3bt2P//v0IDQ21ZTh2YchNNfDv1Yi92ziEL2ZvLA7BlKd+QdLyn+Ht2/JSnc8yemDzq4G2Do2MkBB+BgCwOe4Tg/J5X43E9sJ+AID7wk9j5qBc/bmMsR+3qfPUwTFYFH0I6bGZEAQZ9hSH4vljN1njK4iLANP2pLffXA+ZINjuV5XHH38cGRkZ+PjjjxEeHq4vVyqVcHNz+5uWLWpqaqBUKjHK+Z9wkvFNUkRicm55lK1DICvQ1dejaPFzqK6uhkJh/DRHR7TmitFDnoGTY8cfe/yrZm09vvjmRYvGaik2XaC3bt06VFdXY+TIkQgICNAfW7dutWVYREREomLzYXwiIiKrEGDinL3ZIrG6LrFAj4iIyOIkvECvyzxnT0RERJbBnj0REUmDDqZtJWLHG+Ew2RMRkSSY+hY8e36DHofxiYiIRI49eyIikgYJL9BjsiciImmQcLLnMD4REZHIsWdPRETSwJ49ERGRyOnMcBhhyZIlkMlkBke/fv305+vr65GUlITu3bvD09MT8fHxKC8vN/FLto/JnoiIJKH10TtTDmNdd911KCsr0x+HDh3Sn5s9ezZ27tyJ999/H9nZ2SgtLcXkyZPN+ZX1OIxPRERkhJqaGoPPcrkccrm83bpOTk5QqVRtyqurq7FhwwZkZGRg9Oj
RAICNGzeif//+OHLkCIYPH27WmNmzJyIiaWidszflABAUFASlUqk/UlNTr3jLH3/8EYGBgejduzcSEhJQXFwMAMjNzUVTUxNiYmL0dfv164fg4GDk5OSY/auzZ09ERNKgEwCZCYvsdC1tS0pKDPazv1KvPjo6Gunp6QgPD0dZWRmWLl2Km2++GadOnYJGo4GLiwu8vb0N2vj7+0Oj0XQ+xitgsiciIjKCQqEwSPZXMm7cOP2fBw4ciOjoaISEhGDbtm1wc3OzZIhtcBifiIikwUzD+J3l7e2Nvn37oqCgACqVCo2NjaiqqjKoU15e3u4cv6mY7ImISCJMTfSmJfva2loUFhYiICAAUVFRcHZ2xr59+/Tn8/PzUVxcDLVabeL3bIvD+ERERBYwZ84cTJgwASEhISgtLcXixYvh6OiI++67D0qlEtOmTUNKSgp8fHygUCgwc+ZMqNVqs6/EB5jsiYhIKqz8Br3z58/jvvvuw6+//ooePXrgpptuwpEjR9CjRw8AwOrVq+Hg4ID4+Hg0NDQgLi4Ob7zxRufj+xtM9kREJA06E4fidca13bJly9+ed3V1RVpaGtLS0jofUwdxzp6IiEjk2LMnIiJpEHQthynt7RSTPRERSYOEd71jsiciImmw8px9V8I5eyIiIpFjz56IiKSBw/hEREQiJ8DEZG+2SKyOw/hEREQix549ERFJA4fxiYiIRE6nA2DCs/I6+33OnsP4REREIseePRERSQOH8YmIiEROwsmew/hEREQix549ERFJg4Rfl8tkT0REkiAIOggm7FxnSltbY7InIiJpEATTeuecsyciIqKuij17IiKSBsHEOXs77tkz2RMRkTTodIDMhHl3O56z5zA+ERGRyLFnT0RE0sBhfCIiInETdDoIJgzj2/OjdxzGJyIiEjn27ImISBo4jE9ERCRyOgGQSTPZcxifiIhI5NizJyIiaRAEAKY8Z2+/PXsmeyIikgRBJ0AwYRhfYLInIiLq4gQdTOvZ89E7IiIiakdaWhquueYauLq6Ijo6GseOHbN6DEz2REQkCYJOMPkw1tatW5GSkoLFixfjxIkTGDRoEOLi4lBRUWGBb3hlTPZERCQNgs70w0ivvPIKpk+fjqlTpyIiIgLr16+Hu7s73nnnHQt8wSuz6zn71sUSzUKTjSMhInPT1dfbOgSygtafszUWvzWjyaR36jSjJdfU1NQYlMvlcsjl8jb1GxsbkZubi/nz5+vLHBwcEBMTg5ycnM4H0gl2newvXboEADjYvMO2gRCR+S1+39YRkBVdunQJSqXSItd2cXGBSqXCIc0uk6/l6emJoKAgg7LFixdjyZIlbepevHgRWq0W/v7+BuX+/v74/vvvTY7FGHad7AMDA1FSUgIvLy/IZDJbh2M1NTU1CAoKQklJCRQKha3DIQviz1o6pPqzFgQBly5dQmBgoMXu4erqiqKiIjQ2Npp8LUEQ2uSb9nr1XY1dJ3sHBwf06tXL1mHYjEKhkNQ/ClLGn7V0SPFnbake/Z+5urrC1dXV4vf5M19fXzg6OqK8vNygvLy8HCqVyqqxcIEeERGRBbi4uCAqKgr79u3Tl+l0Ouzbtw9qtdqqsdh1z56IiKgrS0lJQWJiIoYOHYobbrgBa9asQV1dHaZOnWrVOJjs7ZBcLsfixYvtYp6ITMOftXTwZy1O99xzDy5cuIBFixZBo9Fg8ODB2L17d5tFe5YmE+z5Zb9ERER0VZyzJyIiEjkmeyIiIpFjsiciIhI5JnsiIiKRY7K3M11hq0SyvAMHDmDChAkIDAyETCbDjh07bB0SWUhqaiqGDRsGLy8v+Pn5YdKkScjPz7d1WCQyTPZ2pKtslUiWV1dXh0GDBiEtLc3WoZCFZWdnIykpCUeOHEFWVhaampoQGxuLuro6W4dGIsJH7+xIdHQ0hg0bhtdffx1Ay5uYgoKCMHPmTDzzzDM2jo4sRSaTYfv27Zg0aZKtQyEruHDhAvz8/JCdnY0RI0b
YOhwSCfbs7UTrVokxMTH6MlttlUhEllNdXQ0A8PHxsXEkJCZM9nbi77ZK1Gg0NoqKiMxJp9Nh1qxZuPHGGzFgwABbh0MiwtflEhF1EUlJSTh16hQOHTpk61BIZJjs7URX2iqRiMwvOTkZmZmZOHDggKS37ibL4DC+nehKWyUSkfkIgoDk5GRs374dX3zxBUJDQ20dEokQe/Z2pKtslUiWV1tbi4KCAv3noqIi5OXlwcfHB8HBwTaMjMwtKSkJGRkZ+Pjjj+Hl5aVfg6NUKuHm5mbj6Egs+OidnXn99dexatUq/VaJa9euRXR0tK3DIjPbv38/Ro0a1aY8MTER6enp1g+ILEYmk7VbvnHjRjz00EPWDYZEi8meiIhI5DhnT0REJHJM9kRERCLHZE9ERCRyTPZEREQix2RPREQkckz2REREIsdkT0REJHJM9kRERCLHZE9kooceegiTJk3Sfx45ciRmzZpl9Tj2798PmUyGqqqqK9aRyWTYsWNHh6+5ZMkSDB482KS4fvrpJ8hkMuTl5Zl0HSLqPCZ7EqWHHnoIMpkMMpkMLi4uCAsLw7Jly9Dc3Gzxe3/00UdYvnx5h+p2JEETEZmKG+GQaI0dOxYbN25EQ0MDdu3ahaSkJDg7O2P+/Plt6jY2NsLFxcUs9/Xx8THLdYiIzIU9exItuVwOlUqFkJAQzJgxAzExMfjkk08A/DH0vmLFCgQGBiI8PBwAUFJSgrvvvhve3t7w8fHBxIkT8dNPP+mvqdVqkZKSAm9vb3Tv3h1PP/00/rq9xF+H8RsaGjBv3jwEBQVBLpcjLCwMGzZswE8//aTf7KZbt26QyWT6jU90Oh1SU1MRGhoKNzc3DBo0CB988IHBfXbt2oW+ffvCzc0No0aNMoizo+bNm4e+ffvC3d0dvXv3xsKFC9HU1NSm3ptvvomgoCC4u7vj7rvvRnV1tcH5t99+G/3794erqyv69euHN954w+hYiMhymOxJMtzc3NDY2Kj/vG/fPuTn5yMrKwuZmZloampCXFwcvLy8cPDgQXz11Vfw9PTE2LFj9e1efvllpKen45133sGhQ4dQWVmJ7du3/+19p0yZgvfeew9r167F2bNn8eabb8LT0xNBQUH48MMPAQD5+fkoKyvDq6++CgBITU3Fu+++i/Xr1+P06dOYPXs2HnjgAWRnZwNo+aVk8uTJmDBhAvLy8vDII4/gmWeeMfr/Ey8vL6Snp+PMmTN49dVX8dZbb2H16tUGdQoKCrBt2zbs3LkTu3fvxjfffIPHH39cf37z5s1YtGgRVqxYgbNnz+KFF17AwoULsWnTJqPjISILEYhEKDExUZg4caIgCIKg0+mErKwsQS6XC3PmzNGf9/f3FxoaGvRt/vOf/wjh4eGCTqfTlzU0NAhubm7Cnj17BEEQhICAAGHlypX6801NTUKvXr309xIEQbjllluEJ598UhAEQcjPzxcACFlZWe3G+eWXXwoAhN9++01fVl9fL7i7uwuHDx82qDtt2jThvvvuEwRBEObPny9EREQYnJ83b16ba/0VAGH79u1XPL9q1SohKipK/3nx4sWCo6OjcP78eX3ZZ599Jjg4OAhlZWWCIAhCnz59hIyMDIPrLF++XFCr1YIgCEJRUZEAQPjmm2+ueF8isizO2ZNoZWZmwtPTE01NTdDpdLj//vuxZMkS/fnIyEiDefpvv/0WBQUF8PLyMrhOfX09CgsLUV1djbKyMkRHR+vPOTk5YejQoW2G8lvl5eXB0dERt9xyS4fjLigowOXLl3HrrbcalDc2NmLIkCEAgLNnzxrEAQBqtbrD92i1detWrF27FoWFhaitrUVzczMUCoVBneDgYPTs2dPgPjqdDvn5+fDy8kJhYSGmTZuG6dOn6+s0NzdDqVQaHQ8RWQaTPYnWqFGjsG7dOri4uCAwMBBOToZ/3T08PAw+19bWIioqCps3b25zrR49enQqBjc3N6Pb1NbWAgA+/fRTgyQLtKxDMJecnBw
kJCRg6dKliIuLg1KpxJYtW/Dyyy8bHetbb73V5pcPR0dHs8VKRKZhsifR8vDwQFhYWIfrX3/99di6dSv8/Pza9G5bBQQE4OjRoxgxYgSAlh5sbm4urr/++nbrR0ZGQqfTITs7GzExMW3Ot44saLVafVlERATkcjmKi4uvOCLQv39//WLDVkeOHLn6l/yTw4cPIyQkBM8995y+7Oeff25Tr7i4GKWlpQgMDNTfx8HBAeHh4fD390dgYCDOnTuHhIQEo+5PRNbDBXpE/5OQkABfX19MnDgRBw8eRFFREfbv348nnngC58+fBwA8+eSTePHFF7Fjxw58//33ePzxx//2GflrrrkGiYmJePjhh7Fjxw79Nbdt2wYACAkJgUwmQ2ZmJi5cuIDa2lp4eXlhzpw5mD17NjZt2oTCwkKcOHECr732mn7R22OPPYYff/wRc+fORX5+PjIyMpCenm7U97322mtRXFyMLVu2oLCwEGvXrm13saGrqysSExPx7bff4uDBg3jiiSdw9913Q6VSAQCWLl2K1NRUrF27Fj/88ANOnjyJjRs34pVXXjEqHiKyHCZ7ov9xd3fHgQMHEBwcjMmTJ6N///6YNm0a6uvr9T39p556Cg8++CASExOhVqvh5eWFO++882+vu27dOtx11114/PHH0a9fP0yfPh11dXUAgJ49e2Lp0qV45pln4O/vj+TkZADA8uXLsXDhQqSmpqJ///4YO3YsPv30U4SGhgJomUf/8MMPsWPHDgwaNAjr16/HCy+8YNT3veOOOzB79mwkJydj8ODBOHz4MBYuXNimXlhYGCZPnozbbrsNsbGxGDhwoMGjdY888gjefvttbNy4EZGRkbjllluQnp6uj5WIbE8mXGllEREREYkCe/ZEREQix2RPREQkckz2REREIsdkT0REJHJM9kRERCLHZE9ERCRyTPZEREQix2RPREQkckz2REREIsdkT0REJHJM9kRERCL3/1Z5JenfR+rsAAAAAElFTkSuQmCC", + "text/plain": [ + "<Figure size 640x480 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cm =confusion_matrix(y_test, my_pred)\n", + "disp = ConfusionMatrixDisplay(confusion_matrix=cm,)\n", + "disp.plot()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "XGBoost model accuracy score: 0.9722\n" + ] + } + ], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "print('XGBoost model accuracy score: {0:0.4f}'. 
format(accuracy_score(y_test, my_pred)))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.9605768 0.95337327 0.96634984 0.9565976 0.9635154 ]\n", + " StratifiedKFold Cross-Validation Accuracy: 96.01% | Standard Deviation: 0.47%\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.model_selection import StratifiedKFold\n", + "\n", + "skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)\n", + "cvScore = cross_val_score(xgb_clf, X, y, cv=skf, scoring='f1_macro')\n", + "print (cvScore)\n", + "print (\" StratifiedKFold Cross-Validation Accuracy: %0.2f%% | Standard Deviation: %0.2f%%\" % (100*cvScore.mean(), 100*cvScore.std()))\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<h1> 6. Testing Data</h1>" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "predicted marker\n", + "1 36\n", + "0 32\n", + "2 32\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_testpred = xgb_clf.predict(mTest.values)\n", + "y_testpred = pd.DataFrame(y_testpred, columns=['predicted marker'])\n", + "y_testpred.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "y_testpred.to_csv('xgbPredictedlabels.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "mTest[\"marker\"] = y_testpred\n", + "mTest.to_csv('TestingResultsMulti.csv')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": 
".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}