From b7474d8a593ec6131a2fc5b9a9dca4bdf52d79c4 Mon Sep 17 00:00:00 2001
From: mas1u19 <mas1u19@soton.ac.uk>
Date: Wed, 7 Jun 2023 21:52:35 +0000
Subject: [PATCH] Upload New File

---
 Part_A_Binary.ipynb | 2883 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 2883 insertions(+)
 create mode 100644 Part_A_Binary.ipynb

diff --git a/Part_A_Binary.ipynb b/Part_A_Binary.ipynb
new file mode 100644
index 0000000..0f103e1
--- /dev/null
+++ b/Part_A_Binary.ipynb
@@ -0,0 +1,2883 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "5354ae81",
+   "metadata": {},
+   "source": [
+    "# Coursework 2\n",
+    "\n",
+    "## Part A\n",
+    "### A.1\tProblem Description\n",
+    "The task requires the design and implementation of an ML model to classify 100 test traces as normal or abnormal events. The training dataset contains 6,000 system traces, half of which are labelled 0 (normal events) and the other half 1 (abnormal data injection attack events). Each trace has 128 features: the first 116 columns are measurements from four PMUs and the remaining 12 columns contain control panel logs, Snort alerts and relay logs. A final (129th) column holds the label. The goal is to train the ML model on the training dataset and use it to predict the labels of the 100 unlabelled test traces.\n",
+    "\n",
+    "### A.2\tData Pre-processing \n",
+    "The `pandas` library is used for data processing because the data is available in tabular form. The data is first checked for consistency and for missing or invalid entries. Categorical features were observed and will be mapped to binary indicator (dummy) columns. Non-categorical features will be normalised.\n",
+    "\n",
+    "Given that the target values are split evenly between normal and anomalous samples, we can start with logistic regression to find a good fit. The labelled data may be further split into training and test subsets. The test data that is provided will be used as the final validation of the ML model's accuracy.\n",
+    "\n",
+    "First, let's do some basic analysis of the input data. (Please note that both the training and test data files were missing headers, as described in the assignment. The headers have been added manually.)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1609,
+   "id": "3997c6f9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1610,
+   "id": "70a5a0d9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 6000 entries, 0 to 5999\n",
+      "Columns: 129 entries, R1-PA1:VH to marker\n",
+      "dtypes: float64(112), int64(17)\n",
+      "memory usage: 5.9 MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "raw_data = pd.read_csv('TrainingDataBinary.csv')\n",
+    "raw_data.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1611,
+   "id": "a10544aa",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(6000, 129)"
+      ]
+     },
+     "execution_count": 1611,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1612,
+   "id": "65f549be",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\syeda\\AppData\\Local\\Temp\\ipykernel_28668\\39642624.py:1: FutureWarning: In a future version of pandas all arguments of DataFrame.any and Series.any will be keyword-only.\n",
+      "  len(raw_data[raw_data.isnull().any(1)])\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "0"
+      ]
+     },
+     "execution_count": 1612,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(raw_data[raw_data.isnull().any(axis=1)])  # rows with at least one missing value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1613,
+   "id": "d592ca2b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Int64Index: 6000 entries, 0 to 5999\n",
+      "Columns: 129 entries, R1-PA1:VH to marker\n",
+      "dtypes: float64(112), int64(17)\n",
+      "memory usage: 6.0 MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "raw_data = raw_data.drop_duplicates(keep='first')  # the first occurrence of any repeated trace is kept\n",
+    "raw_data.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1614,
+   "id": "283c569c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 1614,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data.columns[raw_data.isnull().any()].tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1615,
+   "id": "ed5a0f8a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    3000\n",
+       "1    3000\n",
+       "Name: marker, dtype: int64"
+      ]
+     },
+     "execution_count": 1615,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data['marker'].value_counts()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "8d15070d",
+   "metadata": {},
+   "source": [
+    "So, there are 6000 rows with no null values, and the target column *marker* takes the values 0 and 1. Furthermore, the two classes are equally represented (3000 each), giving us a balanced sample."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1616,
+   "id": "081e4ad7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>R1-PA1:VH</th>\n",
+       "      <th>R1-PM1:V</th>\n",
+       "      <th>R1-PA2:VH</th>\n",
+       "      <th>R1-PM2:V</th>\n",
+       "      <th>R1-PA3:VH</th>\n",
+       "      <th>R1-PM3:V</th>\n",
+       "      <th>R1-PA4:IH</th>\n",
+       "      <th>R1-PM4:I</th>\n",
+       "      <th>R1-PA5:IH</th>\n",
+       "      <th>R1-PM5:I</th>\n",
+       "      <th>...</th>\n",
+       "      <th>control_panel_log4</th>\n",
+       "      <th>relay1_log</th>\n",
+       "      <th>relay2_log</th>\n",
+       "      <th>relay3_log</th>\n",
+       "      <th>relay4_log</th>\n",
+       "      <th>snort_log1</th>\n",
+       "      <th>snort_log2</th>\n",
+       "      <th>snort_log3</th>\n",
+       "      <th>snort_log4</th>\n",
+       "      <th>marker</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>5900</th>\n",
+       "      <td>67.385566</td>\n",
+       "      <td>131108.1288</td>\n",
+       "      <td>-52.591796</td>\n",
+       "      <td>131083.0556</td>\n",
+       "      <td>-172.586347</td>\n",
+       "      <td>131183.3486</td>\n",
+       "      <td>63.208704</td>\n",
+       "      <td>488.17126</td>\n",
+       "      <td>-57.307239</td>\n",
+       "      <td>493.11523</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2981</th>\n",
+       "      <td>-106.856629</td>\n",
+       "      <td>132085.9864</td>\n",
+       "      <td>133.161121</td>\n",
+       "      <td>131459.1546</td>\n",
+       "      <td>13.166570</td>\n",
+       "      <td>132136.1329</td>\n",
+       "      <td>-107.698877</td>\n",
+       "      <td>322.63982</td>\n",
+       "      <td>132.301685</td>\n",
+       "      <td>322.82293</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2293</th>\n",
+       "      <td>-92.704571</td>\n",
+       "      <td>130757.1031</td>\n",
+       "      <td>147.318908</td>\n",
+       "      <td>130732.0298</td>\n",
+       "      <td>27.330087</td>\n",
+       "      <td>130832.3229</td>\n",
+       "      <td>-69.345082</td>\n",
+       "      <td>373.17818</td>\n",
+       "      <td>154.635579</td>\n",
+       "      <td>591.26219</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1276</th>\n",
+       "      <td>-32.268983</td>\n",
+       "      <td>132311.6458</td>\n",
+       "      <td>-152.263534</td>\n",
+       "      <td>131659.7408</td>\n",
+       "      <td>87.748486</td>\n",
+       "      <td>132361.7923</td>\n",
+       "      <td>-31.948127</td>\n",
+       "      <td>281.80629</td>\n",
+       "      <td>-151.151996</td>\n",
+       "      <td>279.97519</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2346</th>\n",
+       "      <td>137.635922</td>\n",
+       "      <td>131734.9606</td>\n",
+       "      <td>17.664289</td>\n",
+       "      <td>131709.8873</td>\n",
+       "      <td>-102.335992</td>\n",
+       "      <td>131810.1804</td>\n",
+       "      <td>138.312012</td>\n",
+       "      <td>356.51517</td>\n",
+       "      <td>16.060007</td>\n",
+       "      <td>367.13555</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 129 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       R1-PA1:VH     R1-PM1:V   R1-PA2:VH     R1-PM2:V   R1-PA3:VH  \\\n",
+       "5900   67.385566  131108.1288  -52.591796  131083.0556 -172.586347   \n",
+       "2981 -106.856629  132085.9864  133.161121  131459.1546   13.166570   \n",
+       "2293  -92.704571  130757.1031  147.318908  130732.0298   27.330087   \n",
+       "1276  -32.268983  132311.6458 -152.263534  131659.7408   87.748486   \n",
+       "2346  137.635922  131734.9606   17.664289  131709.8873 -102.335992   \n",
+       "\n",
+       "         R1-PM3:V   R1-PA4:IH   R1-PM4:I   R1-PA5:IH   R1-PM5:I  ...  \\\n",
+       "5900  131183.3486   63.208704  488.17126  -57.307239  493.11523  ...   \n",
+       "2981  132136.1329 -107.698877  322.63982  132.301685  322.82293  ...   \n",
+       "2293  130832.3229  -69.345082  373.17818  154.635579  591.26219  ...   \n",
+       "1276  132361.7923  -31.948127  281.80629 -151.151996  279.97519  ...   \n",
+       "2346  131810.1804  138.312012  356.51517   16.060007  367.13555  ...   \n",
+       "\n",
+       "      control_panel_log4  relay1_log  relay2_log  relay3_log  relay4_log  \\\n",
+       "5900                   0           0           0           0           0   \n",
+       "2981                   0           0           0           0           0   \n",
+       "2293                   0           0           0           0           0   \n",
+       "1276                   0           0           0           0           0   \n",
+       "2346                   0           0           0           0           0   \n",
+       "\n",
+       "      snort_log1  snort_log2  snort_log3  snort_log4  marker  \n",
+       "5900           0           0           0           0       0  \n",
+       "2981           0           0           0           0       1  \n",
+       "2293           0           0           0           0       1  \n",
+       "1276           0           0           0           0       1  \n",
+       "2346           0           0           0           0       1  \n",
+       "\n",
+       "[5 rows x 129 columns]"
+      ]
+     },
+     "execution_count": 1616,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data.sample(n=5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1617,
+   "id": "989201f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['R1-PA1:VH', 'R1-PM1:V', 'R1-PA2:VH', 'R1-PM2:V', 'R1-PA3:VH',\n",
+       "       'R1-PM3:V', 'R1-PA4:IH', 'R1-PM4:I', 'R1-PA5:IH', 'R1-PM5:I',\n",
+       "       'R1-PA6:IH', 'R1-PM6:I', 'R1-PA7:VH', 'R1-PM7:V', 'R1-PA8:VH',\n",
+       "       'R1-PM8:V', 'R1-PA9:VH', 'R1-PM9:V', 'R1-PA10:IH', 'R1-PM10:I',\n",
+       "       'R1-PA11:IH', 'R1-PM11:I', 'R1-PA12:IH', 'R1-PM12:I', 'R1:F',\n",
+       "       'R1:DF', 'R1-PA:Z', 'R1-PA:ZH', 'R1:S', 'R2-PA1:VH', 'R2-PM1:V',\n",
+       "       'R2-PA2:VH', 'R2-PM2:V', 'R2-PA3:VH', 'R2-PM3:V', 'R2-PA4:IH',\n",
+       "       'R2-PM4:I', 'R2-PA5:IH', 'R2-PM5:I', 'R2-PA6:IH', 'R2-PM6:I',\n",
+       "       'R2-PA7:VH', 'R2-PM7:V', 'R2-PA8:VH', 'R2-PM8:V', 'R2-PA9:VH',\n",
+       "       'R2-PM9:V', 'R2-PA10:IH', 'R2-PM10:I', 'R2-PA11:IH', 'R2-PM11:I',\n",
+       "       'R2-PA12:IH', 'R2-PM12:I', 'R2:F', 'R2:DF', 'R2-PA:Z', 'R2-PA:ZH',\n",
+       "       'R2:S', 'R3-PA1:VH', 'R3-PM1:V', 'R3-PA2:VH', 'R3-PM2:V',\n",
+       "       'R3-PA3:VH', 'R3-PM3:V', 'R3-PA4:IH', 'R3-PM4:I', 'R3-PA5:IH',\n",
+       "       'R3-PM5:I', 'R3-PA6:IH', 'R3-PM6:I', 'R3-PA7:VH', 'R3-PM7:V',\n",
+       "       'R3-PA8:VH', 'R3-PM8:V', 'R3-PA9:VH', 'R3-PM9:V', 'R3-PA10:IH',\n",
+       "       'R3-PM10:I', 'R3-PA11:IH', 'R3-PM11:I', 'R3-PA12:IH', 'R3-PM12:I',\n",
+       "       'R3:F', 'R3:DF', 'R3-PA:Z', 'R3-PA:ZH', 'R3:S', 'R4-PA1:VH',\n",
+       "       'R4-PM1:V', 'R4-PA2:VH', 'R4-PM2:V', 'R4-PA3:VH', 'R4-PM3:V',\n",
+       "       'R4-PA4:IH', 'R4-PM4:I', 'R4-PA5:IH', 'R4-PM5:I', 'R4-PA6:IH',\n",
+       "       'R4-PM6:I', 'R4-PA7:VH', 'R4-PM7:V', 'R4-PA8:VH', 'R4-PM8:V',\n",
+       "       'R4-PA9:VH', 'R4-PM9:V', 'R4-PA10:IH', 'R4-PM10:I', 'R4-PA11:IH',\n",
+       "       'R4-PM11:I', 'R4-PA12:IH', 'R4-PM12:I', 'R4:F', 'R4:DF', 'R4-PA:Z',\n",
+       "       'R4-PA:ZH', 'R4:S', 'control_panel_log1', 'control_panel_log2',\n",
+       "       'control_panel_log3', 'control_panel_log4', 'relay1_log',\n",
+       "       'relay2_log', 'relay3_log', 'relay4_log', 'snort_log1',\n",
+       "       'snort_log2', 'snort_log3', 'snort_log4', 'marker'], dtype=object)"
+      ]
+     },
+     "execution_count": 1617,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "column_names = raw_data.columns.values\n",
+    "column_names"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "131b20d5",
+   "metadata": {},
+   "source": [
+    "### A.3\tAdvanced Data Analysis\n",
+    "It is understood that the first 116 columns hold current/voltage data, so these are continuous real-valued data. The next 12 columns are switches (0/1), and the last column is the target, which was just observed to be half 0 and half 1.\n",
+    "\n",
+    "Let's normalise the real-valued data, removing anomalous values, if any.\n",
+    "\n",
+    "Minimum, maximum, and number of unique data points for each feature are computed."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1618,
+   "id": "80f7ee9a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Int64Index: 129 entries, 0 to 128\n",
+      "Data columns (total 3 columns):\n",
+      " #   Column  Non-Null Count  Dtype  \n",
+      "---  ------  --------------  -----  \n",
+      " 0   minn    129 non-null    float64\n",
+      " 1   maxx    129 non-null    float64\n",
+      " 2   uniq    129 non-null    int64  \n",
+      "dtypes: float64(2), int64(1)\n",
+      "memory usage: 4.0 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "minn = raw_data[column_names].min().tolist()  # smallest value of each column\n",
+    "maxx = raw_data[column_names].max().tolist()  # largest value of each column\n",
+    "uniq = raw_data[column_names].nunique(dropna=False).tolist()  # distinct values per column\n",
+    "idx = list(range(len(minn)))  # positional column index 0..n-1\n",
+    "data_stat = pd.DataFrame({\"minn\": minn, \"maxx\": maxx, \"uniq\": uniq}, index=idx)\n",
+    "data_stat.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1619,
+   "id": "0d21a61e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>minn</th>\n",
+       "      <th>maxx</th>\n",
+       "      <th>uniq</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>116</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>117</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>118</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>119</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>126</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>127</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     minn  maxx  uniq\n",
+       "116   0.0   0.0     1\n",
+       "117   0.0   0.0     1\n",
+       "118   0.0   0.0     1\n",
+       "119   0.0   0.0     1\n",
+       "126   0.0   0.0     1\n",
+       "127   0.0   0.0     1"
+      ]
+     },
+     "execution_count": 1619,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Features that take a single value across the whole sample\n",
+    "data_stat[data_stat.uniq == 1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1620,
+   "id": "ad863ae4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>minn</th>\n",
+       "      <th>maxx</th>\n",
+       "      <th>uniq</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>270336.0</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>57</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>270336.0</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>86</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>270336.0</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>115</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>270336.0</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>116</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>117</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>118</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>119</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>120</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>121</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>122</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>123</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>124</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>125</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>126</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>127</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>128</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     minn      maxx  uniq\n",
+       "28    0.0  270336.0     4\n",
+       "57    0.0  270336.0     2\n",
+       "86    0.0  270336.0     3\n",
+       "115   0.0  270336.0     3\n",
+       "116   0.0       0.0     1\n",
+       "117   0.0       0.0     1\n",
+       "118   0.0       0.0     1\n",
+       "119   0.0       0.0     1\n",
+       "120   0.0       1.0     2\n",
+       "121   0.0       1.0     2\n",
+       "122   0.0       1.0     2\n",
+       "123   0.0       1.0     2\n",
+       "124   0.0       1.0     2\n",
+       "125   0.0       1.0     2\n",
+       "126   0.0       0.0     1\n",
+       "127   0.0       0.0     1\n",
+       "128   0.0       1.0     2"
+      ]
+     },
+     "execution_count": 1620,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Look for candidate categorical features: those with fewer than 10 unique values\n",
+    "data_stat[data_stat.uniq < 10]"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "2ff927af",
+   "metadata": {},
+   "source": [
+    "Columns 116 onward are control values and were expected to be 0 or 1 (column 128 is the target *marker*). Moreover, columns 116-119 and 126-127 contain only 0 and can therefore be dropped.\n",
+    "\n",
+    "It's also interesting to note that 4 other columns have categorical data. Let's explore these."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1621,
+   "id": "71c92946",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([     0,   2058,   2048, 270336], dtype=int64)"
+      ]
+     },
+     "execution_count": 1621,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data[column_names[28]].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1622,
+   "id": "6f86ffd4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([     0, 270336], dtype=int64)"
+      ]
+     },
+     "execution_count": 1622,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data[column_names[57]].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1623,
+   "id": "2311b43d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([     0,   2048, 270336], dtype=int64)"
+      ]
+     },
+     "execution_count": 1623,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data[column_names[86]].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1624,
+   "id": "726a7bb9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([     0,   2058, 270336], dtype=int64)"
+      ]
+     },
+     "execution_count": 1624,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data[column_names[115]].unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1625,
+   "id": "1cfdd4ba",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['R1:S', 'R2:S', 'R3:S', 'R4:S']"
+      ]
+     },
+     "execution_count": 1625,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[column_names[28], column_names[57], column_names[86], column_names[115]]"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "b3d245bd",
+   "metadata": {},
+   "source": [
+    "So, the R#:S columns are categorical, taking up to 4 possible values: 0, 2048, 2058, and 270336. Dummy columns are created to hold these as binary indicators, and the original columns are dropped."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1626,
+   "id": "9f105bf3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>R1:S_0</th>\n",
+       "      <th>R1:S_2048</th>\n",
+       "      <th>R1:S_2058</th>\n",
+       "      <th>R1:S_270336</th>\n",
+       "      <th>R2:S_0</th>\n",
+       "      <th>R2:S_270336</th>\n",
+       "      <th>R3:S_0</th>\n",
+       "      <th>R3:S_2048</th>\n",
+       "      <th>R3:S_270336</th>\n",
+       "      <th>R4:S_0</th>\n",
+       "      <th>R4:S_2058</th>\n",
+       "      <th>R4:S_270336</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>3088</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1152</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5995</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2602</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3244</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2722</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      R1:S_0  R1:S_2048  R1:S_2058  R1:S_270336  R2:S_0  R2:S_270336  R3:S_0  \\\n",
+       "3088       1          0          0            0       1            0       1   \n",
+       "1152       1          0          0            0       1            0       1   \n",
+       "5995       1          0          0            0       1            0       1   \n",
+       "2602       1          0          0            0       1            0       1   \n",
+       "3244       1          0          0            0       1            0       1   \n",
+       "2722       1          0          0            0       1            0       1   \n",
+       "\n",
+       "      R3:S_2048  R3:S_270336  R4:S_0  R4:S_2058  R4:S_270336  \n",
+       "3088          0            0       1          0            0  \n",
+       "1152          0            0       1          0            0  \n",
+       "5995          0            0       1          0            0  \n",
+       "2602          0            0       1          0            0  \n",
+       "3244          0            0       1          0            0  \n",
+       "2722          0            0       1          0            0  "
+      ]
+     },
+     "execution_count": 1626,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#lets make a copy of original\n",
+    "df = raw_data.copy()\n",
+    "df.replace([np.inf, -np.inf], np.nan, inplace=True)\n",
+    "df.dropna(inplace=True)\n",
+    "\n",
+    "column_names_RS = [column_names[28], column_names[57], column_names[86], column_names[115]]\n",
+    "RS_frame = df[column_names_RS]\n",
+    "RS_frame = RS_frame.astype('int')\n",
+    "RS_frame = RS_frame.astype('category')\n",
+    "RS_frame = pd.get_dummies(RS_frame, dtype=int)\n",
+    "RS_frame.sample(n=6)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "54517212",
+   "metadata": {},
+   "source": [
+    "We have created dummy columns for each categorical value of the RS columns (R1:S, R2:S, R3:S, R4:S). The original RS columns are now redundant and can be dropped from the main dataframe."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1627,
+   "id": "19de15f6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "6000 rows\n",
+      "** R1-PA1:VH:5530 (92%) here\n",
+      "** R1-PM1:V:357 (5%) here\n",
+      "** R1-PA2:VH:5550 (92%) here\n",
+      "** R1-PM2:V:347 (5%) here\n",
+      "** R1-PA3:VH:5515 (91%) here\n",
+      "** R1-PM3:V:350 (5%) here\n",
+      "** R1-PA4:IH:5616 (93%) here\n",
+      "** R1-PM4:I:1587 (26%) here\n",
+      "** R1-PA5:IH:5566 (92%) here\n",
+      "** R1-PM5:I:1489 (24%) here\n",
+      "** R1-PA6:IH:5569 (92%) here\n",
+      "** R1-PM6:I:1570 (26%) here\n",
+      "** R1-PA7:VH:5533 (92%) here\n",
+      "** R1-PM7:V:360 (6%) here\n",
+      "** R1-PA8:VH:48 (0%) here\n",
+      "** R1-PM8:V:47 (0%) here\n",
+      "** R1-PA9:VH:44 (0%) here\n",
+      "** R1-PM9:V:40 (0%) here\n",
+      "** R1-PA10:IH:5561 (92%) here\n",
+      "** R1-PM10:I:1526 (25%) here\n",
+      "** R1-PA11:IH:2152 (35%) here\n",
+      "** R1-PM11:I:184 (3%) here\n",
+      "** R1-PA12:IH:2185 (36%) here\n",
+      "** R1-PM12:I:187 (3%) here\n",
+      "** R1:F:103 (1%) here\n",
+      "** R1:DF:101 (1%) here\n",
+      "** R1-PA:Z:5999 (99%) here\n",
+      "** R1-PA:ZH:5913 (98%) here\n",
+      "** R1:S:[0:99.22%,2058:0.6%,270336:0.17%,2048:0.02%]\n",
+      "** R2-PA1:VH:5659 (94%) here\n",
+      "** R2-PM1:V:4602 (76%) here\n",
+      "** R2-PA2:VH:5661 (94%) here\n",
+      "** R2-PM2:V:4577 (76%) here\n",
+      "** R2-PA3:VH:5658 (94%) here\n",
+      "** R2-PM3:V:4593 (76%) here\n",
+      "** R2-PA4:IH:5690 (94%) here\n",
+      "** R2-PM4:I:4968 (82%) here\n",
+      "** R2-PA5:IH:5711 (95%) here\n",
+      "** R2-PM5:I:4838 (80%) here\n",
+      "** R2-PA6:IH:5743 (95%) here\n",
+      "** R2-PM6:I:4957 (82%) here\n",
+      "** R2-PA7:VH:5670 (94%) here\n",
+      "** R2-PM7:V:4599 (76%) here\n",
+      "** R2-PA8:VH:50 (0%) here\n",
+      "** R2-PM8:V:50 (0%) here\n",
+      "** R2-PA9:VH:47 (0%) here\n",
+      "** R2-PM9:V:47 (0%) here\n",
+      "** R2-PA10:IH:5695 (94%) here\n",
+      "** R2-PM10:I:4924 (82%) here\n",
+      "** R2-PA11:IH:2131 (35%) here\n",
+      "** R2-PM11:I:1253 (20%) here\n",
+      "** R2-PA12:IH:2162 (36%) here\n",
+      "** R2-PM12:I:1255 (20%) here\n",
+      "** R2:F:108 (1%) here\n",
+      "** R2:DF:107 (1%) here\n",
+      "** R2-PA:Z:6000 (100%) here\n",
+      "** R2-PA:ZH:5886 (98%) here\n",
+      "** R2:S:[0:99.83%,270336:0.17%]\n",
+      "** R3-PA1:VH:5534 (92%) here\n",
+      "** R3-PM1:V:393 (6%) here\n",
+      "** R3-PA2:VH:5536 (92%) here\n",
+      "** R3-PM2:V:389 (6%) here\n",
+      "** R3-PA3:VH:5539 (92%) here\n",
+      "** R3-PM3:V:384 (6%) here\n",
+      "** R3-PA4:IH:5602 (93%) here\n",
+      "** R3-PM4:I:1601 (26%) here\n",
+      "** R3-PA5:IH:5575 (92%) here\n",
+      "** R3-PM5:I:1464 (24%) here\n",
+      "** R3-PA6:IH:5614 (93%) here\n",
+      "** R3-PM6:I:1571 (26%) here\n",
+      "** R3-PA7:VH:5554 (92%) here\n",
+      "** R3-PM7:V:392 (6%) here\n",
+      "** R3-PA8:VH:50 (0%) here\n",
+      "** R3-PM8:V:49 (0%) here\n",
+      "** R3-PA9:VH:47 (0%) here\n",
+      "** R3-PM9:V:46 (0%) here\n",
+      "** R3-PA10:IH:5548 (92%) here\n",
+      "** R3-PM10:I:1560 (26%) here\n",
+      "** R3-PA11:IH:2063 (34%) here\n",
+      "** R3-PM11:I:181 (3%) here\n",
+      "** R3-PA12:IH:2053 (34%) here\n",
+      "** R3-PM12:I:175 (2%) here\n",
+      "** R3:F:92 (1%) here\n",
+      "** R3:DF:111 (1%) here\n",
+      "** R3-PA:Z:5999 (99%) here\n",
+      "** R3-PA:ZH:5902 (98%) here\n",
+      "** R3:S:[0:99.77%,270336:0.17%,2048:0.07%]\n",
+      "** R4-PA1:VH:5664 (94%) here\n",
+      "** R4-PM1:V:1792 (29%) here\n",
+      "** R4-PA2:VH:5668 (94%) here\n",
+      "** R4-PM2:V:1781 (29%) here\n",
+      "** R4-PA3:VH:5674 (94%) here\n",
+      "** R4-PM3:V:1794 (29%) here\n",
+      "** R4-PA4:IH:5699 (94%) here\n",
+      "** R4-PM4:I:2866 (47%) here\n",
+      "** R4-PA5:IH:5692 (94%) here\n",
+      "** R4-PM5:I:2752 (45%) here\n",
+      "** R4-PA6:IH:5693 (94%) here\n",
+      "** R4-PM6:I:2859 (47%) here\n",
+      "** R4-PA7:VH:5687 (94%) here\n",
+      "** R4-PM7:V:1781 (29%) here\n",
+      "** R4-PA8:VH:50 (0%) here\n",
+      "** R4-PM8:V:48 (0%) here\n",
+      "** R4-PA9:VH:45 (0%) here\n",
+      "** R4-PM9:V:39 (0%) here\n",
+      "** R4-PA10:IH:5695 (94%) here\n",
+      "** R4-PM10:I:2826 (47%) here\n",
+      "** R4-PA11:IH:2090 (34%) here\n",
+      "** R4-PM11:I:696 (11%) here\n",
+      "** R4-PA12:IH:2078 (34%) here\n",
+      "** R4-PM12:I:726 (12%) here\n",
+      "** R4:F:117 (1%) here\n",
+      "** R4:DF:96 (1%) here\n",
+      "** R4-PA:Z:5999 (99%) here\n",
+      "** R4-PA:ZH:5891 (98%) here\n",
+      "** R4:S:[0:99.82%,270336:0.17%,2058:0.02%]\n",
+      "** control_panel_log1:[0:100.0%]\n",
+      "** control_panel_log2:[0:100.0%]\n",
+      "** control_panel_log3:[0:100.0%]\n",
+      "** control_panel_log4:[0:100.0%]\n",
+      "** relay1_log:[0:99.87%,1:0.13%]\n",
+      "** relay2_log:[0:99.88%,1:0.12%]\n",
+      "** relay3_log:[0:99.95%,1:0.05%]\n",
+      "** relay4_log:[0:99.97%,1:0.03%]\n",
+      "** snort_log1:[0:99.98%,1:0.02%]\n",
+      "** snort_log2:[0:99.98%,1:0.02%]\n",
+      "** snort_log3:[0:100.0%]\n",
+      "** snort_log4:[0:100.0%]\n",
+      "** marker:[0:50.0%,1:50.0%]\n"
+     ]
+    }
+   ],
+   "source": [
+    "ENCODING = 'utf-8'\n",
+    "\n",
+    "def expand_categories(values):\n",
+    "    \"\"\"Return '[value:pct%,...]' giving each distinct value's share of `values`.\n",
+    "\n",
+    "    Entries follow value_counts() order (most frequent first) and each\n",
+    "    percentage is rounded to two decimals.\n",
+    "    \"\"\"\n",
+    "    counts = values.value_counts()\n",
+    "    total = float(len(values))\n",
+    "    parts = [\"{}:{}%\".format(value, round(100*(count/total), 2))\n",
+    "             for value, count in counts.items()]\n",
+    "    return \"[{}]\".format(\",\".join(parts))\n",
+    "\n",
+    "def analyze(df):\n",
+    "    \"\"\"Print a one-line profile for every column of `df`.\n",
+    "\n",
+    "    Columns with more than 10 distinct values are reported as the distinct\n",
+    "    count plus that count as a percentage of the row count; every other\n",
+    "    column gets its full value distribution from expand_categories().\n",
+    "    \"\"\"\n",
+    "    print()\n",
+    "    total = float(len(df))\n",
+    "\n",
+    "    print(\"{} rows\".format(int(total)))\n",
+    "    for col in df.columns.values:\n",
+    "        unique_count = len(df[col].unique())\n",
+    "        if unique_count>10:\n",
+    "            print(\"** {}:{} ({}%) here\".format(col,unique_count,int(((unique_count)/total)*100)))\n",
+    "        else:\n",
+    "            print(\"** {}:{}\".format(col,expand_categories(df[col])))\n",
+    "\n",
+    "# Profile every column of the cleaned trace dataframe\n",
+    "\n",
+    "import pandas as pd\n",
+    "import os\n",
+    "import numpy as np\n",
+    "from sklearn import metrics\n",
+    "from scipy.stats import zscore\n",
+    "\n",
+    "analyze(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1628,
+   "id": "4311c15b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Int64Index: 6000 entries, 0 to 5999\n",
+      "Columns: 125 entries, R1-PA1:VH to marker\n",
+      "dtypes: float64(112), int64(13)\n",
+      "memory usage: 5.8 MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "column_names_updated = np.delete(column_names, [28, 57, 86, 115])\n",
+    "df = df.drop(column_names_RS, axis=1)\n",
+    "\n",
+    "df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1629,
+   "id": "f21a5139",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(6000, 112)"
+      ]
+     },
+     "execution_count": 1629,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.preprocessing import StandardScaler\n",
+    "\n",
+    "# Standardise the numerical measurement columns: everything except the last\n",
+    "# 13 columns (the 12 log/alert flags plus the marker label).\n",
+    "unscaled_input = df.iloc[:,:-13]\n",
+    "scaler = StandardScaler().fit(unscaled_input)\n",
+    "scaled_input = scaler.transform(unscaled_input)\n",
+    "# Safety net: after standardisation 0 is the column mean, so 0 is the neutral\n",
+    "# fill for any NaN (1 would place the sample one standard deviation above it).\n",
+    "scaled_input[np.isnan(scaled_input)] = 0\n",
+    "scaled_input.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1630,
+   "id": "91c3e8c2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Int64Index: 6000 entries, 0 to 5999\n",
+      "Columns: 112 entries, R1-PA1:VH to R4-PA:ZH\n",
+      "dtypes: float64(112)\n",
+      "memory usage: 5.2 MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df1_scaled = pd.DataFrame(scaled_input, index=df.index, columns=column_names_updated[:scaled_input.shape[1]])\n",
+    "df1_scaled.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1631,
+   "id": "4a23f18c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Int64Index: 6000 entries, 0 to 5999\n",
+      "Data columns (total 12 columns):\n",
+      " #   Column              Non-Null Count  Dtype\n",
+      "---  ------              --------------  -----\n",
+      " 0   control_panel_log1  6000 non-null   int64\n",
+      " 1   control_panel_log2  6000 non-null   int64\n",
+      " 2   control_panel_log3  6000 non-null   int64\n",
+      " 3   control_panel_log4  6000 non-null   int64\n",
+      " 4   relay1_log          6000 non-null   int64\n",
+      " 5   relay2_log          6000 non-null   int64\n",
+      " 6   relay3_log          6000 non-null   int64\n",
+      " 7   relay4_log          6000 non-null   int64\n",
+      " 8   snort_log1          6000 non-null   int64\n",
+      " 9   snort_log2          6000 non-null   int64\n",
+      " 10  snort_log3          6000 non-null   int64\n",
+      " 11  snort_log4          6000 non-null   int64\n",
+      "dtypes: int64(12)\n",
+      "memory usage: 609.4 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df2 = df.iloc[:,-13:-1]\n",
+    "df2.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1632,
+   "id": "ba3d7444",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Int64Index: 6000 entries, 0 to 5999\n",
+      "Data columns (total 12 columns):\n",
+      " #   Column       Non-Null Count  Dtype\n",
+      "---  ------       --------------  -----\n",
+      " 0   R1:S_0       6000 non-null   int32\n",
+      " 1   R1:S_2048    6000 non-null   int32\n",
+      " 2   R1:S_2058    6000 non-null   int32\n",
+      " 3   R1:S_270336  6000 non-null   int32\n",
+      " 4   R2:S_0       6000 non-null   int32\n",
+      " 5   R2:S_270336  6000 non-null   int32\n",
+      " 6   R3:S_0       6000 non-null   int32\n",
+      " 7   R3:S_2048    6000 non-null   int32\n",
+      " 8   R3:S_270336  6000 non-null   int32\n",
+      " 9   R4:S_0       6000 non-null   int32\n",
+      " 10  R4:S_2058    6000 non-null   int32\n",
+      " 11  R4:S_270336  6000 non-null   int32\n",
+      "dtypes: int32(12)\n",
+      "memory usage: 328.1 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "#RS dummies\n",
+    "RS_frame.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1633,
+   "id": "2e67286d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "Int64Index: 6000 entries, 0 to 5999\n",
+      "Columns: 136 entries, R1-PA1:VH to R4:S_270336\n",
+      "dtypes: float64(112), int32(12), int64(12)\n",
+      "memory usage: 6.0 MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "X = pd.concat([df1_scaled, df2, RS_frame], axis=1)\n",
+    "X.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1634,
+   "id": "7d4368da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y = df[column_names[-1]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1635,
+   "id": "5ac91fca",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(6000,)"
+      ]
+     },
+     "execution_count": 1635,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1636,
+   "id": "afdc301d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 10, test_size=0.35)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "36ed5e7b",
+   "metadata": {},
+   "source": [
+    "35% of the data will be used for the testing subset while the remainder will be used for the training subset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1637,
+   "id": "a0524ed8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import StratifiedKFold\n",
+    "\n",
+    "#StratifiedKFold ensures data labels are balanced\n",
+    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=10)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "feb811f5",
+   "metadata": {},
+   "source": [
+    "StratifiedKFold preserves the class proportions of the labels in every fold, so each fold stays balanced between normal and attack traces."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "6af2b794",
+   "metadata": {},
+   "source": [
+    "### A.4\tML Model Evaluation\n",
+    "#### A.4.1\tTraining Result\n",
+    "##### i. Logistic Regression (LR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1638,
+   "id": "f6f3ac4f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "LR score =  0.9238095238095239\n",
+      "LR accuracy score = 92.38%\n"
+     ]
+    }
+   ],
+   "source": [
+    "#using Logistic regression\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "\n",
+    "clf_lr = LogisticRegression(C=1e2, max_iter=5000).fit(X_train, y_train)\n",
+    "score_lr = clf_lr.score(X_test, y_test) \n",
+    "print(\"LR score = \", score_lr)\n",
+    "print(\"LR accuracy score = %0.2f%%\" % (score_lr*100))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "0e8fb03d",
+   "metadata": {},
+   "source": [
+    "So, >90% accuracy is observed on the held-out test subset using Logistic Regression.\n",
+    "\n",
+    "Let's do some cross-validation to see result stability."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1639,
+   "id": "762a708e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.92914497 0.91916212 0.93081599 0.91498489 0.91244157]\n",
+      "5 fold cross-validation accuracy [92.13%] with standard deviation [0.74%]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.model_selection import cross_val_score\n",
+    "\n",
+    "# Scores are macro-averaged F1 (scoring='f1_macro'), so the summary line is\n",
+    "# labelled as macro-F1 rather than accuracy.\n",
+    "cross_val_scores_lr = cross_val_score(clf_lr, X, y, cv=kfold, scoring='f1_macro')\n",
+    "print(cross_val_scores_lr)\n",
+    "print (\"5 fold cross-validation macro-F1 [%0.2f%%] with standard deviation [%0.2f%%]\" % (100*cross_val_scores_lr.mean(), 100*cross_val_scores_lr.std()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1640,
+   "id": "f4869aa3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True Negatives:  984 \n",
+      "False Positives:  67 \n",
+      "False Negatives:  93 \n",
+      "True Positives:  956\n",
+      "-------------------------------------------------------\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       0.91      0.94      0.92      1051\n",
+      "           1       0.93      0.91      0.92      1049\n",
+      "\n",
+      "    accuracy                           0.92      2100\n",
+      "   macro avg       0.92      0.92      0.92      2100\n",
+      "weighted avg       0.92      0.92      0.92      2100\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x12f3f3bfa50>"
+      ]
+     },
+     "execution_count": 1640,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 640x480 with 2 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import confusion_matrix\n",
+    "from sklearn.metrics import classification_report\n",
+    "from sklearn.metrics import ConfusionMatrixDisplay\n",
+    "\n",
+    "y_pred = clf_lr.predict(X_test)\n",
+    "#accuracy, precision, recall, F1\n",
+    "tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()\n",
+    "print('True Negatives: ', tn, '\\nFalse Positives: ', fp, '\\nFalse Negatives: ', fn, '\\nTrue Positives: ', tp)\n",
+    "print('-'*55)\n",
+    "print(classification_report(y_test, y_pred))\n",
+    "ConfusionMatrixDisplay.from_predictions(y_test, y_pred)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "13bb4535",
+   "metadata": {},
+   "source": [
+    "##### ii.\tSupport Vector Machine (SVM) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1641,
+   "id": "4335edba",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SVM score =  0.9242857142857143\n",
+      "SVM accuracy score = 92.43%\n"
+     ]
+    }
+   ],
+   "source": [
+    "#using SVM analysis\n",
+    "from sklearn import svm\n",
+    "\n",
+    "clf_svm = svm.SVC(kernel='linear', C=10).fit(X_train, y_train)\n",
+    "score_svm = clf_svm.score(X_test, y_test)\n",
+    "print(\"SVM score = \", score_svm)\n",
+    "print(\"SVM accuracy score = %0.2f%%\" % (score_svm*100))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "667501ff",
+   "metadata": {},
+   "source": [
+    "So, >90% accuracy is observed on the held-out test subset using SVM.\n",
+    "\n",
+    "Let's do some cross-validation to see result stability."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1642,
+   "id": "b6c60a7e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.92999981 0.91416375 0.92833313 0.91583327 0.91916212]\n",
+      "5 fold cross-validation accuracy [92.15%] with standard deviation [0.65%]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Scores are macro-averaged F1 (scoring='f1_macro'), so the summary line is\n",
+    "# labelled as macro-F1 rather than accuracy.\n",
+    "cross_val_scores_svm = cross_val_score(clf_svm, X, y, cv=kfold, scoring='f1_macro')\n",
+    "print(cross_val_scores_svm)\n",
+    "print (\"5 fold cross-validation macro-F1 [%0.2f%%] with standard deviation [%0.2f%%]\" % (100*cross_val_scores_svm.mean(), 100*cross_val_scores_svm.std()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1643,
+   "id": "f76a4a23",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True Negatives:  966 \n",
+      "False Positives:  85 \n",
+      "False Negatives:  74 \n",
+      "True Positives:  975\n",
+      "-------------------------------------------------------\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       0.93      0.92      0.92      1051\n",
+      "           1       0.92      0.93      0.92      1049\n",
+      "\n",
+      "    accuracy                           0.92      2100\n",
+      "   macro avg       0.92      0.92      0.92      2100\n",
+      "weighted avg       0.92      0.92      0.92      2100\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x12f4c58d4d0>"
+      ]
+     },
+     "execution_count": 1643,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfsAAAGwCAYAAACuFMx9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA0GElEQVR4nO3deXwU9f3H8ffmDkk2IWgSAiGAyBFOBYvxPiJBqEKhVdqgERFbBRQQFKpcoqLUA0EEiwrSgopaqVDEUk6RiBKEnyJEQZAIJKCBhIC5duf3B7K6xmOX3WTZmdfz8ZjHg535zuwnLfLJ5/P9zozNMAxDAADAtEICHQAAAKhbJHsAAEyOZA8AgMmR7AEAMDmSPQAAJkeyBwDA5Ej2AACYXFigA/CF0+nUgQMHFBcXJ5vNFuhwAABeMgxDx44dU2pqqkJC6q7+rKioUFVVlc/XiYiIUFRUlB8iql9BnewPHDigtLS0QIcBAPBRYWGhmjZtWifXrqioUIv0WBUdcvh8rZSUFO3ZsyfoEn5QJ/u4uDhJ0u78NMXFMiMBc7qxfbdAhwDUmRqjWu863nL9e14XqqqqVHTIoS/zm8sed/q5ouyYU+ld96qqqopkX59Ote7jYkN8+j8QOJOF2cIDHQJQ5+pjKjY2zqbYuNP/HqeCd7o4qJM9AACechhOOXx4G4zDcPovmHpGsgcAWIJThpw6/Wzvy7mBRu8bAACTo7IHAFiCU0750oj37ezAItkDACzBYRhyGKffivfl3ECjjQ8AgMlR2QMALMHKC/RI9gAAS3DKkMOiyZ42PgAAJkdlDwCwBNr4AACYHKvxAQCAaVHZAwAswfnd5sv5wYpkDwCwBIePq/F9OTfQSPYAAEtwGPLxrXf+i6W+MWcPAIDJUdkDACyBOXsAAEzOKZscsvl0frCijQ8AgMlR2QMALMFpnNx8OT9YkewBAJbg8LGN78u5gUYbHwAAk6OyBwBYgpUre5I9AMASnIZNTsOH1fg+nBtotPEBADA5KnsAgCXQxgcAwOQcCpHDh4a2w4+x1DeSPQDAEgwf5+wN5uwBAMCZisoeAGAJzNkDAGByDiNEDsOHOfsgflwubXwAAEyOyh4AYAlO2eT0ocZ1KnhLe5I9AMASrDxnTxsfAACTo7IHAFiC7wv0aOMDAHBGOzln78OLcGjjAwCAMxWVPQDAEpw+Phuf1fgAAJzhmLMHAMDknAqx7H32zNkDAGByVPYAAEtwGDY5fHhNrS/nBhrJHgBgCQ4fF+g5aOMDAIAzFZU9AMASnEaInD6sxneyGh8AgDMbbXwAAGBaVPYAAEtwyrcV9U7/hVLvSPYAAEvw/aE6wdsMD97IAQCAR6jsAQCW4Puz8YO3PibZAwAswcrvsyfZAwAswcqVffBGDgAAPEJlDwCwBN8fqhO89THJHgBgCU7DJqcv99kH8VvvgvfXFAAA4BEqewCAJTh9bOMH80N1SPYAAEvw/a13wZvsgzdyAADgESp7AIAlOGSTw4cH4/hybqCR7AEAlkAbHwAAmBaVPQDAEhzyrRXv8F8o9Y7KHgBgCafa+L5s3nA4HBo/frxatGih6OhonXPOOZoyZYoMw3CNMQxDEyZMUOPGjRUdHa2srCx9/vnnbtcpKSlRTk6O7Ha7EhISNHjwYJWXl3sVC8keAGAJp16E48vmjccee0yzZ8/WM888ox07duixxx7TtGnTNHPmTNeYadOmacaMGZozZ442bdqkmJgYZWdnq6KiwjUmJydH27dv18qVK7Vs2TKtX79et99+u1ex0MYHAKAObNy4UX369FHv3r0lSc2bN9fLL7+sDz74QNLJqn769Ol64IEH1KdPH0nSggULlJycrCVLlmjAgAHasWOHVqxYoQ8//FDdunWTJM2cOVO9evXS448/rtTUVI9iobIHAFiC8d377E93M76b7y8rK3PbKisrf/L7LrroIq1atUqfffaZJGnbtm3asGG
Drr32WknSnj17VFRUpKysLNc58fHx6t69u/Ly8iRJeXl5SkhIcCV6ScrKylJISIg2bdrk8c9OZQ8AsAR/vc8+LS3Nbf/EiRM1adKkWuPHjh2rsrIytW3bVqGhoXI4HHr44YeVk5MjSSoqKpIkJScnu52XnJzsOlZUVKSkpCS342FhYUpMTHSN8QTJHgAALxQWFsput7s+R0ZG/uS4xYsXa+HChVq0aJHat2+vrVu3asSIEUpNTVVubm59hSuJZA8AsAh/veLWbre7JfufM2bMGI0dO1YDBgyQJHXs2FFffvmlpk6dqtzcXKWkpEiSiouL1bhxY9d5xcXF6tKliyQpJSVFhw4dcrtuTU2NSkpKXOd7gjl7AIAlOL57650vmzdOnDihkBD3c0JDQ+V0OiVJLVq0UEpKilatWuU6XlZWpk2bNikzM1OSlJmZqaNHjyo/P981ZvXq1XI6nerevbvHsVDZAwBQB6677jo9/PDDatasmdq3b6+PPvpITz75pG699VZJks1m04gRI/TQQw/p3HPPVYsWLTR+/Hilpqaqb9++kqR27dqpZ8+eGjJkiObMmaPq6moNGzZMAwYM8HglvkSyBwBYhL/a+J6aOXOmxo8frzvvvFOHDh1Samqq/vznP2vChAmuMffee6+OHz+u22+/XUePHtUll1yiFStWKCoqyjVm4cKFGjZsmK6++mqFhISof//+mjFjhlex2IwfPsonyJSVlSk+Pl6HCtJlj2NGAuZ0XbrnrTog2NQY1VpT84ZKS0s9mgc/HadyxbANv1NkbPhpX6eyvFrPXPJmncZaV8iQAACYHG18AIAlOAybHD608X05N9BI9gAAS6jvOfszCckeAGAJxmm8ue7H5wer4I0cAAB4hMoeAGAJDtnkkA9z9j6cG2gkewCAJTgN3+bdnUF7ozptfAAATI/KHjpRHqJ/TktV3ooElX4TrpbtT+j2BwvVussJ15jCz6M07+Em+uT9ODlqpGatKzRu7m4lNal2jdmxOUb/eCxVBR/FKCRUatn+hB5c+Lkio4P412GYUkiIoYEjD+iq35WoYVK1vikO1/9eO0uLZqRI37Vq73lir675wzdu521ea9cDN58bgIjhD04fF+j5cm6gkeyhmaPT9WVBtO6ZsVeJydVa869EPTCgtZ5ds11nNa7Wwb0RurdvG13zx6+VM/qAGsQ6tO+zaEVEfp/Ed2yO0cSB5+oPww7qzw8VKjTU0J5PGygkeP/bgIn94Y4i9b7psJ4Y1UJffhalczud0KjH9+r4sVD9e9737w7/cI1dT45u7vpcXRW8c7aQnLLJ6cO8uy/nBtoZ8U/xrFmz1Lx5c0VFRal79+764IMPAh2SZVR+a9N7yxtq0P1fqcOF5UptUamcew6qcfMKvb3gbEnSgseaqNtVpbr1gf06p8O3aty8St17lCrhrBrXdZ6f1FTX3XpIfxhWrPQ2FWraqlKXXn9E4ZFU9TjzZHQ7rvf/m6APVser+KtIbVjeUFvW29Wm83G3cdVVNh05HO7aykupjxCcAp7sX331VY0aNUoTJ07Uli1b1LlzZ2VnZ9d6fy/qhsNhk9Nhq5WUI6MMbf8wVk6ntHlVvFJbVmj8n1opp1MnjfptW+WtiHeNPfp1mAo+ilXCWdUafX0bDezcSWP7t9b2D2Lq+8cBPPLp5hh1ufiYmrSokCS1aHdC7S8o14dr3Z933unCcr2yZZueX/OJhj38peISan7qcggSp56g58sWrAKe7J988kkNGTJEgwYNUkZGhubMmaMGDRroxRdfDHRoltAg1qm2Xcv1ytON9U1RuBwOac0bidqZH6MjxeEq/TpM3x4P1euzUtT1ijJNWfS5Mnse0SO3naOP82IlSUVfRkqSFj2RquycrzV54ec6p8MJ3X9ja+3/IjKQPx7wkxY/m6K1Sxtq7prtWrY7X7Pe3qElLyZpzZJGrjGb19r1+KjmGvvH1nphalN1vLBcDy34XCEhdKuC1ak5e1+
2YBXQnlRVVZXy8/M1btw4176QkBBlZWUpLy+v1vjKykpVVla6PpeVldVLnGZ3z4w9evqe5srt2kkhoYbO6XhCl/Ut0a7/ayCn8+Rvshdml6rv7Se7LS07fKsdm2P19j/OVsfMchnOk9fpOfCwrrnx5IKmczp8pW3vxWnlq410y7gDAfm5gJ9z2W+P6Kq+JXpseAt9+Vm0zml/Qn+eWKhviiP0v9dPJvx1SxNd4/cWRGvPzmjN3/CJOmUe09b3guuNZ0BAk/3XX38th8Oh5ORkt/3JycnauXNnrfFTp07V5MmT6ys8y2jcvEqPvvGZKk6E6MSxECUm1+ixv7RQSrMq2RNrFBpmKO3cb93OSTu3Qp9+cLKyb5h8ckV+s9YV7mNaVejw/oj6+SEAL9x2/1da/GyKK6HvLYhWUpMq3XjnQVey/7GifZE6+k2YUptXaut79Rkt/MUpH5+NzwK9+jFu3DiVlpa6tsLCwkCHZCpRDZxKTK5R+dFQbVln14XZRxUeYejczse1f3eU29j9X0QqqWmVJCk5rUqJKVX6qtaYKCU1qaq3+AFPRUY7XV2rU5xOyfYL/yKelVIle8MalRw6/fehI7CM71bjn+5mBHGyD2hlf9ZZZyk0NFTFxcVu+4uLi5WSklJrfGRkpCIjmQP2t/y1dsmQmpxToYN7I/XilKZqek6Fsm78WpLU745iTbujhdpfWK5OFx1T/lq7PliZoKmvF0iSbDap/1+KtfCJVLXIOKGW7b/Vqtca6avdURr3992B/NGAn7TpfwkaMPygDh+I0JefRemc9if0u9sO6b+LT1b1UQ0cGjjioDa8naAjh8PVOL1Sg/+6Xwf2Rip/HS38YMVb7wIkIiJCXbt21apVq9S3b19JktPp1KpVqzRs2LBAhmYpJ8pC9dKjTfT1wXDFJTh0Ua8juvm+/Qr7roC56NqjuvPRfXptZor+PiFNTVpW6K9zd6v9b76/TanPkEOqqrTp+UlpOnY0VC0yvtWUlz9T4+ZU9jjzPDshTTePPqChD+1TwlknH6rz9sKztPDpxpIkp8OmFu2+Vdbvv1GM3aGS4nDlv2vXgsdTVV0VVA1RQJJkMwwjoEtLX331VeXm5uq5557Tb37zG02fPl2LFy/Wzp07a83l/1hZWZni4+N1qCBd9jj+A4Q5XZfePdAhAHWmxqjWmpo3VFpaKru9bromp3LF71YOUnjM6a8jqj5epTevmVensdaVgD8h4sYbb9Thw4c1YcIEFRUVqUuXLlqxYsWvJnoAALxBGz/Ahg0bRtseAIA6ckYkewAA6pqVn41PsgcAWIKV2/isagMAwOSo7AEAlmDlyp5kDwCwBCsne9r4AACYHJU9AMASrFzZk+wBAJZgyLfb5wL6uFkfkewBAJZg5cqeOXsAAEyOyh4AYAlWruxJ9gAAS7BysqeNDwCAyVHZAwAswcqVPckeAGAJhmGT4UPC9uXcQKONDwCAyVHZAwAsgffZAwBgclaes6eNDwCAyVHZAwAswcoL9Ej2AABLsHIbn2QPALAEK1f2zNkDAGByVPYAAEswfGzjB3NlT7IHAFiCIckwfDs/WNHGBwDA5KjsAQCW4JRNNp6gBwCAebEaHwAAmBaVPQDAEpyGTTYeqgMAgHkZho+r8YN4OT5tfAAATI7KHgBgCVZeoEeyBwBYAskeAACTs/ICPebsAQAwOSp7AIAlWHk1PskeAGAJJ5O9L3P2fgymntHGBwDA5KjsAQCWwGp8AABMzpBv76QP4i4+bXwAAMyOyh4AYAm08QEAMDsL9/FJ9gAAa/CxslcQV/bM2QMAYHJU9gAAS+AJegAAmJyVF+jRxgcAwOSo7AEA1mDYfFtkR2UPAMCZ7dScvS+bt/bv36+BAweqUaNGio6OVseOHbV58+YfxGRowoQJaty4saKjo5WVlaXPP//c7RolJSXKycmR3W5XQkKCBg8erPLycq/iINkDAFAHjhw5oosvvljh4eF6++239emnn+q
JJ55Qw4YNXWOmTZumGTNmaM6cOdq0aZNiYmKUnZ2tiooK15icnBxt375dK1eu1LJly7R+/XrdfvvtXsVCGx8AYA31/FCdxx57TGlpaZo3b55rX4sWLb6/nGFo+vTpeuCBB9SnTx9J0oIFC5ScnKwlS5ZowIAB2rFjh1asWKEPP/xQ3bp1kyTNnDlTvXr10uOPP67U1FSPYqGyBwBYwqnV+L5sklRWVua2VVZW/uT3vfXWW+rWrZv+8Ic/KCkpSeedd57mzp3rOr5nzx4VFRUpKyvLtS8+Pl7du3dXXl6eJCkvL08JCQmuRC9JWVlZCgkJ0aZNmzz+2T2q7N966y2PL3j99dd7PBYAgGCTlpbm9nnixImaNGlSrXFffPGFZs+erVGjRumvf/2rPvzwQ911112KiIhQbm6uioqKJEnJyclu5yUnJ7uOFRUVKSkpye14WFiYEhMTXWM84VGy79u3r0cXs9lscjgcHn85AAD1yg8PxiksLJTdbnd9joyM/MlxTqdT3bp10yOPPCJJOu+88/TJJ59ozpw5ys3N9T0QL3jUxnc6nR5tJHoAwJnKX218u93utv1csm/cuLEyMjLc9rVr10779u2TJKWkpEiSiouL3cYUFxe7jqWkpOjQoUNux2tqalRSUuIa4wmf5ux/uFoQAIAzmuGHzQsXX3yxCgoK3PZ99tlnSk9Pl3RysV5KSopWrVrlOl5WVqZNmzYpMzNTkpSZmamjR48qPz/fNWb16tVyOp3q3r27x7F4newdDoemTJmiJk2aKDY2Vl988YUkafz48XrhhRe8vRwAAKY0cuRIvf/++3rkkUe0a9cuLVq0SH//+981dOhQSSenvkeMGKGHHnpIb731lj7++GPdfPPNSk1NdU2ft2vXTj179tSQIUP0wQcf6L333tOwYcM0YMAAj1fiS6eR7B9++GHNnz9f06ZNU0REhGt/hw4d9Pzzz3t7OQAA6onND5vnLrjgAr355pt6+eWX1aFDB02ZMkXTp09XTk6Oa8y9996r4cOH6/bbb9cFF1yg8vJyrVixQlFRUa4xCxcuVNu2bXX11VerV69euuSSS/T3v//du5/cMLx7JlCrVq303HPP6eqrr1ZcXJy2bdumli1baufOncrMzNSRI0e8CsAXZWVlio+P16GCdNnjuIsQ5nRduuetOiDY1BjVWlPzhkpLS90WvfnTqVyRNnuSQqKjfv2En+H8tkKFd0yq01jritcZcv/+/WrVqlWt/U6nU9XV1X4JCgAA+I/XyT4jI0Pvvvturf2vv/66zjvvPL8EBQCA39XzAr0zidePy50wYYJyc3O1f/9+OZ1O/etf/1JBQYEWLFigZcuW1UWMAAD4jrfeea5Pnz5aunSp/ve//ykmJkYTJkzQjh07tHTpUl1zzTV1ESMAAPDBab0I59JLL9XKlSv9HQsAAHXmdF9T+8Pzg9Vpv/Vu8+bN2rFjh6ST8/hdu3b1W1AAAPhdPb/17kzidbL/6quv9Mc//lHvvfeeEhISJElHjx7VRRddpFdeeUVNmzb1d4wAAMAHXs/Z33bbbaqurtaOHTtUUlKikpIS7dixQ06nU7fddltdxAgAgO9OLdDzZQtSXlf269at08aNG9WmTRvXvjZt2mjmzJm69NJL/RocAAD+YjNObr6cH6y8TvZpaWk/+fAch8Ph1XN6AQCoVxaes/e6jf+3v/1Nw4cP1+bNm137Nm/erLvvvluPP/64X4MDAAC+86iyb9iwoWy27+cqjh8/ru7duyss7OTpNTU1CgsL06233up6Uw8AAGcUCz9Ux6NkP3369DoOAwCAOmbhNr5HyT43N7eu4wAAAHXktB+qI0kVFRWqqqpy2xdsr/0DAFiEhSt7rxfoHT9+XMOGDVNSUpJiYmLUsGFDtw0AgDOShd9653Wyv/fee7V69WrNnj1bkZGRev755zV58mSlpqZqwYIFdREjAADwgddt/KVLl2rBggW64oorNGjQIF166aVq1aq
V0tPTtXDhQuXk5NRFnAAA+MbCq/G9ruxLSkrUsmVLSSfn50tKSiRJl1xyidavX+/f6AAA8JNTT9DzZQtWXif7li1bas+ePZKktm3bavHixZJOVvynXowDAADOHF4n+0GDBmnbtm2SpLFjx2rWrFmKiorSyJEjNWbMGL8HCACAX1h4gZ7Xc/YjR450/TkrK0s7d+5Ufn6+WrVqpU6dOvk1OAAA4Duf7rOXpPT0dKWnp/sjFgAA6oxNPr71zm+R1D+Pkv2MGTM8vuBdd9112sEAAAD/8yjZP/XUUx5dzGazBSTZ39Cmi8Js4fX+vUB9eOfA5l8fBASpsmNONWxdT19m4VvvPEr2p1bfAwAQtHhcLgAAMCufF+gBABAULFzZk+wBAJbg61PwLPUEPQAAEFyo7AEA1mDhNv5pVfbvvvuuBg4cqMzMTO3fv1+S9I9//EMbNmzwa3AAAPiNhR+X63Wyf+ONN5Sdna3o6Gh99NFHqqyslCSVlpbqkUce8XuAAADAN14n+4ceekhz5szR3LlzFR7+/YNsLr74Ym3ZssWvwQEA4C9WfsWt13P2BQUFuuyyy2rtj4+P19GjR/0REwAA/mfhJ+h5XdmnpKRo165dtfZv2LBBLVu29EtQAAD4HXP2nhsyZIjuvvtubdq0STabTQcOHNDChQs1evRo3XHHHXURIwAA8IHXbfyxY8fK6XTq6quv1okTJ3TZZZcpMjJSo0eP1vDhw+siRgAAfGblh+p4nextNpvuv/9+jRkzRrt27VJ5ebkyMjIUGxtbF/EBAOAfFr7P/rQfqhMREaGMjAx/xgIAAOqA18n+yiuvlM328ysSV69e7VNAAADUCV9vn7NSZd+lSxe3z9XV1dq6das++eQT5ebm+isuAAD8iza+55566qmf3D9p0iSVl5f7HBAAAPAvv731buDAgXrxxRf9dTkAAPzLwvfZ++2td3l5eYqKivLX5QAA8CtuvfNCv3793D4bhqGDBw9q8+bNGj9+vN8CAwAA/uF1so+Pj3f7HBISojZt2ujBBx9Ujx49/BYYAADwD6+SvcPh0KBBg9SxY0c1bNiwrmICAMD/LLwa36sFeqGhoerRowdvtwMABB0rv+LW69X4HTp00BdffFEXsQAAgDrgdbJ/6KGHNHr0aC1btkwHDx5UWVmZ2wYAwBnLgrfdSV7M2T/44IO655571KtXL0nS9ddf7/bYXMMwZLPZ5HA4/B8lAAC+svCcvcfJfvLkyfrLX/6iNWvW1GU8AADAzzxO9oZx8leayy+/vM6CAQCgrvBQHQ/90tvuAAA4o9HG90zr1q1/NeGXlJT4FBAAAPAvr5L95MmTaz1BDwCAYEAb30MDBgxQUlJSXcUCAEDdsXAb3+P77JmvBwAgOHm9Gh8AgKBk4cre42TvdDrrMg4AAOoUc/YAAJidhSt7r5+NDwAAgguVPQDAGixc2ZPsAQCWYOU5e9r4AACYHJU9AMAaaOMDAGButPEBAIBpUdkDAKyBNj4AACZn4WRPGx8AgDr26KOPymazacSIEa59FRUVGjp0qBo1aqTY2Fj1799fxcXFbuft27dPvXv3VoMGDZSUlKQxY8aopqbG6+8n2QMALMHmh+10fPjhh3ruuefUqVMnt/0jR47U0qVL9dprr2ndunU6cOCA+vXr5zrucDjUu3dvVVVVaePGjXrppZc0f/58TZgwwesYSPYAAGsw/LB5qby8XDk5OZo7d64aNmzo2l9aWqoXXnhBTz75pK666ip17dpV8+bN08aNG/X+++9Lkv773//q008/1T//+U916dJF1157raZMmaJZs2apqqrKqzhI9gAASzh1650vmySVlZW5bZWVlT/7nUOHDlXv3r2VlZXltj8/P1/V1dVu+9u2batmzZopLy9PkpSXl6eOHTsqOTnZNSY7O1tlZWXavn27Vz87yR4AAC+kpaUpPj7etU2dOvUnx73yyivasmXLTx4vKipSRESEEhIS3PYnJyerqKj
INeaHif7U8VPHvMFqfACANfhpNX5hYaHsdrtrd2RkZK2hhYWFuvvuu7Vy5UpFRUX58KX+QWUPALAOP8zX2+12t+2nkn1+fr4OHTqk888/X2FhYQoLC9O6des0Y8YMhYWFKTk5WVVVVTp69KjbecXFxUpJSZEkpaSk1Fqdf+rzqTGeItkDAOBnV199tT7++GNt3brVtXXr1k05OTmuP4eHh2vVqlWucwoKCrRv3z5lZmZKkjIzM/Xxxx/r0KFDrjErV66U3W5XRkaGV/HQxgcAWEJ9Phs/Li5OHTp0cNsXExOjRo0aufYPHjxYo0aNUmJioux2u4YPH67MzExdeOGFkqQePXooIyNDN910k6ZNm6aioiI98MADGjp06E92E34JyR4AYA1n2BP0nnrqKYWEhKh///6qrKxUdna2nn32Wdfx0NBQLVu2THfccYcyMzMVExOj3NxcPfjgg15/F8keAIB6sHbtWrfPUVFRmjVrlmbNmvWz56Snp2v58uU+fzfJHgBgCVZ+xS3JHgBgDWdYG78+sRofAACTo7IHAFgCbXwAAMzOwm18kj0AwBosnOyZswcAwOSo7AEAlsCcPQAAZkcbHwAAmBWVPQDAEmyGIZtx+uW5L+cGGskeAGANtPEBAIBZUdkDACyB1fgAAJgdbXwAAGBWVPYAAEugjQ8AgNlZuI1PsgcAWIKVK3vm7AEAMDkqewCANdDGBwDA/IK5Fe8L2vgAAJgclT0AwBoM4+Tmy/lBimQPALAEVuMDAADTorIHAFgDq/EBADA3m/Pk5sv5wYo2PgAAJkdlj1pe2vSpUtKqa+1/a34jzfpr0x/sMfTQP/fogquOadKtzZW3Ir7+ggS8cKI8RC9Na6yNb8fr6DdhOqf9t7pjyldq0+VbSVJ2apefPO+2B/brD3celiTd/JsMFX8V4Xb81nEHdOPwQ3UaO/yINj7wvbuuba2Q0O//VjdvW6FHX/1C7y5NcBv3uyFfB/OdKLCQp+5J096CKN0780slJldr9RuJGntjK81du1NnNa7Wy1s/cRv/4Wq7nronTZf0LnXbf/OYg7o25xvX5waxQdzXtSBW4wfI+vXrdd111yk1NVU2m01LliwJZDj4TmlJmI4cDndt3bPKdGBPhP4vL8Y1pmX7b9X/z4f15Ki0AEYK/LrKb23asDxBtz1wUB0vPK4mLap00+gipTav1LIFjSRJiUk1blveO/HqfHG5GqdXuV0rOtbpNi6qAck+qJy6z96XLUgFNNkfP35cnTt31qxZswIZBn5BWLhTV/U/ondeSZRkkyRFRjs1dtaXmnV/Ex05HB7YAIFf4XDY5HTYFBHpnpgjo5za/kFsrfFHDofpg1V2ZQ/4ptaxxc8k6fftO+jOa1rrtWfPlqOmzsIG/Cqgbfxrr71W1157rcfjKysrVVlZ6fpcVlZWF2HhBy7qWaZYu0P/XZzo2vfnSfv16eYY5b3DHD3OfA1inWrX9bgWTU9Rs3P3KuHsGq1d0lA78mOU2ryy1viVixMVHevQJb3cW/h9Bh9Wq47fKi6hRp9ujtG8qY1Vcihcf550oL5+FPjIym38oJqznzp1qiZPnhzoMCwl+4/f6MM1dpUUn6zgL+xRqi4Xl+vOHq0DHBnguXtnfqknRzXTn87voJBQQ606ntAVfY/o8/9rUGvsO68k6qrfHVFElPu/7P3/fNj155YZFQoPN/T0fWkaNO6gIiKDOAtYiYUX6AXVrXfjxo1TaWmpayssLAx0SKaW1KRK511arhWLvq/qu1xcrsbNq/SvnZ9o+b5tWr5vmyRp/Ny9mvb6rkCFCvyi1OZVevxfu/TvXf+nf27erpnLP1dNtU2N090r+483xeir3VHq+afaLfwfa3P+CTlqbCoujPjVsUCgBVVlHxkZqcjIyECHYRk9BpTo6Ndh2vQ/u2vfq88k6e0fJH9J+vuaz/TcpFS9/1/7jy8BnFGiGjgV1cCpY0dDlb/OrtsecG/Bv/NyI53b6YT
OaV/xq9f6Ynu0QkIMJZzFxH2woI0P/IjNZqjHjSX632sN5XTYXPtPrdD/sUP7I1RcyC9iODNtXhsnw5DSzqnU/j0Ren5KE6W1qlCPG7+v4I8fC9H6pfG6fWLtOfhPNzfQzo9i1PmiY2oQ69SO/BjNmZiqq/ofUVyCoz5/FPiCt94B7s67rFzJTav1ziuNAh0K4LPjZaGaN7Wxvj4YrrgEhy7udVSDxh5U2A9+b13374aSYdOVfY/UOj88wtC6fyfon0+kqLrKppS0KvW7/bD63X641ljgTBTQZF9eXq5du76f592zZ4+2bt2qxMRENWvWLICRYcu6OGWndvZorKfjgEC5/Pqjuvz6o784ptfAb9Rr4E/P1Z/b6Vs9vezzOogM9Yk2foBs3rxZV155pevzqFGjJEm5ubmaP39+gKICAJiShVfjBzTZX3HFFTKCeA4EAIBgwJw9AMASaOMDAGB2TuPk5sv5QYpkDwCwBgvP2QfVE/QAAID3qOwBAJZgk49z9n6LpP6R7AEA1mDhJ+jRxgcAwOSo7AEAlsCtdwAAmB2r8QEAgFlR2QMALMFmGLL5sMjOl3MDjWQPALAG53ebL+cHKdr4AACYHJU9AMASaOMDAGB2Fl6NT7IHAFgDT9ADAABmRWUPALAEnqAHAIDZ0cYHAABmRWUPALAEm/Pk5sv5wYpkDwCwBtr4AADArKjsAQDWwEN1AAAwNys/Lpc2PgAAJkdlDwCwBgsv0CPZAwCswZBv76QP3lxPsgcAWANz9gAAwK+mTp2qCy64QHFxcUpKSlLfvn1VUFDgNqaiokJDhw5Vo0aNFBsbq/79+6u4uNhtzL59+9S7d281aNBASUlJGjNmjGpqaryKhWQPALAGQ9/P25/W5t3XrVu3TkOHDtX777+vlStXqrq6Wj169NDx48ddY0aOHKmlS5fqtdde07p163TgwAH169fPddzhcKh3796qqqrSxo0b9dJLL2n+/PmaMGGCV7HYDCN4+xJlZWWKj4/XFeqjMFt4oMMB6sQ7B7YGOgSgzpQdc6ph6y9UWloqu91eN9/xXa64qvN9CguNPO3r1DgqtXrbY6cd6+HDh5WUlKR169bpsssuU2lpqc4++2wtWrRIv//97yVJO3fuVLt27ZSXl6cLL7xQb7/9tn7729/qwIEDSk5OliTNmTNH9913nw4fPqyIiAiPvpvKHgAAL5SVlbltlZWVHp1XWloqSUpMTJQk5efnq7q6WllZWa4xbdu2VbNmzZSXlydJysvLU8eOHV2JXpKys7NVVlam7du3exwzyR4AYA1OP2yS0tLSFB8f79qmTp3661/tdGrEiBG6+OKL1aFDB0lSUVGRIiIilJCQ4DY2OTlZRUVFrjE/TPSnjp865ilW4wMALMFfq/ELCwvd2viRkb8+NTB06FB98skn2rBhw2l/vy+o7AEA8ILdbnfbfi3ZDxs2TMuWLdOaNWvUtGlT1/6UlBRVVVXp6NGjbuOLi4uVkpLiGvPj1fmnPp8a4wmSPQDAGnxaie/90/cMw9CwYcP05ptvavXq1WrRooXb8a5duyo8PFyrVq1y7SsoKNC+ffuUmZkpScrMzNTHH3+sQ4cOucasXLlSdrtdGRkZHsdCGx8AYA31/LjcoUOHatGiRfr3v/+tuLg41xx7fHy8oqOjFR8fr8GDB2vUqFFKTEyU3W7X8OHDlZmZqQsvvFCS1KNHD2VkZOimm27StGnTVFRUpAceeEBDhw71aPrgFJI9AAB1YPbs2ZKkK664wm3/vHnzdMstt0iSnnrqKYWEhKh///6qrKxUdna2nn32WdfY0NBQLVu2THfccYcyMzMVExOj3NxcPfjgg17FQrIHAFhDPVf2njzGJioqSrNmzdKsWbN+dkx6erqWL1/u1Xf/GMkeAGANTkk2H88PUiR7AIAl8CIcAABgWlT2AABrqOc5+zMJyR4AYA1OQ7L5kLCdwZvsaeMDAGByVPYAAGugjQ8AgNn5mOwVvMm
eNj4AACZHZQ8AsAba+AAAmJzTkE+teFbjAwCAMxWVPQDAGgznyc2X84MUyR4AYA3M2QMAYHLM2QMAALOisgcAWANtfAAATM6Qj8neb5HUO9r4AACYHJU9AMAaaOMDAGByTqckH+6Vdwbvffa08QEAMDkqewCANdDGBwDA5Cyc7GnjAwBgclT2AABrsPDjckn2AABLMAynDB/eXOfLuYFGsgcAWINh+FadM2cPAADOVFT2AABrMHycsw/iyp5kDwCwBqdTsvkw7x7Ec/a08QEAMDkqewCANdDGBwDA3AynU4YPbfxgvvWONj4AACZHZQ8AsAba+AAAmJzTkGzWTPa08QEAMDkqewCANRiGJF/usw/eyp5kDwCwBMNpyPChjW+Q7AEAOMMZTvlW2XPrHQAAOENR2QMALIE2PgAAZmfhNn5QJ/tTv2XVqNqn5yQAZ7KyY8H7Dwzwa8rKT/79ro+q2ddcUaNq/wVTz4I62R87dkyStEHLAxwJUHcatg50BEDdO3bsmOLj4+vk2hEREUpJSdGGIt9zRUpKiiIiIvwQVf2yGUE8CeF0OnXgwAHFxcXJZrMFOhxLKCsrU1pamgoLC2W32wMdDuBX/P2uf4Zh6NixY0pNTVVISN2tGa+oqFBVVZXP14mIiFBUVJQfIqpfQV3Zh4SEqGnTpoEOw5Lsdjv/GMK0+Ptdv+qqov+hqKiooEzS/sKtdwAAmBzJHgAAkyPZwyuRkZGaOHGiIiMjAx0K4Hf8/YZZBfUCPQAA8Ouo7AEAMDmSPQAAJkeyBwDA5Ej2AACYHMkeHps1a5aaN2+uqKgode/eXR988EGgQwL8Yv369bruuuuUmpoqm82mJUuWBDokwK9I9vDIq6++qlGjRmnixInasmWLOnfurOzsbB06dCjQoQE+O378uDp37qxZs2YFOhSgTnDrHTzSvXt3XXDBBXrmmWcknXwvQVpamoYPH66xY8cGODrAf2w2m95880317ds30KEAfkNlj19VVVWl/Px8ZWVlufaFhIQoKytLeXl5AYwMAOAJkj1+1ddffy2Hw6Hk5GS3/cnJySoqKgpQVAAAT5HsAQAwOZI9ftVZZ52l0NBQFRcXu+0vLi5WSkpKgKICAHiKZI9fFRERoa5du2rVqlWufU6nU6tWrVJmZmYAIwMAeCIs0AEgOIwaNUq5ubnq1q2bfvOb32j69Ok6fvy4Bg0aFOjQAJ+Vl5dr165drs979uzR1q1blZiYqGbNmgUwMsA/uPUOHnvmmWf0t7/9TUVFRerSpYtmzJih7t27BzoswGdr167VlVdeWWt/bm6u5s+fX/8BAX5GsgcAwOSYswcAwORI9gAAmBzJHgAAkyPZAwBgciR7AABMjmQPAIDJkewBADA5kj0AACZHsgd8dMstt6hv376uz1dccYVGjBhR73GsXbtWNptNR48e/dkxNptNS5Ys8fiakyZNUpcuXXyKa+/evbLZbNq6datP1wFw+kj2MKVbbrlFNptNNptNERERatWqlR588EHV1NTU+Xf/61//0pQpUzwa60mCBgBf8SIcmFbPnj01b948VVZWavny5Ro6dKjCw8M1bty4WmOrqqoUERHhl+9NTEz0y3UAwF+o7GFakZGRSklJUXp6uu644w5lZWXprbfekvR96/3hhx9Wamqq2rRpI0kqLCzUDTfcoISEBCUmJqpPnz7au3ev65oOh0OjRo1SQkKCGjVqpHvvvVc/fr3Ej9v4lZWVuu+++5SWlqbIyEi1atVKL7zwgvbu3et6+UrDhg1ls9l0yy23SDr5CuGpU6eqRYsWio6OVufOnfX666+7fc/y5cvVunVrRUdH68orr3SL01P33XefWrdurQYNGqhly5YaP368qqura4177rnnlJaWpgYNGuiGG25QaWmp2/Hnn39e7dq1U1RUlNq2batnn33W61gA1B2SPSwjOjpaVVVVrs+rVq1SQUGBVq5cqWXLlqm6ulrZ2dmKi4vTu+++q/fee0+
xsbHq2bOn67wnnnhC8+fP14svvqgNGzaopKREb7755i9+780336yXX35ZM2bM0I4dO/Tcc88pNjZWaWlpeuONNyRJBQUFOnjwoJ5++mlJ0tSpU7VgwQLNmTNH27dv18iRIzVw4ECtW7dO0slfSvr166frrrtOW7du1W233aaxY8d6/b9JXFyc5s+fr08//VRPP/205s6dq6eeesptzK5du7R48WItXbpUK1as0EcffaQ777zTdXzhwoWaMGGCHn74Ye3YsUOPPPKIxo8fr5deesnreADUEQMwodzcXKNPnz6GYRiG0+k0Vq5caURGRhqjR492HU9OTjYqKytd5/zjH/8w2rRpYzidTte+yspKIzo62njnnXcMwzCMxo0bG9OmTXMdr66uNpo2ber6LsMwjMsvv9y4++67DcMwjIKCAkOSsXLlyp+Mc82aNYYk48iRI659FRUVRoMGDYyNGze6jR08eLDxxz/+0TAMwxg3bpyRkZHhdvy+++6rda0fk2S8+eabP3v8b3/7m9G1a1fX54kTJxqhoaHGV1995dr39ttvGyEhIcbBgwcNwzCMc845x1i0aJHbdaZMmWJkZmYahmEYe/bsMSQZH3300c9+L4C6xZw9TGvZsmWKjY1VdXW1nE6n/vSnP2nSpEmu4x07dnSbp9+2bZt27dqluLg4t+tUVFRo9+7dKi0t1cGDB9W9e3fXsbCwMHXr1q1WK/+UrVu3KjQ0VJdffrnHce/atUsnTpzQNddc47a/qqpK5513niRpx44dbnFIUmZmpsffccqrr76qGTNmaPfu3SovL1dNTY3sdrvbmGbNmqlJkyZu3+N0OlVQUKC4uDjt3r1bgwcP1pAhQ1xjampqFB8f73U8AOoGyR6mdeWVV2r27NmKiIhQamqqwsLc/7rHxMS4fS4vL1fXrl21cOHCWtc6++yzTyuG6Ohor88pLy+XJP3nP/9xS7LSyXUI/pKXl6ecnBxNnjxZ2dnZio+P1yuvvKInnnjC61jnzp1b65eP0NBQv8UKwDcke5hWTEyMWrVq5fH4888/X6+++qqSkpJqVbenNG7cWJs2bdJll10m6WQFm5+fr/PPP/8nx3fs2FFOp1Pr1q1TVlZWreOnOgsOh8O1LyMjQ5GRkdq3b9/PdgTatWvnWmx4yvvvv//rP+QPbNy4Uenp6br//vtd+7788sta4/bt26cDBw4oNTXV9T0hISFq06aNkpOTlZqaqi+++EI5OTlefT+A+sMCPeA7OTk5Ouuss9SnTx+9++672rNnj9auXau77rpLX331lSTp7rvv1qOPPqolS5Zo586duvPOO3/xHvnmzZsrNzdXt956q5YsWeK65uLFiyVJ6enpstlsWrZsmQ4fPqzy8nLFxcVp9OjRGjlypF566SXt3r1bW7Zs0cyZM12L3v7yl7/o888/15gxY1RQUKBFixZp/vz5Xv285557rvbt26dXXnlFu3fv1owZM35ysWFUVJRyc3O1bds2vfvuu7rrrrt0ww03KCUlRZI0efJkTZ06VTNmzNBnn32mjz/+WPPmzdOTTz7pVTwA6g7JHvhOgwYNtH79ejVr1kz9+vVTu3btNHjwYFVUVLgq/XvuuUc33XSTcnNzlZmZqbi4OP3ud7/7xevOnj1bv//973XnnXeqbdu2GjJkiI4fPy5JatKkiSZPnqyxY8cqOTlZw4YNkyRNmTJF48eP19SpU9WuXTv17NlT//nPf9SiRQtJJ+fR33jjDS1ZskSdO3fWnDlz9Mgjj3j1815//fUaOXKkhg0bpi5dumjjxo0aP358rXGtWrVSv3791KtXL/Xo0UOdOnVyu7Xutttu0/PPP6958+apY8eOuvzyyzV//nxXrAACz2b83MoiAABgClT2AACYHMkeAACTI9kDAGByJHsAAEyOZA8AgMmR7AEAMDmSPQAAJkeyBwDA5Ej2AACYHMkeAACTI9kDAGBy/w+oNVOVd8ygjQAAAABJRU5ErkJggg=
=",
+      "text/plain": [
+       "<Figure size 640x480 with 2 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "y_pred = clf_svm.predict(X_test)\n",
+    "#accuracy, precision, recall, F1\n",
+    "tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()\n",
+    "print('True Negatives: ', tn, '\\nFalse Positives: ', fp, '\\nFalse Negatives: ', fn, '\\nTrue Positives: ', tp)\n",
+    "print('-'*55)\n",
+    "print(classification_report(y_test, y_pred))\n",
+    "ConfusionMatrixDisplay.from_predictions(y_test, y_pred)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "ca370362",
+   "metadata": {},
+   "source": [
+    "##### iii.\tGradient Boosting Classifier (GBC)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1644,
+   "id": "02e51066",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "GBC score =  0.99\n",
+      "GBC accuracy score = 99.00%\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.ensemble import GradientBoostingClassifier\n",
+    "\n",
+    "# Create a GradientBoostingClassifier object\n",
+    "clf_gb = GradientBoostingClassifier(n_estimators=40, learning_rate=0.5, max_depth=11, random_state=10, subsample=1.0, max_features='sqrt').fit(X_train, y_train)\n",
+    "score_gb = clf_gb.score(X_test, y_test)\n",
+    "print(\"GBC score = \", score_gb)\n",
+    "print(\"GBC accuracy score = %0.2f%%\" % (score_gb*100))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "6fb52ba0",
+   "metadata": {},
+   "source": [
+    "A near-perfect accuracy score of 99% was obtained using GBC."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1645,
+   "id": "bcc80fcc",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.98749999 0.99333326 0.99333333 0.99249995 0.99083333]\n",
+      "5-fold cross-validation accuracy [99.15%] with standard deviation [0.22%]\n"
+     ]
+    }
+   ],
+   "source": [
+    "cross_val_scores_xgb = cross_val_score(clf_gb, X, y, cv=kfold, scoring='f1_macro')\n",
+    "print (cross_val_scores_xgb)\n",
+    "print (\"5-fold cross-validation accuracy [%0.2f%%] with standard deviation [%0.2f%%]\" % (100*cross_val_scores_xgb.mean(), 100*cross_val_scores_xgb.std()))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "f8879a57",
+   "metadata": {},
+   "source": [
+    "Even the cross-validation result is the best among the models tested."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1646,
+   "id": "cd106a88",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True Negatives:  1039 \n",
+      "False Positives:  12 \n",
+      "False Negatives:  9 \n",
+      "True Positives:  1040\n",
+      "-------------------------------------------------------\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       0.99      0.99      0.99      1051\n",
+      "           1       0.99      0.99      0.99      1049\n",
+      "\n",
+      "    accuracy                           0.99      2100\n",
+      "   macro avg       0.99      0.99      0.99      2100\n",
+      "weighted avg       0.99      0.99      0.99      2100\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x12f41622e50>"
+      ]
+     },
+     "execution_count": 1646,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 640x480 with 2 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "y_pred = clf_gb.predict(X_test)\n",
+    "#accuracy, precision, recall, F1\n",
+    "tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()\n",
+    "print('True Negatives: ', tn, '\\nFalse Positives: ', fp, '\\nFalse Negatives: ', fn, '\\nTrue Positives: ', tp)\n",
+    "print('-'*55)\n",
+    "print(classification_report(y_test, y_pred))\n",
+    "ConfusionMatrixDisplay.from_predictions(y_test, y_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1647,
+   "id": "87e689ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#difference_scores = clf_svm.score(X_test, y_test) - clf_lr.score(X_test, y_test) \n",
+    "#difference_mean = 100*cross_val_scores_lr.mean() - 100*cross_val_scores_svm.mean()\n",
+    "#difference_std = 100*cross_val_scores_lr.std() - 100*cross_val_scores_svm.std()\n",
+    "#print(\"scores difference = \", difference_scores)\n",
+    "#print(\"mean difference = \", difference_mean)\n",
+    "#print(\"standard deviation difference = \", difference_std)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "955491a0",
+   "metadata": {},
+   "source": [
+    "The result appears to be stable under cross-validation. This was expected, given that the target data is evenly split between positive and negative samples. \n",
+    "Since SVM returns a near-identical result for accuracy and cross-validation compared to Logistic Regression, it is chosen as the model to train and test the dataset."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "fbaeaae9",
+   "metadata": {},
+   "source": [
+    "### A.5\tML Model with Best Result\n",
+    "The model with the best result was found to be GBC, after comparing the accuracy scores, 5-fold cross-validation results and confusion matrices of the models tested."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "a4cd28dc",
+   "metadata": {},
+   "source": [
+    "\n",
+    "### A.6\tGBC Testing Result\n",
+    "Let's predict the outcome for our test data using the chosen classifier."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1648,
+   "id": "77fbdfde",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_raw_data = pd.read_csv('TestingDataBinary.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1649,
+   "id": "b6aeecef",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 100 entries, 0 to 99\n",
+      "Columns: 128 entries, R1-PA1:VH to snort_log4\n",
+      "dtypes: float64(112), int64(16)\n",
+      "memory usage: 100.1 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "test_raw_data.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1650,
+   "id": "dec22570",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 1650,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_raw_data.columns[test_raw_data.isnull().any()].tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1651,
+   "id": "80208613",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>R1-PA1:VH</th>\n",
+       "      <th>R1-PM1:V</th>\n",
+       "      <th>R1-PA2:VH</th>\n",
+       "      <th>R1-PM2:V</th>\n",
+       "      <th>R1-PA3:VH</th>\n",
+       "      <th>R1-PM3:V</th>\n",
+       "      <th>R1-PA4:IH</th>\n",
+       "      <th>R1-PM4:I</th>\n",
+       "      <th>R1-PA5:IH</th>\n",
+       "      <th>R1-PM5:I</th>\n",
+       "      <th>...</th>\n",
+       "      <th>control_panel_log3</th>\n",
+       "      <th>control_panel_log4</th>\n",
+       "      <th>relay1_log</th>\n",
+       "      <th>relay2_log</th>\n",
+       "      <th>relay3_log</th>\n",
+       "      <th>relay4_log</th>\n",
+       "      <th>snort_log1</th>\n",
+       "      <th>snort_log2</th>\n",
+       "      <th>snort_log3</th>\n",
+       "      <th>snort_log4</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>11.596666</td>\n",
+       "      <td>131810.1804</td>\n",
+       "      <td>-108.380697</td>\n",
+       "      <td>131183.3486</td>\n",
+       "      <td>131.614135</td>\n",
+       "      <td>131860.3269</td>\n",
+       "      <td>9.453804</td>\n",
+       "      <td>370.98086</td>\n",
+       "      <td>-110.466263</td>\n",
+       "      <td>370.98086</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>97.448662</td>\n",
+       "      <td>130656.8100</td>\n",
+       "      <td>-22.528701</td>\n",
+       "      <td>130631.7367</td>\n",
+       "      <td>-142.540440</td>\n",
+       "      <td>130732.0298</td>\n",
+       "      <td>98.439879</td>\n",
+       "      <td>444.59108</td>\n",
+       "      <td>-26.911828</td>\n",
+       "      <td>471.32514</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>55.244591</td>\n",
+       "      <td>131208.4219</td>\n",
+       "      <td>-64.755690</td>\n",
+       "      <td>130556.5169</td>\n",
+       "      <td>175.267790</td>\n",
+       "      <td>131233.4952</td>\n",
+       "      <td>50.987514</td>\n",
+       "      <td>463.63452</td>\n",
+       "      <td>-68.949741</td>\n",
+       "      <td>463.63452</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>61</th>\n",
+       "      <td>116.757340</td>\n",
+       "      <td>131709.8873</td>\n",
+       "      <td>-3.208564</td>\n",
+       "      <td>131684.8140</td>\n",
+       "      <td>-123.214574</td>\n",
+       "      <td>131785.1071</td>\n",
+       "      <td>115.112951</td>\n",
+       "      <td>358.71249</td>\n",
+       "      <td>-5.150891</td>\n",
+       "      <td>358.52938</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>106.151891</td>\n",
+       "      <td>131785.1071</td>\n",
+       "      <td>-13.808283</td>\n",
+       "      <td>131760.0339</td>\n",
+       "      <td>-133.820023</td>\n",
+       "      <td>131860.3269</td>\n",
+       "      <td>107.601474</td>\n",
+       "      <td>324.65403</td>\n",
+       "      <td>-13.097815</td>\n",
+       "      <td>328.49934</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 128 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     R1-PA1:VH     R1-PM1:V   R1-PA2:VH     R1-PM2:V   R1-PA3:VH     R1-PM3:V  \\\n",
+       "7    11.596666  131810.1804 -108.380697  131183.3486  131.614135  131860.3269   \n",
+       "40   97.448662  130656.8100  -22.528701  130631.7367 -142.540440  130732.0298   \n",
+       "12   55.244591  131208.4219  -64.755690  130556.5169  175.267790  131233.4952   \n",
+       "61  116.757340  131709.8873   -3.208564  131684.8140 -123.214574  131785.1071   \n",
+       "34  106.151891  131785.1071  -13.808283  131760.0339 -133.820023  131860.3269   \n",
+       "\n",
+       "     R1-PA4:IH   R1-PM4:I   R1-PA5:IH   R1-PM5:I  ...  control_panel_log3  \\\n",
+       "7     9.453804  370.98086 -110.466263  370.98086  ...                   0   \n",
+       "40   98.439879  444.59108  -26.911828  471.32514  ...                   0   \n",
+       "12   50.987514  463.63452  -68.949741  463.63452  ...                   0   \n",
+       "61  115.112951  358.71249   -5.150891  358.52938  ...                   0   \n",
+       "34  107.601474  324.65403  -13.097815  328.49934  ...                   0   \n",
+       "\n",
+       "    control_panel_log4  relay1_log  relay2_log  relay3_log  relay4_log  \\\n",
+       "7                    0           0           0           0           0   \n",
+       "40                   0           0           0           0           0   \n",
+       "12                   0           0           0           0           0   \n",
+       "61                   0           0           0           0           0   \n",
+       "34                   0           0           0           0           0   \n",
+       "\n",
+       "    snort_log1  snort_log2  snort_log3  snort_log4  \n",
+       "7            0           0           0           0  \n",
+       "40           0           0           0           0  \n",
+       "12           0           0           0           0  \n",
+       "61           0           0           0           0  \n",
+       "34           0           0           0           0  \n",
+       "\n",
+       "[5 rows x 128 columns]"
+      ]
+     },
+     "execution_count": 1651,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "test_raw_data.sample(n=5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1652,
+   "id": "f774ee14",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>R1:S_0</th>\n",
+       "      <th>R1:S_2058</th>\n",
+       "      <th>R2:S_0</th>\n",
+       "      <th>R3:S_0</th>\n",
+       "      <th>R4:S_0</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>83</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>61</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    R1:S_0  R1:S_2058  R2:S_0  R3:S_0  R4:S_0\n",
+       "44       1          0       1       1       1\n",
+       "83       1          0       1       1       1\n",
+       "61       1          0       1       1       1"
+      ]
+     },
+     "execution_count": 1652,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#let's save a copy before proceeding\n",
+    "results_df = test_raw_data.copy()\n",
+    "\n",
+    "test_RS_frame = test_raw_data[column_names_RS]\n",
+    "test_RS_frame = test_RS_frame.astype('int')\n",
+    "test_RS_frame = test_RS_frame.astype('category')\n",
+    "test_RS_frame = pd.get_dummies(test_RS_frame, dtype=int)\n",
+    "test_RS_frame.sample(3)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "4be4f161",
+   "metadata": {},
+   "source": [
+    "It is observed that the test data does not have all the categorical values of the RS features that appear in the training set. For numerical analysis, let's add the missing dummy columns by hand."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1653,
+   "id": "2e3ae9b9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>R1:S_0</th>\n",
+       "      <th>R1:S_2048</th>\n",
+       "      <th>R1:S_2058</th>\n",
+       "      <th>R1:S_270336</th>\n",
+       "      <th>R2:S_0</th>\n",
+       "      <th>R2:S_270336</th>\n",
+       "      <th>R3:S_0</th>\n",
+       "      <th>R3:S_2048</th>\n",
+       "      <th>R3:S_270336</th>\n",
+       "      <th>R4:S_0</th>\n",
+       "      <th>R4:S_2058</th>\n",
+       "      <th>R4:S_270336</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>74</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    R1:S_0  R1:S_2048  R1:S_2058  R1:S_270336  R2:S_0  R2:S_270336  R3:S_0  \\\n",
+       "74       1          0          0            0       1            0       1   \n",
+       "8        1          0          0            0       1            0       1   \n",
+       "\n",
+       "    R3:S_2048  R3:S_270336  R4:S_0  R4:S_2058  R4:S_270336  \n",
+       "74          0            0       1          0            0  \n",
+       "8           0            0       1          0            0  "
+      ]
+     },
+     "execution_count": 1653,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = RS_frame.columns.values\n",
+    "b = test_RS_frame.columns.values\n",
+    "missing_cols = [ai for ai in a if ai not in b]\n",
+    "missing_cols\n",
+    "for col in a :\n",
+    "    if col not in b :\n",
+    "        test_RS_frame[col] = 0\n",
+    "\n",
+    "#order columns as in training\n",
+    "test_RS_frame = test_RS_frame.loc[:,a]\n",
+    "test_RS_frame.sample(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1654,
+   "id": "890ce249",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 100 entries, 0 to 99\n",
+      "Columns: 124 entries, R1-PA1:VH to snort_log4\n",
+      "dtypes: float64(112), int64(12)\n",
+      "memory usage: 97.0 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "#let's drop RS columns\n",
+    "test_raw_data = test_raw_data.drop(column_names_RS, axis=1)\n",
+    "test_raw_data.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1655,
+   "id": "24da31bd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#let's deal with features requiring normalisation\n",
+    "unscaled_test_input = test_raw_data.iloc[:,:-12]\n",
+    "unscaled_test_input.replace([np.inf, -np.inf], np.nan, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1656,
+   "id": "d17cd7e4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(100, 112)"
+      ]
+     },
+     "execution_count": 1656,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "scaled_test_input = scaler.transform(unscaled_test_input)\n",
+    "scaled_test_input.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1657,
+   "id": "33845d19",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 100 entries, 0 to 99\n",
+      "Columns: 112 entries, R1-PA1:VH to R4-PA:ZH\n",
+      "dtypes: float64(112)\n",
+      "memory usage: 87.6 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "test_df1_scaled = pd.DataFrame(scaled_test_input, index=unscaled_test_input.index, columns=column_names_updated[:scaled_test_input.shape[1]])\n",
+    "test_df1_scaled.replace(np.nan, 1, inplace=True)\n",
+    "test_df1_scaled.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1658,
+   "id": "75c977ec",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 100 entries, 0 to 99\n",
+      "Data columns (total 12 columns):\n",
+      " #   Column              Non-Null Count  Dtype\n",
+      "---  ------              --------------  -----\n",
+      " 0   control_panel_log1  100 non-null    int64\n",
+      " 1   control_panel_log2  100 non-null    int64\n",
+      " 2   control_panel_log3  100 non-null    int64\n",
+      " 3   control_panel_log4  100 non-null    int64\n",
+      " 4   relay1_log          100 non-null    int64\n",
+      " 5   relay2_log          100 non-null    int64\n",
+      " 6   relay3_log          100 non-null    int64\n",
+      " 7   relay4_log          100 non-null    int64\n",
+      " 8   snort_log1          100 non-null    int64\n",
+      " 9   snort_log2          100 non-null    int64\n",
+      " 10  snort_log3          100 non-null    int64\n",
+      " 11  snort_log4          100 non-null    int64\n",
+      "dtypes: int64(12)\n",
+      "memory usage: 9.5 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "test_df2 = test_raw_data.iloc[:,-12:]\n",
+    "test_df2.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1659,
+   "id": "4f176a75",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 100 entries, 0 to 99\n",
+      "Columns: 136 entries, R1-PA1:VH to R4:S_270336\n",
+      "dtypes: float64(112), int32(5), int64(19)\n",
+      "memory usage: 104.4 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "test_X = pd.concat([test_df1_scaled, test_df2, test_RS_frame], axis=1)\n",
+    "test_X.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1660,
+   "id": "7aacce46",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+       "       0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
+       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)"
+      ]
+     },
+     "execution_count": 1660,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "results = clf_gb.predict(test_X)\n",
+    "results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1661,
+   "id": "c627bdcf",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 100 entries, 0 to 99\n",
+      "Columns: 129 entries, R1-PA1:VH to marker\n",
+      "dtypes: float64(112), int64(17)\n",
+      "memory usage: 100.9 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "results_df['marker'] = results  # predictions are already 1-D (one label per row), so they are assigned directly\n",
+    "results_df.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1662,
+   "id": "0ea24283",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>R1-PA1:VH</th>\n",
+       "      <th>R1-PM1:V</th>\n",
+       "      <th>R1-PA2:VH</th>\n",
+       "      <th>R1-PM2:V</th>\n",
+       "      <th>R1-PA3:VH</th>\n",
+       "      <th>R1-PM3:V</th>\n",
+       "      <th>R1-PA4:IH</th>\n",
+       "      <th>R1-PM4:I</th>\n",
+       "      <th>R1-PA5:IH</th>\n",
+       "      <th>R1-PM5:I</th>\n",
+       "      <th>...</th>\n",
+       "      <th>control_panel_log4</th>\n",
+       "      <th>relay1_log</th>\n",
+       "      <th>relay2_log</th>\n",
+       "      <th>relay3_log</th>\n",
+       "      <th>relay4_log</th>\n",
+       "      <th>snort_log1</th>\n",
+       "      <th>snort_log2</th>\n",
+       "      <th>snort_log3</th>\n",
+       "      <th>snort_log4</th>\n",
+       "      <th>marker</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>88.120909</td>\n",
+       "      <td>131007.83580</td>\n",
+       "      <td>-31.879372</td>\n",
+       "      <td>130381.0040</td>\n",
+       "      <td>-151.862464</td>\n",
+       "      <td>131083.0556</td>\n",
+       "      <td>83.439844</td>\n",
+       "      <td>485.79083</td>\n",
+       "      <td>-36.566166</td>\n",
+       "      <td>485.97394</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>55.244591</td>\n",
+       "      <td>131208.42190</td>\n",
+       "      <td>-64.755690</td>\n",
+       "      <td>130556.5169</td>\n",
+       "      <td>175.267790</td>\n",
+       "      <td>131233.4952</td>\n",
+       "      <td>50.987514</td>\n",
+       "      <td>463.63452</td>\n",
+       "      <td>-68.949741</td>\n",
+       "      <td>463.63452</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>-104.123620</td>\n",
+       "      <td>75846.64175</td>\n",
+       "      <td>144.293691</td>\n",
+       "      <td>121931.3120</td>\n",
+       "      <td>11.751364</td>\n",
+       "      <td>123636.2944</td>\n",
+       "      <td>-165.607721</td>\n",
+       "      <td>1580.60552</td>\n",
+       "      <td>137.676029</td>\n",
+       "      <td>391.12296</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>11.865956</td>\n",
+       "      <td>131810.18040</td>\n",
+       "      <td>-108.111406</td>\n",
+       "      <td>131183.3486</td>\n",
+       "      <td>131.889155</td>\n",
+       "      <td>131860.3269</td>\n",
+       "      <td>9.774660</td>\n",
+       "      <td>370.98086</td>\n",
+       "      <td>-110.162595</td>\n",
+       "      <td>370.98086</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>88.888672</td>\n",
+       "      <td>131032.90900</td>\n",
+       "      <td>-31.105879</td>\n",
+       "      <td>130406.0773</td>\n",
+       "      <td>-151.088971</td>\n",
+       "      <td>131083.0556</td>\n",
+       "      <td>84.683162</td>\n",
+       "      <td>483.77662</td>\n",
+       "      <td>-35.775485</td>\n",
+       "      <td>485.79083</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 129 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     R1-PA1:VH      R1-PM1:V   R1-PA2:VH     R1-PM2:V   R1-PA3:VH  \\\n",
+       "20   88.120909  131007.83580  -31.879372  130381.0040 -151.862464   \n",
+       "12   55.244591  131208.42190  -64.755690  130556.5169  175.267790   \n",
+       "25 -104.123620   75846.64175  144.293691  121931.3120   11.751364   \n",
+       "5    11.865956  131810.18040 -108.111406  131183.3486  131.889155   \n",
+       "14   88.888672  131032.90900  -31.105879  130406.0773 -151.088971   \n",
+       "\n",
+       "       R1-PM3:V   R1-PA4:IH    R1-PM4:I   R1-PA5:IH   R1-PM5:I  ...  \\\n",
+       "20  131083.0556   83.439844   485.79083  -36.566166  485.97394  ...   \n",
+       "12  131233.4952   50.987514   463.63452  -68.949741  463.63452  ...   \n",
+       "25  123636.2944 -165.607721  1580.60552  137.676029  391.12296  ...   \n",
+       "5   131860.3269    9.774660   370.98086 -110.162595  370.98086  ...   \n",
+       "14  131083.0556   84.683162   483.77662  -35.775485  485.79083  ...   \n",
+       "\n",
+       "    control_panel_log4  relay1_log  relay2_log  relay3_log  relay4_log  \\\n",
+       "20                   0           0           0           0           0   \n",
+       "12                   0           0           0           0           0   \n",
+       "25                   0           0           0           0           0   \n",
+       "5                    0           0           0           0           0   \n",
+       "14                   0           0           0           0           0   \n",
+       "\n",
+       "    snort_log1  snort_log2  snort_log3  snort_log4  marker  \n",
+       "20           0           0           0           0       1  \n",
+       "12           0           0           0           0       1  \n",
+       "25           0           0           0           0       1  \n",
+       "5            0           0           0           0       1  \n",
+       "14           0           0           0           0       1  \n",
+       "\n",
+       "[5 rows x 129 columns]"
+      ]
+     },
+     "execution_count": 1662,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "results_df.sample(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1663,
+   "id": "20c29600",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results_df.to_csv('TestingResultsBinary.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1664,
+   "id": "58c758f8",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "marker count:\n",
+      "1    56\n",
+      "0    44\n",
+      "Name: marker, dtype: int64\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 2000x600 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import matplotlib.patches as mpatches\n",
+    "\n",
+    "# Predicted label of every test trace in row order, plus the per-class totals\n",
+    "results_sequence = results_df['marker']\n",
+    "results_sequence_counts = results_sequence.value_counts()\n",
+    "print('marker count:')\n",
+    "print(results_sequence_counts)\n",
+    "# One colour per trace: blue where the label is 0, red for anything else\n",
+    "colors = np.where(results_sequence == 0, 'blue', 'red').tolist()\n",
+    "\n",
+    "fig, ax = plt.subplots(figsize=(20, 6))\n",
+    "ax.bar(np.arange(len(results_sequence)), np.ones_like(results_sequence), color=colors, width=1)  # unit-height bars: only the colour carries information\n",
+    "patches = [mpatches.Patch(color='blue', label='Normal     (0)'), mpatches.Patch(color='red', label='Abnormal (1)')]\n",
+    "ax.legend(handles=patches, bbox_to_anchor=(1, 1), loc='upper left')  # legend sits outside the axes, to the right\n",
+    "ax.set_xlabel('Sequence')\n",
+    "ax.set_ylabel('Events')\n",
+    "ax.set_title('Sequence of Predicted Events')\n",
+    "ax.set_yticks([])  # bar height is constant, so the y scale is hidden\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
-- 
GitLab