diff --git a/COMP3217.docx b/COMP3217.docx
new file mode 100644
index 0000000000000000000000000000000000000000..e4bd96f44bf0065b1cc577a2cd42134a8fa508e1
Binary files /dev/null and b/COMP3217.docx differ
diff --git a/part1 (2).ipynb b/part1 (2).ipynb
deleted file mode 100644
index 93a16f5a426677ad43fd92407f797760c255ae26..0000000000000000000000000000000000000000
--- a/part1 (2).ipynb	
+++ /dev/null
@@ -1,537 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "import sklearn\n",
-    "import scipy\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "from sklearn.preprocessing import StandardScaler\n",
-    "from sklearn.linear_model import LogisticRegression\n",
-    "import matplotlib.pyplot as plt\n",
-    "from sklearn.decomposition import PCA\n",
-    "from sklearn.impute import SimpleImputer\n",
-    "from sklearn.model_selection import GridSearchCV\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "from sklearn.preprocessing import StandardScaler\n",
-    "from sklearn.impute import SimpleImputer\n",
-    "from sklearn.decomposition import PCA\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n",
-    "from sklearn.model_selection import GridSearchCV\n",
-    "import pandas as pd\n",
-    "from sklearn.impute import SimpleImputer\n",
-    "from sklearn.model_selection import RandomizedSearchCV\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#Read CSV file as Pandas Dataframe\n",
-    "train_df = pd.read_csv('TrainingDataBinary.csv')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 6000 entries, 0 to 5999\n",
-      "Columns: 129 entries, 1 to 129\n",
-      "dtypes: float64(112), int64(17)\n",
-      "memory usage: 5.9 MB\n",
-      "None\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(train_df.info())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(array([3000.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,\n",
-       "        3000.]),\n",
-       " array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),\n",
-       " <BarContainer object of 10 artists>)"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "image/png": "",
-      "text/plain": [
-       "<Figure size 640x480 with 1 Axes>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "# Create a histogram to show the distribution of a column\n",
-    "plt.hist(train_df['129'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "scaler = StandardScaler()\n",
-    "\n",
-    "# Separate the features from the target variable\n",
-    "X = train_df.drop('129', axis=1)\n",
-    "y = train_df['129']\n",
-    "\n",
-    "#Fix infinite value error\n",
-    "# X[X == np.inf] = np.finfo('float64').max\n",
-    "X.replace([np.inf,-np.inf],0,inplace=True)\n",
-    "\n",
-    "# Create a SimpleImputer object to replace NaN values with the mean value of the corresponding column\n",
-    "imputer = SimpleImputer(strategy='mean')\n",
-    "\n",
-    "# Impute the missing values in the features data\n",
-    "X_imputed = imputer.fit_transform(X)\n",
-    "\n",
-    "# Fit the scaler to the features data and transform the data\n",
-    "X_scaled = scaler.fit_transform(X_imputed)\n",
-    "\n",
-    "# # The transformed data will be a numpy array, so you can convert it back to a DataFrame\n",
-    "# X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#PCA\n",
-    "pca = PCA(n_components=100)\n",
-    "X_pca = pca.fit_transform(X_scaled)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Accuracy: 0.895\n",
-      "Classification Report:\n",
-      "               precision    recall  f1-score   support\n",
-      "\n",
-      "           0       0.86      0.94      0.90       588\n",
-      "           1       0.93      0.86      0.89       612\n",
-      "\n",
-      "    accuracy                           0.90      1200\n",
-      "   macro avg       0.90      0.90      0.89      1200\n",
-      "weighted avg       0.90      0.90      0.89      1200\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n"
-     ]
-    }
-   ],
-   "source": [
-    "#split data\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)\n",
-    "\n",
-    "#train the model\n",
-    "log_reg = LogisticRegression()\n",
-    "log_reg.fit(X_train, y_train)\n",
-    "\n",
-    "# 5. Evaluate the model on the testing set\n",
-    "y_pred = log_reg.predict(X_test)\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "\n",
-    "report = classification_report(y_test, y_pred)\n",
-    "\n",
-    "print(\"Accuracy:\", accuracy)\n",
-    "\n",
-    "print(\"Classification Report:\\n\", report)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Confusion Matrix:\n",
-      " [[550  38]\n",
-      " [ 88 524]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "print(\"Confusion Matrix:\\n\", conf_matrix)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "[[True Negatives (TN), False Positives (FP)],\n",
-    " [False Negatives (FN), True Positives (TP)]]"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Fine tuning"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Fitting 3 folds for each of 100 candidates, totalling 300 fits\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py:378: FitFailedWarning: \n",
-      "120 fits failed out of a total of 300.\n",
-      "The score on these train-test partitions for these parameters will be set to nan.\n",
-      "If these failures are not expected, you can try to debug them by setting error_score='raise'.\n",
-      "\n",
-      "Below are more details about the failures:\n",
-      "--------------------------------------------------------------------------------\n",
-      "24 fits failed with the following error:\n",
-      "Traceback (most recent call last):\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
-      "    estimator.fit(X_train, y_train, **fit_params)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
-      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
-      "    raise ValueError(\n",
-      "ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got elasticnet penalty.\n",
-      "\n",
-      "--------------------------------------------------------------------------------\n",
-      "15 fits failed with the following error:\n",
-      "Traceback (most recent call last):\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
-      "    estimator.fit(X_train, y_train, **fit_params)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
-      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 71, in _check_solver\n",
-      "    raise ValueError(\"penalty='none' is not supported for the liblinear solver\")\n",
-      "ValueError: penalty='none' is not supported for the liblinear solver\n",
-      "\n",
-      "--------------------------------------------------------------------------------\n",
-      "18 fits failed with the following error:\n",
-      "Traceback (most recent call last):\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
-      "    estimator.fit(X_train, y_train, **fit_params)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
-      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
-      "    raise ValueError(\n",
-      "ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.\n",
-      "\n",
-      "--------------------------------------------------------------------------------\n",
-      "6 fits failed with the following error:\n",
-      "Traceback (most recent call last):\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
-      "    estimator.fit(X_train, y_train, **fit_params)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1291, in fit\n",
-      "    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\utils\\parallel.py\", line 63, in __call__\n",
-      "    return super().__call__(iterable_with_config)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\parallel.py\", line 1085, in __call__\n",
-      "    if self.dispatch_one_batch(iterator):\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\parallel.py\", line 901, in dispatch_one_batch\n",
-      "    self._dispatch(tasks)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\parallel.py\", line 819, in _dispatch\n",
-      "    job = self._backend.apply_async(batch, callback=cb)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\_parallel_backends.py\", line 208, in apply_async\n",
-      "    result = ImmediateResult(func)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\_parallel_backends.py\", line 597, in __init__\n",
-      "    self.results = batch()\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\parallel.py\", line 288, in __call__\n",
-      "    return [func(*args, **kwargs)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\parallel.py\", line 288, in <listcomp>\n",
-      "    return [func(*args, **kwargs)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\utils\\parallel.py\", line 123, in __call__\n",
-      "    return self.function(*args, **kwargs)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 521, in _logistic_regression_path\n",
-      "    alpha = (1.0 / C) * (1 - l1_ratio)\n",
-      "TypeError: unsupported operand type(s) for -: 'int' and 'NoneType'\n",
-      "\n",
-      "--------------------------------------------------------------------------------\n",
-      "21 fits failed with the following error:\n",
-      "Traceback (most recent call last):\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
-      "    estimator.fit(X_train, y_train, **fit_params)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
-      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 64, in _check_solver\n",
-      "    raise ValueError(\n",
-      "ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.\n",
-      "\n",
-      "--------------------------------------------------------------------------------\n",
-      "12 fits failed with the following error:\n",
-      "Traceback (most recent call last):\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
-      "    estimator.fit(X_train, y_train, **fit_params)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
-      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
-      "    raise ValueError(\n",
-      "ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.\n",
-      "\n",
-      "--------------------------------------------------------------------------------\n",
-      "9 fits failed with the following error:\n",
-      "Traceback (most recent call last):\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
-      "    estimator.fit(X_train, y_train, **fit_params)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
-      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
-      "    raise ValueError(\n",
-      "ValueError: Solver sag supports only 'l2' or 'none' penalties, got elasticnet penalty.\n",
-      "\n",
-      "--------------------------------------------------------------------------------\n",
-      "6 fits failed with the following error:\n",
-      "Traceback (most recent call last):\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
-      "    estimator.fit(X_train, y_train, **fit_params)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
-      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
-      "    raise ValueError(\n",
-      "ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got elasticnet penalty.\n",
-      "\n",
-      "--------------------------------------------------------------------------------\n",
-      "9 fits failed with the following error:\n",
-      "Traceback (most recent call last):\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
-      "    estimator.fit(X_train, y_train, **fit_params)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
-      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
-      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
-      "    raise ValueError(\n",
-      "ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.\n",
-      "\n",
-      "  warnings.warn(some_fits_failed_message, FitFailedWarning)\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_search.py:952: UserWarning: One or more of the test scores are non-finite: [       nan 0.87666667 0.92083333        nan        nan 0.87416667\n",
-      "        nan 0.87666667 0.864375   0.87645833 0.78208333 0.87854167\n",
-      " 0.72333333 0.87854167 0.85645833        nan        nan        nan\n",
-      " 0.85083333 0.72333333 0.5025     0.92020833 0.78208333 0.918125\n",
-      " 0.86458333 0.87666667        nan 0.9225     0.90375           nan\n",
-      " 0.78208333        nan 0.5025            nan        nan        nan\n",
-      "        nan 0.78208333        nan 0.78208333 0.85645833 0.628125\n",
-      " 0.918125          nan 0.49916667 0.85875           nan 0.49916667\n",
-      "        nan        nan 0.87791667 0.86520833        nan 0.9225\n",
-      "        nan 0.918125   0.865625   0.84166667        nan 0.9225\n",
-      " 0.90375    0.918125   0.87375    0.918125   0.864375          nan\n",
-      "        nan 0.87666667        nan 0.90375    0.85625    0.62895833\n",
-      "        nan        nan 0.85625           nan        nan 0.87854167\n",
-      " 0.85645833        nan 0.87791667 0.90395833 0.87854167        nan\n",
-      "        nan 0.87375    0.78208333 0.87666667        nan        nan\n",
-      " 0.78208333 0.90270833        nan        nan 0.85625           nan\n",
-      " 0.86583333        nan        nan        nan]\n",
-      "  warnings.warn(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1173: FutureWarning: `penalty='none'`has been deprecated in 1.2 and will be removed in 1.4. To keep the past behaviour, set `penalty=None`.\n",
-      "  warnings.warn(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1181: UserWarning: Setting penalty=None will ignore the C and l1_ratio parameters\n",
-      "  warnings.warn(\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Best Parameters: {'solver': 'newton-cg', 'penalty': 'none', 'max_iter': 100, 'C': 0.005994842503189409}\n",
-      "Best Score: 0.9225\n",
-      "Accuracy: 0.9325\n",
-      "Confusion Matrix:\n",
-      " [[557  31]\n",
-      " [ 50 562]]\n",
-      "Classification Report:\n",
-      "               precision    recall  f1-score   support\n",
-      "\n",
-      "           0       0.92      0.95      0.93       588\n",
-      "           1       0.95      0.92      0.93       612\n",
-      "\n",
-      "    accuracy                           0.93      1200\n",
-      "   macro avg       0.93      0.93      0.93      1200\n",
-      "weighted avg       0.93      0.93      0.93      1200\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\utils\\optimize.py:210: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations.\n",
-      "  warnings.warn(\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "param_dist = {\n",
-    "    'penalty': ['l1', 'l2', 'elasticnet', 'none'],\n",
-    "    'C': np.logspace(-4, 4, 10),\n",
-    "    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],\n",
-    "    'max_iter': [100, 500, 1000],\n",
-    "}\n",
-    "\n",
-    "# Create the RandomizedSearchCV object with the logistic regression model, hyperparameters, and cross-validation\n",
-    "log_reg = LogisticRegression()\n",
-    "random_search = RandomizedSearchCV(log_reg, param_dist, n_iter=100, cv=3, n_jobs=-1, verbose=1, random_state=42)\n",
-    "\n",
-    "# Fit the random search to the training data\n",
-    "random_search.fit(X_train, y_train)\n",
-    "\n",
-    "# Check the best hyperparameters found\n",
-    "print(\"Best Parameters:\", random_search.best_params_)\n",
-    "print(\"Best Score:\", random_search.best_score_)\n",
-    "\n",
-    "# Use the best estimator for predictions and evaluation\n",
-    "best_model = random_search.best_estimator_\n",
-    "y_pred = best_model.predict(X_test)\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "report = classification_report(y_test, y_pred)\n",
-    "\n",
-    "print(\"Accuracy:\", accuracy)\n",
-    "print(\"Confusion Matrix:\\n\", conf_matrix)\n",
-    "print(\"Classification Report:\\n\", report)\n"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_data = pd.read_csv('TestingDataBinary.csv')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Preprocessing\n",
-    "X_new = test_data\n",
-    "X_new.replace([np.inf, -np.inf], 0, inplace=True)\n",
-    "\n",
-    "# Impute the missing values in the features data\n",
-    "X_imputed_new = imputer.transform(X_new)\n",
-    "\n",
-    "# Scale the features data\n",
-    "X_scaled_new = scaler.transform(X_imputed_new)\n",
-    "\n",
-    "# Apply PCA transformation\n",
-    "X_pca_new = pca.transform(X_scaled_new)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Use the best estimator for predictions on the new data\n",
-    "y_pred_new = best_model.predict(X_pca_new)\n",
-    "\n",
-    "# Save the predictions to a new column in the DataFrame\n",
-    "test_data['predicted_marker'] = y_pred_new\n",
-    "\n",
-    "# Save the updated DataFrame to a new CSV file\n",
-    "test_data.to_csv('TestingDataBinary_with_predictions.csv', index=False)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.0"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/part1.ipynb b/part1.ipynb
index 0824cedf423c3bb0134c30884073a94782bd0dbe..4d6c92eebc90ec3d5a30f371405689f5c4a699bc 100644
--- a/part1.ipynb
+++ b/part1.ipynb
@@ -17,7 +17,19 @@
     "import matplotlib.pyplot as plt\n",
     "from sklearn.decomposition import PCA\n",
     "from sklearn.impute import SimpleImputer\n",
-    "from sklearn.model_selection import GridSearchCV"
+    "from sklearn.model_selection import GridSearchCV\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.impute import SimpleImputer\n",
+    "from sklearn.decomposition import PCA\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n",
+    "from sklearn.model_selection import GridSearchCV\n",
+    "import pandas as pd\n",
+    "from sklearn.impute import SimpleImputer\n",
+    "from sklearn.model_selection import RandomizedSearchCV\n"
    ]
   },
   {
@@ -34,6 +46,28 @@
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 6000 entries, 0 to 5999\n",
+      "Columns: 129 entries, 1 to 129\n",
+      "dtypes: float64(112), int64(17)\n",
+      "memory usage: 5.9 MB\n",
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(train_df.info())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -44,7 +78,7 @@
        " <BarContainer object of 10 artists>)"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -61,7 +95,7 @@
    ],
    "source": [
     "# Create a histogram to show the distribution of a column\n",
-    "plt.hist(train_df['marker'])"
+    "plt.hist(train_df['129'])"
    ]
   },
   {
@@ -73,11 +107,10 @@
     "scaler = StandardScaler()\n",
     "\n",
     "# Separate the features from the target variable\n",
-    "X = train_df.drop('marker', axis=1)\n",
-    "y = train_df['marker']\n",
+    "X = train_df.drop('129', axis=1)\n",
+    "y = train_df['129']\n",
     "\n",
     "#Fix infinite value error\n",
-    "# X[X == np.inf] = np.finfo('float64').max\n",
     "X.replace([np.inf,-np.inf],0,inplace=True)\n",
     "\n",
     "# Create a SimpleImputer object to replace NaN values with the mean value of the corresponding column\n",
@@ -95,101 +128,106 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
-    "n_components = 100\n",
-    "pca = PCA(n_components=n_components)\n",
-    "principal_components = pca.fit_transform(X_scaled)\n",
-    "\n",
-    "# Create a DataFrame with the loadings\n",
-    "loadings = pd.DataFrame(pca.components_.T, columns=[f'PC{i+1}' for i in range(n_components)], index=X.columns)\n",
-    "\n",
-    "# Apply PCA to the scaled data\n",
-    "# pca = PCA(n_components=100)\n",
-    "# X_pca = pca.fit_transform(X_scaled)\n",
-    "\n",
-    "# Split the data into training and testing sets\n",
-    "X_train, X_test, y_train, y_test = train_test_split(pca, y, test_size=0.2,random_state=42)\n",
-    "\n",
-    "# # Train the model on the training data\n",
-    "# lr.fit(X_train, y_train)\n",
-    "\n",
-    "# # Predict the labels for the test data\n",
-    "# y_pred = lr.predict(X_test)\n",
-    "\n",
-    "# # Evaluate the model performance\n",
-    "# print(\"Accuracy:\", accuracy_score(y_test, y_pred))"
+    "# Project the scaled features onto the first 100 principal components\n",
+    "pca = PCA(n_components=100)\n",
+    "X_pca = pca.fit_transform(X_scaled)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.895\n",
+      "Classification Report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "           0       0.86      0.94      0.90       588\n",
+      "           1       0.93      0.86      0.89       612\n",
+      "\n",
+      "    accuracy                           0.90      1200\n",
+      "   macro avg       0.90      0.90      0.89      1200\n",
+      "weighted avg       0.90      0.90      0.89      1200\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
+      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "\n",
+      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
+      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
+      "Please also refer to the documentation for alternative solver options:\n",
+      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
+      "  n_iter_i = _check_optimize_result(\n"
+     ]
+    }
+   ],
    "source": [
-    "X_test_pca = pca.transform(X_test_scaled)\n",
-    "clf = LogisticRegression(random_state=42)\n",
-    "clf.fit(X_train_pca, y_train)\n",
-    "\n",
+    "# Split the PCA features into training (80%) and test (20%) sets\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)\n",
     "\n",
-    "y_pred = clf.predict(X_test_pca)\n",
+    "#train the model\n",
+    "log_reg = LogisticRegression()\n",
+    "log_reg.fit(X_train, y_train)\n",
     "\n",
-    "# Calculate and print the accuracy of the model\n",
+    "# Evaluate the model on the held-out test set\n",
+    "y_pred = log_reg.predict(X_test)\n",
     "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(\"Accuracy:\", accuracy)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Read the test dataset\n",
-    "test_df = pd.read_csv('TestingDataBinary.csv')"
+    "\n",
+    "report = classification_report(y_test, y_pred)\n",
+    "\n",
+    "print(\"Accuracy:\", accuracy)\n",
+    "\n",
+    "print(\"Classification Report:\\n\", report)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Top 100 components:\n",
-      " [[ 3.72196354e+00 -5.87941588e+00 -4.02934784e-01 ...  4.15787367e-03\n",
-      "   1.89567282e-03  2.81043971e-03]\n",
-      " [ 1.25401316e+00 -5.82245182e+00 -7.51607953e-01 ...  5.71178351e-04\n",
-      "   1.64284342e-04  3.97691294e-03]\n",
-      " [ 1.24713154e+00 -5.82164239e+00 -7.59379345e-01 ...  3.40089202e-03\n",
-      "   2.59366304e-04  4.28360451e-03]\n",
-      " ...\n",
-      " [-6.89160079e-01 -5.50909843e+00 -4.69952506e-01 ... -2.71254494e-03\n",
-      "  -9.03351989e-05 -2.02581895e-03]\n",
-      " [ 7.34703326e-01 -5.58643030e+00 -5.41845944e-01 ... -3.62008786e-03\n",
-      "  -8.72999728e-05 -2.60358277e-03]\n",
-      " [ 7.35621169e-01 -5.58380312e+00 -5.36559421e-01 ... -3.46823833e-03\n",
-      "   3.30081328e-04 -2.83803266e-03]]\n"
+      "Confusion Matrix:\n",
+      " [[550  38]\n",
+      " [ 88 524]]\n"
      ]
     }
    ],
    "source": [
-    "# explained_variance_ratio = pca.explained_variance_ratio_\n",
-    "\n",
-    "\n",
-    "# sorted_indices = np.argsort(explained_variance_ratio)[::-1]\n",
-    "\n",
-    "# # Get the top 100 components\n",
-    "# top_100_indices = sorted_indices[:100]\n",
-    "# top_100_components = principal_components[:, top_100_indices]\n",
-    "# top_100_explained_variance_ratio = explained_variance_ratio[top_100_indices]\n",
-    "\n",
-    "\n",
-    "# print(\"Top 100 components:\\n\", top_100_components)"
+    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
+    "print(\"Confusion Matrix:\\n\", conf_matrix)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Confusion matrix layout (rows = actual class, columns = predicted class):\n",
+    "[[True Negatives (TN), False Positives (FP)], [False Negatives (FN), True Positives (TP)]]"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Hyperparameter tuning"
    ]
   },
   {
@@ -197,266 +235,279 @@
    "execution_count": 9,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fitting 3 folds for each of 100 candidates, totalling 300 fits\n"
+     ]
+    },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py:378: FitFailedWarning: \n",
+      "120 fits failed out of a total of 300.\n",
+      "The score on these train-test partitions for these parameters will be set to nan.\n",
+      "If these failures are not expected, you can try to debug them by setting error_score='raise'.\n",
       "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "Below are more details about the failures:\n",
+      "--------------------------------------------------------------------------------\n",
+      "24 fits failed with the following error:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
+      "    estimator.fit(X_train, y_train, **fit_params)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
+      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
+      "    raise ValueError(\n",
+      "ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got elasticnet penalty.\n",
       "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "--------------------------------------------------------------------------------\n",
+      "15 fits failed with the following error:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
+      "    estimator.fit(X_train, y_train, **fit_params)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
+      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 71, in _check_solver\n",
+      "    raise ValueError(\"penalty='none' is not supported for the liblinear solver\")\n",
+      "ValueError: penalty='none' is not supported for the liblinear solver\n",
       "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "--------------------------------------------------------------------------------\n",
+      "18 fits failed with the following error:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
+      "    estimator.fit(X_train, y_train, **fit_params)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
+      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
+      "    raise ValueError(\n",
+      "ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.\n",
       "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "--------------------------------------------------------------------------------\n",
+      "6 fits failed with the following error:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
+      "    estimator.fit(X_train, y_train, **fit_params)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1291, in fit\n",
+      "    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, prefer=prefer)(\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\utils\\parallel.py\", line 63, in __call__\n",
+      "    return super().__call__(iterable_with_config)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\parallel.py\", line 1085, in __call__\n",
+      "    if self.dispatch_one_batch(iterator):\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\parallel.py\", line 901, in dispatch_one_batch\n",
+      "    self._dispatch(tasks)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\parallel.py\", line 819, in _dispatch\n",
+      "    job = self._backend.apply_async(batch, callback=cb)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\_parallel_backends.py\", line 208, in apply_async\n",
+      "    result = ImmediateResult(func)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\_parallel_backends.py\", line 597, in __init__\n",
+      "    self.results = batch()\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\parallel.py\", line 288, in __call__\n",
+      "    return [func(*args, **kwargs)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\joblib\\parallel.py\", line 288, in <listcomp>\n",
+      "    return [func(*args, **kwargs)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\utils\\parallel.py\", line 123, in __call__\n",
+      "    return self.function(*args, **kwargs)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 521, in _logistic_regression_path\n",
+      "    alpha = (1.0 / C) * (1 - l1_ratio)\n",
+      "TypeError: unsupported operand type(s) for -: 'int' and 'NoneType'\n",
       "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "--------------------------------------------------------------------------------\n",
+      "21 fits failed with the following error:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
+      "    estimator.fit(X_train, y_train, **fit_params)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
+      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 64, in _check_solver\n",
+      "    raise ValueError(\n",
+      "ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.\n",
       "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "--------------------------------------------------------------------------------\n",
+      "12 fits failed with the following error:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
+      "    estimator.fit(X_train, y_train, **fit_params)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
+      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
+      "    raise ValueError(\n",
+      "ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.\n",
       "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "--------------------------------------------------------------------------------\n",
+      "9 fits failed with the following error:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
+      "    estimator.fit(X_train, y_train, **fit_params)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
+      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
+      "    raise ValueError(\n",
+      "ValueError: Solver sag supports only 'l2' or 'none' penalties, got elasticnet penalty.\n",
       "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "--------------------------------------------------------------------------------\n",
+      "6 fits failed with the following error:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
+      "    estimator.fit(X_train, y_train, **fit_params)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
+      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
+      "    raise ValueError(\n",
+      "ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got elasticnet penalty.\n",
       "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+      "--------------------------------------------------------------------------------\n",
+      "9 fits failed with the following error:\n",
+      "Traceback (most recent call last):\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 686, in _fit_and_score\n",
+      "    estimator.fit(X_train, y_train, **fit_params)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 1162, in fit\n",
+      "    solver = _check_solver(self.solver, self.penalty, self.dual)\n",
+      "  File \"c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py\", line 54, in _check_solver\n",
+      "    raise ValueError(\n",
+      "ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.\n",
       "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\svm\\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
-      "  warnings.warn(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\svm\\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
-      "  warnings.warn(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\svm\\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+      "  warnings.warn(some_fits_failed_message, FitFailedWarning)\n",
+      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\model_selection\\_search.py:952: UserWarning: One or more of the test scores are non-finite: [       nan 0.87666667 0.92083333        nan        nan 0.87416667\n",
+      "        nan 0.87666667 0.864375   0.87645833 0.78208333 0.87854167\n",
+      " 0.72333333 0.87854167 0.85645833        nan        nan        nan\n",
+      " 0.85083333 0.72333333 0.5025     0.92020833 0.78208333 0.918125\n",
+      " 0.86458333 0.87666667        nan 0.9225     0.90375           nan\n",
+      " 0.78208333        nan 0.5025            nan        nan        nan\n",
+      "        nan 0.78208333        nan 0.78208333 0.85645833 0.628125\n",
+      " 0.918125          nan 0.49916667 0.85875           nan 0.49916667\n",
+      "        nan        nan 0.87791667 0.86520833        nan 0.9225\n",
+      "        nan 0.918125   0.865625   0.84166667        nan 0.9225\n",
+      " 0.90375    0.918125   0.87375    0.918125   0.864375          nan\n",
+      "        nan 0.87666667        nan 0.90375    0.85625    0.62895833\n",
+      "        nan        nan 0.85625           nan        nan 0.87854167\n",
+      " 0.85645833        nan 0.87791667 0.90395833 0.87854167        nan\n",
+      "        nan 0.87375    0.78208333 0.87666667        nan        nan\n",
+      " 0.78208333 0.90270833        nan        nan 0.85625           nan\n",
+      " 0.86583333        nan        nan        nan]\n",
       "  warnings.warn(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\svm\\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1173: FutureWarning: `penalty='none'`has been deprecated in 1.2 and will be removed in 1.4. To keep the past behaviour, set `penalty=None`.\n",
       "  warnings.warn(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\svm\\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
-      "  warnings.warn(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n",
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
-      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
-      "\n",
-      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
-      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
-      "Please also refer to the documentation for alternative solver options:\n",
-      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
-      "  n_iter_i = _check_optimize_result(\n"
+      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1181: UserWarning: Setting penalty=None will ignore the C and l1_ratio parameters\n",
+      "  warnings.warn(\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Best hyperparameters:  {'C': 100, 'solver': 'liblinear'}\n",
-      "Best accuracy score:  0.8968333333333334\n"
+      "Best Parameters: {'solver': 'newton-cg', 'penalty': 'none', 'max_iter': 100, 'C': 0.005994842503189409}\n",
+      "Best Score: 0.9225\n",
+      "Accuracy: 0.9325\n",
+      "Confusion Matrix:\n",
+      " [[557  31]\n",
+      " [ 50 562]]\n",
+      "Classification Report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "           0       0.92      0.95      0.93       588\n",
+      "           1       0.95      0.92      0.93       612\n",
+      "\n",
+      "    accuracy                           0.93      1200\n",
+      "   macro avg       0.93      0.93      0.93      1200\n",
+      "weighted avg       0.93      0.93      0.93      1200\n",
+      "\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\svm\\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\utils\\optimize.py:210: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations.\n",
       "  warnings.warn(\n"
      ]
     }
    ],
    "source": [
-    "# Create a Logistic Regression model\n",
-    "lr = LogisticRegression()\n",
     "\n",
-    "# Define the parameter grid to search over\n",
-    "param_grid = {'C': [0.1, 1, 10, 100], 'solver': ['liblinear', 'lbfgs']}\n",
+    "param_dist = {\n",
+    "    'penalty': ['l1', 'l2', 'elasticnet', 'none'],\n",
+    "    'C': np.logspace(-4, 4, 10),\n",
+    "    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],\n",
+    "    'max_iter': [100, 500, 1000],\n",
+    "}\n",
+    "\n",
+    "# Create the RandomizedSearchCV object with the logistic regression model, hyperparameters, and cross-validation\n",
+    "log_reg = LogisticRegression()\n",
+    "random_search = RandomizedSearchCV(log_reg, param_dist, n_iter=100, cv=3, n_jobs=-1, verbose=1, random_state=42)\n",
     "\n",
-    "# Create a GridSearchCV object and fit it to the data\n",
-    "grid_search = GridSearchCV(lr, param_grid, cv=5)\n",
-    "grid_search.fit(X_scaled, y)\n",
+    "# Fit the random search to the training data\n",
+    "random_search.fit(X_train, y_train)\n",
     "\n",
-    "# Print the best hyperparameters and the corresponding accuracy score\n",
-    "print(\"Best hyperparameters: \", grid_search.best_params_)\n",
-    "print(\"Best accuracy score: \", grid_search.best_score_)"
+    "# Check the best hyperparameters found\n",
+    "print(\"Best Parameters:\", random_search.best_params_)\n",
+    "print(\"Best Score:\", random_search.best_score_)\n",
+    "\n",
+    "# Use the best estimator for predictions and evaluation\n",
+    "best_model = random_search.best_estimator_\n",
+    "y_pred = best_model.predict(X_test)\n",
+    "accuracy = accuracy_score(y_test, y_pred)\n",
+    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
+    "report = classification_report(y_test, y_pred)\n",
+    "\n",
+    "print(\"Accuracy:\", accuracy)\n",
+    "print(\"Confusion Matrix:\\n\", conf_matrix)\n",
+    "print(\"Classification Report:\\n\", report)\n"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Predict: apply the fitted preprocessing and the best model to the unseen test data"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 13,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Accuracy: 0.9158333333333334\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "c:\\Users\\user\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\sklearn\\svm\\_base.py:1244: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
-      "  warnings.warn(\n"
-     ]
-    }
-   ],
+   "outputs": [],
+   "source": [
+    "test_data = pd.read_csv('TestingDataBinary.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
    "source": [
-    "lr = LogisticRegression(C=100, solver='liblinear')\n",
-    "# Train the model on the training data\n",
-    "lr.fit(X_train, y_train)\n",
+    "# Preprocessing\n",
+    "X_new = test_data\n",
+    "X_new.replace([np.inf, -np.inf], 0, inplace=True)\n",
+    "\n",
+    "# Impute the missing values in the features data\n",
+    "X_imputed_new = imputer.transform(X_new)\n",
     "\n",
-    "# Predict the labels for the test data\n",
-    "y_pred = lr.predict(X_test)\n",
+    "# Scale the features data\n",
+    "X_scaled_new = scaler.transform(X_imputed_new)\n",
     "\n",
-    "# Evaluate the model performance\n",
-    "print(\"Accuracy:\", accuracy_score(y_test, y_pred))"
+    "# Apply PCA transformation\n",
+    "X_pca_new = pca.transform(X_scaled_new)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Normalize the features\n",
-    "test_df_scaled = scaler.transform(test_df)\n",
-    "\n",
-    "# Select the top 15 features\n",
-    "test_df_selected = test_df_scaled[:, :top_n]\n",
+    "# Use the best estimator for predictions on the new data\n",
+    "y_pred_new = best_model.predict(X_pca_new)\n",
     "\n",
-    "# Use the chosen model to predict AQI scores for the test dataset\n",
-    "test_predictions = rf_reg_selected.predict(test_df_selected)\n",
+    "# Save the predictions to a new column in the DataFrame\n",
+    "test_data['predicted_marker'] = y_pred_new\n",
     "\n",
-    "# Save the predictions to the subs.csv file\n",
-    "submission_df = pd.DataFrame({'AQI_Bucket': test_predictions})\n",
-    "# submission_df.to_csv(\"C:\\Users\\andre\\Downloads\\subs.csv\", index=False)"
+    "# Save the updated DataFrame to a new CSV file\n",
+    "test_data.to_csv('TestingDataBinary_with_predictions.csv', index=False)"
    ]
   }
  ],
diff --git a/part1.py b/part1.py
deleted file mode 100644
index 251f068a0805baebb11d5dc54c9bb4a4450db15a..0000000000000000000000000000000000000000
--- a/part1.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import pandas as pd
-import numpy as np
-import sklearn
-import scipy
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import accuracy_score
-import matplotlib.pyplot as plt
-
-#Read CSV file as Pandas Dataframe
-train_df = pd.read_csv('TrainingDataBinary.csv')
-test_df = pd.read_csv('TestingDataBinary.csv')
-
-#Confirm reading of files
-print(train_df.head)
-print("----------------------------------")
-print(test_df.head)
-
-# Get the summary statistics of the data
-print(train_df.describe())
-
-# Get the information about the columns of the DataFrame
-print(train_df.info())
-
-# Create a histogram to show the distribution of a column
-plt.hist(train_df['marker'])
\ No newline at end of file