From 95df944eb37fdbf400d99157729707f50778eddf Mon Sep 17 00:00:00 2001 From: bav1g20 <bav1g20@soton.ac.uk> Date: Wed, 7 Jun 2023 16:59:02 +0200 Subject: [PATCH] Updated README --- README.md | 235 ++++++++++++++---------------------------------------- 1 file changed, 60 insertions(+), 175 deletions(-) diff --git a/README.md b/README.md index ad4a1f1..a05cce6 100644 --- a/README.md +++ b/README.md @@ -1,176 +1,61 @@ -# Created by https://www.toptal.com/developers/gitignore/api/python -# Edit at https://www.toptal.com/developers/gitignore?templates=python +[GitLab Link](https://git.soton.ac.uk/bav1g20/compsys-coursework2) + +# Part A + +The approach taken for this machine learning task is inspired by work done earlier in the year for Computer Vision (COMP3204). In that earlier work I came across [AutoGluon](https://auto.gluon.ai/stable/index.html#), an automatic machine learning library for Python. It simplifies model training using the many different methods available and, where possible, attempts to improve the model by itself. +Using this approach, for this part, AutoGluon ranked the candidate models by accuracy as follows: + +| Model | Accuracy | +| ------------------- | -------- | +| LightGBM | 0.998333 | +| WeightedEnsemble_L2 | 0.998333 | +| LightGBMXT | 0.996667 | +| XGBoost | 0.995000 | +| RandomForestGini | 0.995000 | +| ExtraTreesEntr | 0.995000 | +| CatBoost | 0.991667 | +| LightGBMLarge | 0.990000 | +| RandomForestEntr | 0.990000 | +| NeuralNetTorch | 0.976667 | +| NeuralNetFastAI | 0.951667 | +| KNeighborsDist | 0.933333 | +| KNeighborsUnif | 0.905000 | + +The chosen model in this case is `WeightedEnsemble_L2`. + +The columns of this dataset were labelled sequentially, from `0` to `128` for the last column. +As an example, this is what that looks like: + +| 0 | 1 | 2 | ... | 126 | 127 | 128 | +| :-------: | :---------: | :--------: | :-: | :-: | :-: | :-: | +| 70.399324 | 127673.0908 | -49.572308 | ... 
| 0 | 0 | 0 | +| 73.688102 | 130280.7109 | -46.300719 | ... | 0 | 0 | 0 | + +This data was pulled directly from `TrainingDataBinary.csv`. +As `WeightedEnsemble_L2` has an accuracy of 99.833%, I did not deem any further tuning necessary, and it was therefore used for prediction to output `TestingResultsBinary.csv`. + +--- + +# Part B + +This part used the same approach as in [Part A](#part-a). + +| Model | Accuracy | +| ------------------- | -------- | +| LightGBMXT | 0.978333 | +| WeightedEnsemble_L2 | 0.978333 | +| LightGBM | 0.976667 | +| XGBoost | 0.973333 | +| LightGBMLarge | 0.970000 | +| ExtraTreesGini | 0.970000 | +| RandomForestGini | 0.968333 | +| ExtraTreesEntr | 0.968333 | +| RandomForestEntr | 0.966667 | +| CatBoost | 0.965000 | +| NeuralNetTorch | 0.881667 | +| NeuralNetFastAI | 0.876667 | +| KNeighborsDist | 0.856667 | +| KNeighborsUnif | 0.826667 | + +For this part, `WeightedEnsemble_L2` was also chosen as the go-to model for prediction, with 97.833% accuracy. -### Python ### -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ - -### Python Patch ### -# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration -poetry.toml - -# ruff -.ruff_cache/ - -# LSP config files -pyrightconfig.json - -# End of https://www.toptal.com/developers/gitignore/api/python -- GitLab