From d4bc8cb4f3d852121bc2b5d4b71fff6f7d5d1366 Mon Sep 17 00:00:00 2001 From: James Graham <J.Graham@software.ac.uk> Date: Wed, 14 Nov 2018 13:38:31 +0000 Subject: [PATCH] Add check to CsvConnector to prevent downloading of Django files - #19 --- README.md | 6 ++++-- datasources/connectors/csv.py | 37 +++++++++++++++++++++++++++++++++++ datasources/models.py | 3 ++- pedasi/settings.py | 5 +++++ 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 642e0fd..620a009 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,9 @@ To deploy using production settings you must: ## Configuring PEDASI Both PEDASI and Django are able to be configured via a `.env` file in the project root. -The only required configuration property is the Django SECRET_KEY which should be a randomly generated -character sequence. +The required configuration properties are: +- SECRET_KEY - should be a randomly generated value +- DATABASE_USER +- DATABASE_PASSWORD - should be a randomly generated value Other configuration properties are described at the top of `pedasi/settings.py`. diff --git a/datasources/connectors/csv.py b/datasources/connectors/csv.py index 700cdc3..d550a3a 100644 --- a/datasources/connectors/csv.py +++ b/datasources/connectors/csv.py @@ -1,12 +1,49 @@ +import pathlib import typing +from django.conf import settings + from .base import DataSetConnector, DummyRequestsResponse +# TODO this still allows users to access the data of other users +def in_permitted_directory(path: typing.Union[pathlib.Path, str]) -> bool: + """ + Is the file being accessed in a permitted directory? + + Permitted directories are: + - MEDIA_ROOT + - BASE_DIR/data - if in debug mode + + :param path: File path to check + :return: Is file in a permitted directory? + """ + path = pathlib.Path(path) + root_path = pathlib.Path(settings.MEDIA_ROOT) + test_files_path = pathlib.Path(settings.BASE_DIR).joinpath('data') + + if root_path in path.parents: + return True + + elif settings.DEBUG and test_files_path in path.parents: + return True + + return False + + class CsvConnector(DataSetConnector): """ Data connector for retrieving data from CSV files. """ + def __init__(self, location: str, + api_key: typing.Optional[str] = None, + auth: typing.Optional[typing.Callable] = None, + **kwargs): + if not in_permitted_directory(location): + raise PermissionError('File being accessed is not within the permitted directory') + + super().__init__(location, api_key, auth, **kwargs) + def get_response(self, params: typing.Optional[typing.Mapping[str, str]] = None): """ diff --git a/datasources/models.py b/datasources/models.py index 865890c..2543934 100644 --- a/datasources/models.py +++ b/datasources/models.py @@ -221,10 +221,11 @@ class DataSource(BaseAppDataModel): self.data_connector.get_metadata(), indent=4 )) - except (KeyError, NotImplementedError, ValueError): + except (KeyError, NotImplementedError, ValueError, PermissionError): # KeyError: Plugin was not found # NotImplementedError: Plugin does not support metadata # ValueError: Plugin was not set + # PermissionError: File exists outside of permitted directory - not the responsibility of the search record pass result = '\n'.join(lines) diff --git a/pedasi/settings.py b/pedasi/settings.py index 1786cc0..2d9e171 100644 --- a/pedasi/settings.py +++ b/pedasi/settings.py @@ -303,3 +303,8 @@ STATICFILES_DIRS = [ os.path.join(BASE_DIR, 'pedasi', 'static'), os.path.join(BASE_DIR, 'docs', 'build'), ] + +# Media directory - files uploaded by users + +MEDIA_URL = '/media/' +MEDIA_ROOT = os.path.join(BASE_DIR, 'media') -- GitLab