# TODO this still allows users to access the data of other users
def in_permitted_directory(path: typing.Union[pathlib.Path, str]) -> bool:
    """
    Is the file being accessed in a permitted directory?

    Permitted directories are:
    - MEDIA_ROOT
    - BASE_DIR/data - if in debug mode

    All paths are canonicalised (made absolute, ``..`` components collapsed and
    symlinks followed) before they are compared.  ``Path.parents`` is a purely
    lexical operation, so without this step a location such as
    ``MEDIA_ROOT/../secret.csv`` would still list MEDIA_ROOT among its parents
    and be wrongly accepted.

    The permitted directory itself is not treated as a permitted file - only
    paths strictly below it are accepted.

    :param path: File path to check
    :return: Is file in a permitted directory?
    """
    # Function-scope import: this module does not import os at file level
    import os.path

    def canonical(raw: typing.Union[pathlib.Path, str]) -> pathlib.Path:
        # realpath copes with files that do not exist yet
        return pathlib.Path(os.path.realpath(str(raw)))

    ancestors = canonical(path).parents

    if canonical(settings.MEDIA_ROOT) in ancestors:
        return True

    # The bundled test data directory is only reachable in debug mode
    if settings.DEBUG and canonical(pathlib.Path(settings.BASE_DIR).joinpath('data')) in ancestors:
        return True

    return False
""" + def __init__(self, location: str, + api_key: typing.Optional[str] = None, + auth: typing.Optional[typing.Callable] = None, + **kwargs): + if not in_permitted_directory(location): + raise PermissionError('File being accessed is not within the permitted directory') + + super().__init__(location, api_key, auth, **kwargs) + def get_response(self, params: typing.Optional[typing.Mapping[str, str]] = None): """ diff --git a/datasources/models.py b/datasources/models.py index 865890c4bcde2465797a94e3d7e4480fbffbacb5..254393443aad13101cc3ad55ca713316ccc50788 100644 --- a/datasources/models.py +++ b/datasources/models.py @@ -221,10 +221,11 @@ class DataSource(BaseAppDataModel): self.data_connector.get_metadata(), indent=4 )) - except (KeyError, NotImplementedError, ValueError): + except (KeyError, NotImplementedError, ValueError, PermissionError): # KeyError: Plugin was not found # NotImplementedError: Plugin does not support metadata # ValueError: Plugin was not set + # PermissionError: File exists outside of permitted directory - not the responsibility of the search record pass result = '\n'.join(lines) diff --git a/pedasi/settings.py b/pedasi/settings.py index 1786cc00ec1fa299bbe62696f55286badff1ba2e..2d9e171c4b2f32c3f242c5f0fde0a4d285c94ef7 100644 --- a/pedasi/settings.py +++ b/pedasi/settings.py @@ -303,3 +303,8 @@ STATICFILES_DIRS = [ os.path.join(BASE_DIR, 'pedasi', 'static'), os.path.join(BASE_DIR, 'docs', 'build'), ] + +# Media directory - files uploaded by users + +MEDIA_URL = '/media/' +MEDIA_ROOT = os.path.join(BASE_DIR, 'media')