From d4bc8cb4f3d852121bc2b5d4b71fff6f7d5d1366 Mon Sep 17 00:00:00 2001
From: James Graham <J.Graham@software.ac.uk>
Date: Wed, 14 Nov 2018 13:38:31 +0000
Subject: [PATCH] Add check to CsvConnector to prevent downloading of Django
 files - #19

---
 README.md                     |  6 ++++--
 datasources/connectors/csv.py | 37 +++++++++++++++++++++++++++++++++++
 datasources/models.py         |  3 ++-
 pedasi/settings.py            |  5 +++++
 4 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 642e0fd..620a009 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,9 @@ To deploy using production settings you must:
 ## Configuring PEDASI
 Both PEDASI and Django are able to be configured via a `.env` file in the project root.
 
-The only required configuration property is the Django SECRET_KEY which should be a randomly generated
-character sequence.
+The required configuration properties are:
+- SECRET_KEY - should be a randomly generated value
+- DATABASE_USER
+- DATABASE_PASSWORD - should be a randomly generated value
 
 Other configuration properties are described at the top of `pedasi/settings.py`.
diff --git a/datasources/connectors/csv.py b/datasources/connectors/csv.py
index 700cdc3..d550a3a 100644
--- a/datasources/connectors/csv.py
+++ b/datasources/connectors/csv.py
@@ -1,12 +1,49 @@
+import pathlib
 import typing
 
+from django.conf import settings
+
 from .base import DataSetConnector, DummyRequestsResponse
 
 
+# TODO this still allows users to access the data of other users
+def in_permitted_directory(path: typing.Union[pathlib.Path, str]) -> bool:
+    """
+    Is the file being accessed in a permitted directory?
+
+    Permitted directories are:
+    - MEDIA_ROOT
+    - BASE_DIR/data - if in debug mode
+
+    :param path: File path to check
+    :return: Is file in a permitted directory?
+    """
+    path = pathlib.Path(path)
+    root_path = pathlib.Path(settings.MEDIA_ROOT)
+    test_files_path = pathlib.Path(settings.BASE_DIR).joinpath('data')
+
+    if root_path in path.parents:
+        return True
+
+    elif settings.DEBUG and test_files_path in path.parents:
+        return True
+
+    return False
+
+
 class CsvConnector(DataSetConnector):
     """
     Data connector for retrieving data from CSV files.
     """
+    def __init__(self, location: str,
+                 api_key: typing.Optional[str] = None,
+                 auth: typing.Optional[typing.Callable] = None,
+                 **kwargs):
+        if not in_permitted_directory(location):
+            raise PermissionError('File being accessed is not within the permitted directory')
+
+        super().__init__(location, api_key, auth, **kwargs)
+
     def get_response(self,
                      params: typing.Optional[typing.Mapping[str, str]] = None):
         """
diff --git a/datasources/models.py b/datasources/models.py
index 865890c..2543934 100644
--- a/datasources/models.py
+++ b/datasources/models.py
@@ -221,10 +221,11 @@ class DataSource(BaseAppDataModel):
                 self.data_connector.get_metadata(),
                 indent=4
             ))
-        except (KeyError, NotImplementedError, ValueError):
+        except (KeyError, NotImplementedError, ValueError, PermissionError):
             # KeyError: Plugin was not found
             # NotImplementedError: Plugin does not support metadata
             # ValueError: Plugin was not set
+            # PermissionError: File exists outside of permitted directory - not the responsibility of the search record
             pass
 
         result = '\n'.join(lines)
diff --git a/pedasi/settings.py b/pedasi/settings.py
index 1786cc0..2d9e171 100644
--- a/pedasi/settings.py
+++ b/pedasi/settings.py
@@ -303,3 +303,8 @@ STATICFILES_DIRS = [
     os.path.join(BASE_DIR, 'pedasi', 'static'),
     os.path.join(BASE_DIR, 'docs', 'build'),
 ]
+
+# Media directory - files uploaded by users
+
+MEDIA_URL = '/media/'
+MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
-- 
GitLab