diff --git a/api/tests.py b/api/tests.py index 29c3688fb587fcbb4eb3cc85a690d1558f81de0b..6b6566fe37728f25ebf9996fddcd1fd7d8db6cbb 100644 --- a/api/tests.py +++ b/api/tests.py @@ -6,7 +6,7 @@ from django.test import Client, TestCase from rest_framework.authtoken.models import Token from rest_framework.test import APIClient -from datasources import models +from datasources import connectors, models class RootApiTest(TestCase): @@ -499,7 +499,7 @@ class DataSourceApiHyperCatTest(TestCase): url=cls.test_url, api_key=cls.api_key, plugin_name=cls.plugin_name, - auth_method=models.DataSource.determine_auth_method(cls.test_url, cls.api_key) + auth_method=connectors.BaseDataConnector.determine_auth_method(cls.test_url, cls.api_key) ) def setUp(self): diff --git a/applications/views.py b/applications/views.py index 77dc4788c3a5cc04af7d50b6d29a5e6b3e5f04c3..7f318167bba3c7f81c5ac8dcd8e03cd3a1448768 100644 --- a/applications/views.py +++ b/applications/views.py @@ -7,8 +7,8 @@ from django.views.generic.list import ListView from rest_framework.authtoken.models import Token from . import models -from core.permissions import OwnerPermissionMixin from core.views import ManageAccessView +from profiles.permissions import OwnerPermissionMixin class ApplicationListView(ListView): diff --git a/core/permissions.py b/core/permissions.py deleted file mode 100644 index ddd75cb0fb2d056faef6e45de3525a501b4b2876..0000000000000000000000000000000000000000 --- a/core/permissions.py +++ /dev/null @@ -1,6 +0,0 @@ -from django.contrib.auth.mixins import UserPassesTestMixin - - -class OwnerPermissionMixin(UserPassesTestMixin): - def test_func(self): - return self.request.user == self.get_object().owner or self.request.user.is_superuser diff --git a/datasources/apps.py b/datasources/apps.py index 5b1910ab51717e5e2128e8317808630b66a9f232..2dd269adc0ad83c49fb2c526a1d0c15b42324dea 100644 --- a/datasources/apps.py +++ b/datasources/apps.py @@ -1,5 +1,26 @@ +import logging + from django.apps import AppConfig +from django.db.utils import ProgrammingError + + +logger = logging.getLogger(__name__) class DatasourcesConfig(AppConfig): name = 'datasources' + + @staticmethod + def create_operational_metadata(): + from datasources.models import MetadataField + + MetadataField.load_inline_fixtures() + + def ready(self): + # Runs after app registry is populated - i.e. all models exist and are importable + try: + self.create_operational_metadata() + logging.info('Loaded inline MetadataField fixtures') + + except ProgrammingError: + logging.warning('Could not create MetadataField fixtures, database has not been initialized') diff --git a/datasources/connectors/base.py b/datasources/connectors/base.py index 38589688d1e97427b2f5252e9433d73e998c18af..9ce115877702be2cacf0ee82f654472a61718e6a 100644 --- a/datasources/connectors/base.py +++ b/datasources/connectors/base.py @@ -100,6 +100,41 @@ class BaseDataConnector(metaclass=plugin.Plugin): def request_count(self): return self._request_counter.count() + # TODO make normal method + @staticmethod + def determine_auth_method(url: str, api_key: str) -> AuthMethod: + """ + Determine which authentication method to use to access the data source. + + Test each known authentication method in turn until one succeeds. + + :param url: URL to authenticate against + :param api_key: API key to use for authentication + :return: First successful authentication method + """ + # If not using an API key - can't require auth + if not api_key: + return AuthMethod.NONE + + for auth_method_id, auth_function in REQUEST_AUTH_FUNCTIONS.items(): + try: + # Can we get a response using this auth method? + if auth_function is None: + response = requests.get(url) + + else: + response = requests.get(url, + auth=auth_function(api_key, '')) + + response.raise_for_status() + return auth_method_id + + except requests.exceptions.HTTPError: + pass + + # None of the attempted authentication methods was successful + raise requests.exceptions.ConnectionError('Could not authenticate against external API') + def get_metadata(self, params: typing.Optional[typing.Mapping[str, str]] = None): """ diff --git a/datasources/connectors/csv.py b/datasources/connectors/csv.py index 2aa9cb2a1c61ed7afd1b168975b1b5b0f9770104..4de3b4d67365309211a00397cafff205d4687648 100644 --- a/datasources/connectors/csv.py +++ b/datasources/connectors/csv.py @@ -162,10 +162,9 @@ class CsvToMongoConnector(InternalDataConnector, DataSetConnector): params = {key: _type_convert(val) for key, val in params.items()} with context_managers.switch_collection(CsvRow, self.location) as collection: - records = collection.objects.filter(**params) + records = collection.objects.filter(**params).exclude('_id') - # To get dictionary from MongoEngine records we need to go via JSON string - data = json.loads(records.exclude('_id').to_json()) + data = list(records.as_pymongo()) # Couldn't store field 'id' in document - recover it for item in data: diff --git a/datasources/forms.py b/datasources/forms.py index a5477b241e523ab4c1b3376129adff893de15278..e3678de99976f8f6f5c6ad0184d4db9b95e8058d 100644 --- a/datasources/forms.py +++ b/datasources/forms.py @@ -24,7 +24,8 @@ class DataSourceForm(forms.ModelForm): cleaned_data = super().clean() try: - cleaned_data['auth_method'] = models.DataSource.determine_auth_method( + # TODO construct and actual data connector instance here + cleaned_data['auth_method'] = connectors.BaseDataConnector.determine_auth_method( cleaned_data['url'], cleaned_data['api_key'] ) diff --git a/datasources/models.py b/datasources/models.py index c95627b513885a0ce2149e2e135bac3c5d871c4e..b1f00c92459e2f0a0ccb383d3e0c65c41b4ccc46 100644 --- a/datasources/models.py +++ b/datasources/models.py @@ -1,10 +1,13 @@ +""" +This module contains the Django models necessary to manage the set of data sources. +""" + import contextlib import enum import json import typing from django.conf import settings -from django.contrib.auth.models import Group from django.core import validators from django.db import models from django.urls import reverse @@ -75,18 +78,19 @@ class MetadataField(models.Model): blank=False, null=False) #: Short text identifier for the field - short_name = models.CharField(max_length=MAX_LENGTH_NAME, - validators=[ - validators.RegexValidator( - '^[a-zA-Z][a-zA-Z0-9_]*\Z', - 'Short name must begin with a letter and consist only of letters, numbers and underscores.', - 'invalid' - ) - ], - unique=True, - blank=False, null=False) + short_name = models.CharField( + max_length=MAX_LENGTH_NAME, + validators=[ + validators.RegexValidator( + r'^[a-zA-Z][a-zA-Z0-9_]*\Z', + 'Short name must begin with a letter and consist only of letters, numbers and underscores.', + 'invalid' + ) + ], + unique=True, + blank=False, null=False + ) - # TODO create all operational fields if missing #: Does the field have an operational effect within PEDASI? operational = models.BooleanField(default=False, blank=False, null=False) @@ -94,6 +98,26 @@ class MetadataField(models.Model): def __str__(self): return self.name + @classmethod + def load_inline_fixtures(cls): + """ + Create any instances required for the functioning of PEDASI. + + This is called from within the AppConfig. + """ + fixtures = ( + ('data_query_param', 'data_query_param', True), + ('indexed_field', 'indexed_field', True), + ) + + for name, short_name, operational in fixtures: + obj, created = cls.objects.get_or_create( + name=name, + short_name=short_name + ) + obj.operational = operational + obj.save() + class MetadataItem(models.Model): """ @@ -329,10 +353,18 @@ class DataSource(BaseAppDataModel): @property def is_catalogue(self) -> bool: + """ + Is this data source a data catalogue? + """ return self.data_connector_class.is_catalogue @property def connector_string(self): + """ + Get the string used to locate the resource associated with this data source. + + e.g. URL, SQL table identifier, etc. + """ if self._connector_string: return self._connector_string return self.url @@ -349,39 +381,51 @@ class DataSource(BaseAppDataModel): try: plugin = BaseDataConnector.get_plugin(self.plugin_name) - except KeyError as e: + except KeyError as exc: if not self.plugin_name: - raise ValueError('Data source plugin is not set') from e + raise ValueError('Data source plugin is not set') from exc - raise KeyError('Data source plugin not found') from e + raise KeyError('Data source plugin not found') from exc return plugin + def _get_data_connector(self) -> BaseDataConnector: + """ + Construct the data connector for this source. + + :return: Data connector instance + """ + plugin = self.data_connector_class + + if not self.api_key: + data_connector = plugin(self.connector_string) + + else: + # Is the authentication method set? + auth_method = AuthMethod(self.auth_method) + if not auth_method: + auth_method = plugin.determine_auth_method(self.url, self.api_key) + + # Inject function to get authenticated request + auth_class = REQUEST_AUTH_FUNCTIONS[auth_method] + + data_connector = plugin(self.connector_string, self.api_key, + auth=auth_class) + + return data_connector + @property @contextlib.contextmanager def data_connector(self) -> BaseDataConnector: """ Context manager to construct the data connector for this source. + When the context manager is closed, the number of requests to the external API will be added to the total. + :return: Data connector instance """ if self._data_connector is None: - plugin = self.data_connector_class - - if not self.api_key: - self._data_connector = plugin(self.connector_string) - - else: - # Is the authentication method set? - auth_method = AuthMethod(self.auth_method) - if not auth_method: - auth_method = self.determine_auth_method(self.url, self.api_key) - - # Inject function to get authenticated request - auth_class = REQUEST_AUTH_FUNCTIONS[auth_method] - - self._data_connector = plugin(self.connector_string, self.api_key, - auth=auth_class) + self._data_connector = self._get_data_connector() try: # Returns as context manager @@ -396,6 +440,11 @@ class DataSource(BaseAppDataModel): @property def search_representation(self) -> str: + """ + Provide a text representation of this data source to be entered into a search index. + + :return: Text representation of this data source + """ lines = [ self.name, self.owner.get_full_name(), @@ -403,12 +452,21 @@ class DataSource(BaseAppDataModel): ] try: + # Using the data_connector context manager results in an infinite recursion: + # 1. Save data source + # 2. Get search representation (this function) + # 3. Close data connector context manager + # 4. Save data source -> ... + + data_connector = self._get_data_connector() + metadata = data_connector.get_metadata() + lines.append(json.dumps( - self.data_connector.get_metadata(), + metadata, indent=4 )) - except: + except (KeyError, NotImplementedError, ValueError): # KeyError: Plugin was not found # NotImplementedError: Plugin does not support metadata # ValueError: Plugin was not set @@ -417,31 +475,6 @@ class DataSource(BaseAppDataModel): result = '\n'.join(lines) return result - @staticmethod - def determine_auth_method(url: str, api_key: str) -> AuthMethod: - # If not using an API key - can't require auth - if not api_key: - return AuthMethod.NONE - - for auth_method_id, auth_function in REQUEST_AUTH_FUNCTIONS.items(): - try: - # Can we get a response using this auth method? - if auth_function is None: - response = requests.get(url) - - else: - response = requests.get(url, - auth=auth_function(api_key, '')) - - response.raise_for_status() - return auth_method_id - - except requests.exceptions.HTTPError: - pass - - # None of the attempted authentication methods was successful - raise requests.exceptions.ConnectionError('Could not authenticate against external API') - def get_absolute_url(self): return reverse('datasources:datasource.detail', kwargs={'pk': self.pk}) diff --git a/datasources/search_indexes.py b/datasources/search_indexes.py index bff3740e06661e11ae3006ce3afe160ebf23ebcd..d24a4442cedfa5099038b58467ba3a1f581207a2 100644 --- a/datasources/search_indexes.py +++ b/datasources/search_indexes.py @@ -1,9 +1,22 @@ +""" +This module contains the search index definitions for the datasource app using Haystack. + +See https://django-haystack.readthedocs.io/en/master/ for documentation. +""" + from haystack import indexes + from . import models class DataSourceIndex(indexes.SearchIndex, indexes.Indexable): + """ + The search index definition for a DataSource. + + Uses templates/search/indexes/datasources/datasource_text.txt and + :meth:`datasources.models.DataSource.search_representation`. + """ text = indexes.CharField(document=True, use_template=True) def get_model(self): diff --git a/datasources/tests/test_connectors.py b/datasources/tests/test_connectors.py index 8527559159265b2990a24ed6ad2120a6b88a9110..00296f0dfcb401edd16d66fa107b6e3654e44dda 100644 --- a/datasources/tests/test_connectors.py +++ b/datasources/tests/test_connectors.py @@ -1,6 +1,6 @@ from django.test import TestCase -from datasources.connectors.base import BaseDataConnector +from datasources.connectors.base import AuthMethod, BaseDataConnector class ConnectorPluginTest(TestCase): @@ -86,6 +86,13 @@ class ConnectorIoTUKTest(TestCase): self.assertIn('data', result) self.assertGreater(len(result['data']), 0) + def test_determine_auth(self): + connection = self._get_connection() + + auth_method = connection.determine_auth_method(connection.location, connection.api_key) + + self.assertEqual(AuthMethod.NONE, auth_method) + class ConnectorRestApiTest(TestCase): url = 'https://api.iotuk.org.uk/' diff --git a/datasources/tests/test_connectors_hypercat.py b/datasources/tests/test_connectors_hypercat.py index a34bff6a05a8caaa7aeb4464f9f0cea4132332c7..86dfae35ffbe7c194aeff1b93426587df8bae117 100644 --- a/datasources/tests/test_connectors_hypercat.py +++ b/datasources/tests/test_connectors_hypercat.py @@ -4,7 +4,7 @@ import typing from django.test import TestCase from requests.auth import HTTPBasicAuth -from datasources.connectors.base import BaseDataConnector, HttpHeaderAuth +from datasources.connectors.base import AuthMethod, BaseDataConnector, HttpHeaderAuth def _get_item_by_key_value(collection: typing.Iterable[typing.Mapping], @@ -181,6 +181,7 @@ class ConnectorHyperCatTest(TestCase): class ConnectorHyperCatCiscoTest(TestCase): + # TODO find working dataset url = 'https://api.cityverve.org.uk/v1/cat' subcatalogue = 'https://api.cityverve.org.uk/v1/cat/polling-station' dataset = 'https://api.cityverve.org.uk/v1/entity/polling-station/5' @@ -212,6 +213,13 @@ class ConnectorHyperCatCiscoTest(TestCase): self.assertTrue(connection.is_catalogue) + def test_determine_auth(self): + connection = self._get_connection() + + auth_method = connection.determine_auth_method(connection.location, connection.api_key) + + self.assertEqual(AuthMethod.HEADER, auth_method) + def test_plugin_get_catalogue_metadata(self): connection = self._get_connection() diff --git a/datasources/views/datasource.py b/datasources/views/datasource.py index 736d4ea966dcb290b236bf71845668872d1cc1bf..dbc90b468aa9ef7953e76ac2fd5dd1ecd261dc01 100644 --- a/datasources/views/datasource.py +++ b/datasources/views/datasource.py @@ -11,9 +11,9 @@ from rest_framework import serializers from rest_framework.views import APIView import requests.exceptions -from core.permissions import OwnerPermissionMixin from datasources import forms, models from datasources.permissions import HasPermissionLevelMixin +from profiles.permissions import OwnerPermissionMixin class DataSourceListView(ListView): diff --git a/datasources/views/licence.py b/datasources/views/licence.py index 4b90a3694fad9e37d22ee0241c19ee8b0e5448f9..078fd5535156dec41d93b911d919b34f1e2c5c11 100644 --- a/datasources/views/licence.py +++ b/datasources/views/licence.py @@ -3,7 +3,7 @@ from django.urls import reverse_lazy from django.views.generic import CreateView, DeleteView, DetailView, ListView, UpdateView from .. import forms, models -from core.permissions import OwnerPermissionMixin +from profiles.permissions import OwnerPermissionMixin class LicenceListView(ListView): diff --git a/datasources/views/user_permission_link.py b/datasources/views/user_permission_link.py index f9d019c66f3b36f94f30e8a89f06b957e169828e..b7d4c4fddb023e68fc1f4c2572f99b2f3c002125 100644 --- a/datasources/views/user_permission_link.py +++ b/datasources/views/user_permission_link.py @@ -6,11 +6,11 @@ from django.shortcuts import reverse from django.views.generic.detail import DetailView from django.views.generic.edit import UpdateView -from profiles.permissions import OwnerPermissionRequiredMixin +from profiles.permissions import OwnerPermissionMixin from datasources import forms, models -class DataSourceAccessManageView(OwnerPermissionRequiredMixin, DetailView): +class DataSourceAccessManageView(OwnerPermissionMixin, DetailView): model = models.DataSource template_name = 'datasources/datasource/manage_access.html' context_object_name = 'datasource' diff --git a/pedasi/settings.py b/pedasi/settings.py index 990ea0db8172b2ab1c2ff3b408188fd9b41369cd..a6439821b741fb860f60e632f8754904bb391dae 100644 --- a/pedasi/settings.py +++ b/pedasi/settings.py @@ -89,7 +89,7 @@ THIRD_PARTY_APPS = [ CUSTOM_APPS = [ 'profiles.apps.ProfilesConfig', # Refer to AppConfig directly since we override the .ready() method 'applications', - 'datasources', + 'datasources.apps.DatasourcesConfig', 'provenance', 'core', 'api', diff --git a/profiles/apps.py b/profiles/apps.py index 51cb0a78fe63eacbca8d06e30a61b06a8d1bae35..9eef31bf4bd504d324afa4a88433f4e33576a58b 100644 --- a/profiles/apps.py +++ b/profiles/apps.py @@ -49,6 +49,7 @@ class ProfilesConfig(AppConfig): # Runs after app registry is populated - i.e. all models exist and are importable try: self.create_groups() + logging.info('Loaded inline Group fixtures') except ProgrammingError: logging.warning('Could not create Group fixtures, database has not been initialized') diff --git a/profiles/permissions.py b/profiles/permissions.py index dd58254727d8a73532135ff5277ff4ef25ae7224..5c5d5a4e82df6dd8131f0751a2338f4929be9b4e 100644 --- a/profiles/permissions.py +++ b/profiles/permissions.py @@ -1,7 +1,7 @@ from django.contrib.auth.mixins import UserPassesTestMixin, PermissionRequiredMixin -class OwnerPermissionRequiredMixin(PermissionRequiredMixin): +class OwnerPermissionMixin(PermissionRequiredMixin): """ Mixin to require that a user has the relevant global permission and is the owner of the relevant object.