From 8ec0b5b2d18411ea7c2676915717e63e7431f227 Mon Sep 17 00:00:00 2001
From: Nikolay Stanchev <ns17@it-innovation.soton.ac.uk>
Date: Fri, 25 May 2018 10:36:58 +0100
Subject: [PATCH] Added URL regex validation plus tests

---
 src/clmc-webservice/clmcservice/tests.py     | 67 ++++++++++++++++++--
 src/clmc-webservice/clmcservice/utilities.py | 17 ++++-
 2 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/src/clmc-webservice/clmcservice/tests.py b/src/clmc-webservice/clmcservice/tests.py
index c6b95ed..eee634b 100644
--- a/src/clmc-webservice/clmcservice/tests.py
+++ b/src/clmc-webservice/clmcservice/tests.py
@@ -25,7 +25,7 @@ import pytest
 from pyramid import testing
 from pyramid.httpexceptions import HTTPBadRequest
 from time import sleep
-from clmcservice.utilities import CONFIG_ATTRIBUTES, PROCESS_ATTRIBUTE, RUNNING_FLAG, MALFORMED_FLAG
+from clmcservice.utilities import CONFIG_ATTRIBUTES, PROCESS_ATTRIBUTE, RUNNING_FLAG, MALFORMED_FLAG, URL_REGEX
 import os
 import signal
 
@@ -80,12 +80,15 @@ class TestAggregatorAPI(object):
          {'aggregator_report_period': 20, 'aggregator_database_name': "CLMCMetrics", 'aggregator_database_url': "http://172.60.231.51:8086"}),
         ('{"aggregator_report_period": 25, "aggregator_database_name": "CLMCMetrics", "aggregator_database_url": "http://172.60.231.51:8086"}',
          {'aggregator_report_period': 25, 'aggregator_database_name': "CLMCMetrics", 'aggregator_database_url': "http://172.60.231.51:8086"}),
-        ('{"aggregator_report_period": 200, "aggregator_database_name": "E2EMetrics", "aggregator_database_url": "http://172.50.231.51:8086"}',
-         {'aggregator_report_period': 200, 'aggregator_database_name': "E2EMetrics", 'aggregator_database_url': "http://172.50.231.51:8086"}),
-        ('{"aggregator_report_period": 150, "aggregator_database_name": "CLMCMetrics", "aggregator_database_url": "http://172.40.231.51:8086"}',
-         {'aggregator_report_period': 150, 'aggregator_database_name': "CLMCMetrics", 'aggregator_database_url': "http://172.40.231.51:8086"}),
+        ('{"aggregator_report_period": 200, "aggregator_database_name": "E2EMetrics", "aggregator_database_url": "https://172.50.231.51:8086"}',
+         {'aggregator_report_period': 200, 'aggregator_database_name': "E2EMetrics", 'aggregator_database_url': "https://172.50.231.51:8086"}),
+        ('{"aggregator_report_period": 150, "aggregator_database_name": "CLMCMetrics", "aggregator_database_url": "https://localhost:8086"}',
+         {'aggregator_report_period': 150, 'aggregator_database_name': "CLMCMetrics", 'aggregator_database_url': "https://localhost:8086"}),
         ("{aggregator_report_period: 2hb5, aggregator_database_name: CLMCMetrics, aggregator_database_url: http://172.60.231.51:8086}", None),
         ("{aggregator_report_period: 250-, aggregator_database_name: CLMCMetrics, aggregator_database_url: http://172.60.231.52:8086}", None),
+        ("{aggregator_report_period: 25, aggregator_database_name: CLMCMetrics, aggregator_database_url: ftp://172.60.231.51:8086}", None),
+        ("{aggregator_report_period: 25, aggregator_database_name: CLMCMetrics, aggregator_database_url: http://172.60.231.51:8086/query param}", None),
+        ("{aggregator_report_period: 250, aggregator_database_name: CLMCMetrics, aggregator_database_url: http://172.60.231.52:808686}", None),
         ("{}", None),
         ("{aggregator_running: true}", None),
     ])
@@ -360,3 +363,57 @@ class TestAggregatorAPI(object):
         assert not self.config.get_settings().get(RUNNING_FLAG), "The aggregator should have been stopped."
         assert not self.config.get_settings().get(MALFORMED_FLAG), "The malformed flag should have been reset to False."
         assert self.config.get_settings().get(PROCESS_ATTRIBUTE) is None, "The aggregator should have been stopped."
+
+
+class TestRegexURL(object):
+    """
+    A pytest-implementation test for the regular expression the service uses to validate the database URL
+    """
+
+    @pytest.mark.parametrize("valid_url", [
+        "http://localhost:8080/",
+        "https://localhost:80/url/path",
+        "https://192.168.20.20/?query=param",
+        "http://custom.domain.com",
+        "http://domain.net:8888/",
+        "https://10.160.150.4:21",
+        "http://localhost:12345",
+        "http://domain.com:21/path",
+        "http://domain.com:32?path",
+        "http://domain.com:43#path"
+    ])
+    def test_valid_urls(self, valid_url):
+        """
+        Tests that the regular expression can detect valid URLs.
+
+        :param valid_url: a string representing a valid URL
+        """
+
+        matched_object = URL_REGEX.match(valid_url)
+
+        assert matched_object is not None, "The regular expression fails in validating a correct URL."
+
+        assert matched_object.group() is not None, "The matched object should return the full-match string"
+
+    @pytest.mark.parametrize("invalid_url", [
+        "ftp://localhost:80/url/path",
+        "tcp://192.168.20.20/?query=param",
+        "http:/localhost:80/",
+        "https//localhost:8080/",
+        "https://domain:1234/url/path",
+        "http://domain.com:808080/",
+        "http://localhost:8-080/",
+        "http://localhost:port80/",
+        "http://domain.com:8080url/path",
+        "http://domain.com:8080/?url path",
+    ])
+    def test_invalid_urls(self, invalid_url):
+        """
+        Tests that the regular expression can detect invalid URLs.
+
+        :param invalid_url: a string representing an invalid URL
+        """
+
+        matched_object = URL_REGEX.match(invalid_url)
+
+        assert matched_object is None, "The regular expression fails in detecting an invalid URL."
diff --git a/src/clmc-webservice/clmcservice/utilities.py b/src/clmc-webservice/clmcservice/utilities.py
index 4ba630f..ddb8dd9 100644
--- a/src/clmc-webservice/clmcservice/utilities.py
+++ b/src/clmc-webservice/clmcservice/utilities.py
@@ -22,7 +22,7 @@
 """
 
 from json import loads
-
+from re import compile, IGNORECASE
 
 CONFIG_ATTRIBUTES = ('aggregator_report_period', 'aggregator_database_name', 'aggregator_database_url')  # all of the configuration attributes - to be used as dictionary keys
 
@@ -37,6 +37,15 @@ MALFORMED_FLAG = 'malformed'  # Attribute for storing the flag, which shows whet
 COMMENT_ATTRIBUTE = 'comment'
 COMMENT_VALUE = 'Aggregator is running in a malformed state - it uses an old version of the configuration. Please, restart it so that the updated configuration is used.'
 
+URL_REGEX = compile(
+    r'^https?://'  # http:// or https://
+    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain, e.g. example.domain.com
+    r'localhost|'  # or localhost...
+    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # or IP address (IPv4 format)
+    r'(?::\d{2,5})?'  # optional port number
+    r'(?:[/?#][^\s]*)?$',  # URL path or query parameters
+    IGNORECASE)
+
 
 def validate_config_content(configuration):
     """
@@ -59,9 +68,11 @@ def validate_config_content(configuration):
     for attribute in CONFIG_ATTRIBUTES:
         assert attribute in configuration
 
-    assert type(configuration['aggregator_report_period']) == int, "Report period must be an integer, received {0} instead.".format(configuration.get('aggregator_report_period'))
+    assert type(configuration.get('aggregator_report_period')) == int, "Report period must be an integer, received {0} instead.".format(configuration.get('aggregator_report_period'))
+
+    assert configuration.get('aggregator_report_period') > 0, "Report period must be a positive integer, received {0} instead.".format(configuration.get('aggregator_report_period'))
 
-    assert configuration['aggregator_report_period'] > 0, "Report period must be a positive integer, received {0} instead.".format(configuration.get('aggregator_report_period'))
+    assert URL_REGEX.match(configuration.get('aggregator_database_url')) is not None, "The aggregator must have a valid database URL in its configuration, received {0} instead.".format(configuration.get('aggregator_report_period'))
 
     return configuration
 
-- 
GitLab