diff --git a/src/service/resources/TICKscript/deadman-template.tick b/src/service/resources/TICKscript/deadman-template.tick index 5762d4ad5505972b989f1561a36ede5e3bc099b7..1b03fdb08b0ec1b8cb6b4dc9cf68c347bca6a16b 100644 --- a/src/service/resources/TICKscript/deadman-template.tick +++ b/src/service/resources/TICKscript/deadman-template.tick @@ -1,3 +1,9 @@ +var sfc string + +var sfci string + +var policy string + var db string // database per service function chain, so db is named after sfc var rp = 'autogen' // default value for the retention policy @@ -25,7 +31,7 @@ stream .where(whereClause) | deadman(throughputThreshold, alertPeriod) .id(eventID) - .details('db=' + db + ',measurement=' + measurement) + .details('db=' + db + ',sfc=' + sfc + ',sfci=' + sfci + ',policy=' + policy + ',trigger=' + eventID) .message(messageValue) .topic(topicID) .noRecoveries() \ No newline at end of file diff --git a/src/service/resources/TICKscript/relative-stream-template.tick b/src/service/resources/TICKscript/relative-stream-template.tick new file mode 100644 index 0000000000000000000000000000000000000000..0a90f3d5dff8e7ecb327d22a6b07e7d26d9b5a57 --- /dev/null +++ b/src/service/resources/TICKscript/relative-stream-template.tick @@ -0,0 +1,46 @@ +var db string // database per service function chain, so db is named after sfc + +var rp = 'autogen' // default value for the retention policy + +var measurement string + +var selectLambda lambda // must be a lambda specifying the field to select e.g. "requests" + +var whereClause = lambda: True // default value is a function which returns TRUE, hence no filtering of the query result + +var messageValue = 'TRUE' // default value is TRUE, as this is what SFEMC expects as a notification for an event rule + +var comparisonLambda lambda // comparison function e.g. "diff" > 40 + +var alertPeriod duration + +var topicID string + + +var data = stream + | from() + .database(db) + .retentionPolicy(rp) + .measurement(measurement) + .where(whereClause) + | eval(selectLambda) + .as('value') + +var past = data + | shift(alertPeriod) + +var current = data + +past + | join(current) // NOTE: join buffers a given data point until a point with the correct timestamp to join on arrives + .as('past', 'current') + | eval(lambda: float("current.value" - "past.value")) + .keep() + .as('diff') + | alert() + .id(topicID) + .details('db=' + db + ',measurement=' + measurement) + .crit(comparisonLambda) + .message(messageValue) + .topic(topicID) + .noRecoveries() diff --git a/src/service/resources/TICKscript/relative-template.tick b/src/service/resources/TICKscript/relative-template.tick index 30ac95c962a6bf547c93935810beb2d108a38b3f..9fb0daabbc297cea78838051f49ca716a057f7ea 100644 --- a/src/service/resources/TICKscript/relative-template.tick +++ b/src/service/resources/TICKscript/relative-template.tick @@ -1,3 +1,9 @@ +var sfc string + +var sfci string + +var policy string + var db string // database per service function chain, so db is named after sfc var rp = 'autogen' // default value for the retention policy @@ -42,7 +48,7 @@ past .as('diff') | alert() .id(eventID) - .details('db=' + db + ',measurement=' + measurement) + .details('db=' + db + ',sfc=' + sfc + ',sfci=' + sfci + ',policy=' + policy + ',trigger=' + eventID) .crit(comparisonLambda) .message(messageValue) .topic(topicID) diff --git a/src/service/resources/TICKscript/threshold-batch-template.tick b/src/service/resources/TICKscript/threshold-batch-template.tick index 379a49c4fffb030f8f1775c658c1b25cf8f64d2e..f33f850a4155ff007d41b91342fef3ac55f9925f 100644 --- a/src/service/resources/TICKscript/threshold-batch-template.tick +++ b/src/service/resources/TICKscript/threshold-batch-template.tick @@ -1,3 +1,9 @@ +var sfc string + +var sfci string + +var policy string + var db string // database per service function chain, so db is named after sfc var rp = 'autogen' // default value for the retention policy @@ -26,7 +32,7 @@ batch .every(alertPeriod) |alert() .id(eventID) - .details('db=' + db + ',measurement=' + measurement) + .details('db=' + db + ',sfc=' + sfc + ',sfci=' + sfci + ',policy=' + policy + ',trigger=' + eventID) .crit(comparisonLambda) .message(messageValue) .topic(topicID) diff --git a/src/service/resources/TICKscript/threshold-stream-template.tick b/src/service/resources/TICKscript/threshold-stream-template.tick index 6ee92fd2c03b7d68efa871bff02dc53b29f1c1fd..fb85abac7048f1558446b7035889aa64c249a4b0 100644 --- a/src/service/resources/TICKscript/threshold-stream-template.tick +++ b/src/service/resources/TICKscript/threshold-stream-template.tick @@ -1,3 +1,9 @@ +var sfc string + +var sfci string + +var policy string + var db string // database per service function chain, so db is named after sfc var rp = 'autogen' // default value for the retention policy @@ -23,7 +29,7 @@ stream .where(whereClause) | alert() .id(eventID) - .details('db=' + db + ',measurement=' + measurement) + .details('db=' + db + ',sfc=' + sfc + ',sfci=' + sfci + ',policy=' + policy + ',trigger=' + eventID) .crit(comparisonLambda) .message(messageValue) .topic(topicID) diff --git a/src/service/setup.py b/src/service/setup.py index 7a801e6e0bfe4805e8c079223254598f16346126..1078a0600e7cedc1a885ffbbf43a7a22c5294ae6 100644 --- a/src/service/setup.py +++ b/src/service/setup.py @@ -53,6 +53,7 @@ def get_version(*relative_path): requires = [ 'ipython==6.5.0', + 'jupyter_console==5.2.0', 'plaster_pastedeploy==0.6', 'pyramid==1.9.2', 'pyramid_debugtoolbar==4.5', diff --git a/src/test/clmctest/alerts/test_alerts.py b/src/test/clmctest/alerts/test_alerts.py index 59fefb48eb0f7f5e3a88f13c5ba4f06943924f51..ceb844efdc55ee4ef0040207189cfa0cedac43ee 100644 --- a/src/test/clmctest/alerts/test_alerts.py +++ b/src/test/clmctest/alerts/test_alerts.py @@ -22,10 +22,12 @@ ## Created for Project : FLAME """ -from time import sleep +from time import sleep, strptime from requests import post, get from os import listdir from os.path import join, dirname +from json import load +from schema import Schema, And, Or, Optional, SchemaError from clmctest.alerts.alert_handler_server import LOG_TEST_FOLDER_PATH @@ -33,6 +35,47 @@ CLMC_SERVICE_PORT = 9080 NGINX_PORT = 80 +def is_valid_timestamp(str_timestamp): + try: + strptime(str_timestamp, "%Y-%m-%dT%H:%M:%SZ") + return True + except ValueError: + return False + + +def is_valid_details_string(details): + return "db=" in details and "sfc=" in details and "sfci=" in details and "policy" in details + + +JSON_BODY_SCHEMA = Schema({ + "message": "TRUE", + "id": str, + "level": "CRITICAL", + "duration": int, + "previousLevel": str, + "details": And(str, is_valid_details_string), + "time": And(str, is_valid_timestamp), + "data": { + "series": [ + { + "name": str, + Optional("tags"): { + str: str + }, + "columns": [ + str + ], + "values": [ + [ + Or(str, int) + ] + ] + } + ] + } +}) + + class TestAlerts(object): def test_alert_triggers(self, rspec_config, set_up_tear_down_fixture): @@ -47,7 +90,7 @@ class TestAlerts(object): :param rspec_config: fixture from conftest.py """ - global CLMC_SERVICE_PORT, NGINX_PORT + global CLMC_SERVICE_PORT, NGINX_PORT, JSON_BODY_SCHEMA clmc_service_host, nginx_host = None, None for host in rspec_config: @@ -85,4 +128,19 @@ class TestAlerts(object): print("Wait 15 seconds for Kapacitor to trigger alerts...") sleep(15) - assert len(listdir(LOG_TEST_FOLDER_PATH)) == 4, "4 log files must have been created - one for each alert defined in the specification." + alert_logs = listdir(LOG_TEST_FOLDER_PATH) + assert len(alert_logs) == 4, "4 log files must have been created - one for each alert defined in the specification." + + for alert_log in alert_logs: + alert_log_path = join(LOG_TEST_FOLDER_PATH, alert_log) + + with open(alert_log_path) as fh: + alert_json = load(fh) + + try: + JSON_BODY_SCHEMA.validate(alert_json) + valid = True + except SchemaError: + valid = False + + assert valid, "Alert log content is invalid - {0}".format(alert_log_path)