diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 42cf1e0faead68b693989dd39cbd470d1d414938..c50739fb8257d0ef80b9e69d41cda1228ccae5e8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -35,8 +35,8 @@ build:tests: - python setup.py sdist --dist-dir=$CI_PROJECT_DIR/build artifacts: paths: - - build/clmctest-2.1.2.tar.gz - - build/clmcservice-2.1.2.tar.gz + - build/clmctest-2.2.0.tar.gz + - build/clmcservice-2.2.0.tar.gz expire_in: 1 day test:all: @@ -50,8 +50,8 @@ test:all: - echo "REPO_PASS=${REPO_PASS}" >> $CI_PROJECT_DIR/reporc - sudo scripts/test/fixture.sh create -f src/test/clmctest/rspec.json -r $CI_PROJECT_DIR -c all - sudo mkdir /var/lib/lxd/containers/test-runner/rootfs/opt/clmc/build - - sudo cp build/clmctest-2.1.2.tar.gz /var/lib/lxd/containers/test-runner/rootfs/opt/clmc/build - - sudo lxc exec test-runner -- pip3 install /opt/clmc/build/clmctest-2.1.2.tar.gz + - sudo cp build/clmctest-2.2.0.tar.gz /var/lib/lxd/containers/test-runner/rootfs/opt/clmc/build + - sudo lxc exec test-runner -- pip3 install /opt/clmc/build/clmctest-2.2.0.tar.gz - sudo lxc exec test-runner -- pytest -s --tb=short -rfp --pyargs clmctest when: on_success diff --git a/docs/graph-monitoring-user-guide.md b/docs/graph-monitoring-user-guide.md new file mode 100644 index 0000000000000000000000000000000000000000..c02d58ee36dca0942603795c9c52ee0bbe6cf4c0 --- /dev/null +++ b/docs/graph-monitoring-user-guide.md @@ -0,0 +1,241 @@ +<!-- +// © University of Southampton IT Innovation Centre, 2018 +// +// Copyright in this software belongs to University of Southampton +// IT Innovation Centre of Gamma House, Enterprise Road, +// Chilworth Science Park, Southampton, SO16 7NS, UK. 
+// +// This software may not be used, sold, licensed, transferred, copied +// or reproduced in whole or in part in any manner or form or in or +// on any media by any person other than in accordance with the terms +// of the Licence Agreement supplied with the software, or otherwise +// without the prior written consent of the copyright owners. +// +// This software is distributed WITHOUT ANY WARRANTY, without even the +// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE, except where stated in the Licence Agreement supplied with +// the software. +// +// Created By : Nikolay Stanchev +// Created Date : 17-05-2019 +// Created for Project : FLAME +--> + + +## CLMC - Graph-based measurements of service end-to-end delay + + +### Input requirements + +CLMC offers API endpoints to build and query a layer-based graph data structure starting from the infrastructure network layer +up to the logical abstraction layer of a media service. This graph can then be further used to measure an aggregation of the end-to-end delay +from a particular user equipment to a given service function endpoint without putting additional load on the deployed services. For detailed analysis +on the calculations performed by CLMC to derive this metric see the [documentation](https://gitlab.it-innovation.soton.ac.uk/FLAME/consortium/3rdparties/flame-clmc/blob/master/docs/total-service-request-delay.md) +particularly the [conclusions](https://gitlab.it-innovation.soton.ac.uk/FLAME/consortium/3rdparties/flame-clmc/blob/master/docs/total-service-request-delay.md#conclusion) section. 
+In order to use the API, three metrics must first be measured for each service function: + +* **response_time** – how much time it takes for a service to process a request (seconds) + +* **request_size** – the size of incoming requests for this service (bytes) + +* **response_size** – the size of outgoing responses from this service (bytes) + +An example is a Tomcat-based service, which uses the Tomcat telegraf input plugin for monitoring – the plugin measures the following fields +**bytes_sent**, **bytes_received** and **processing_time**. The measurement name is **tomcat_connector**. + +* **processing_time** is the total time spent processing incoming requests measured since the server has started, therefore, +this is a constantly increasing value. + +* **bytes_sent** and **bytes_received** are measured using the same approach + +The graph monitoring process runs every X seconds, where X is configurable (e.g. 30 seconds). The media service provider +must define how to get the aggregated value of the three fields defined above for this X-seconds window. For example, +if the media service provider decides to use **mean** values, the following definitions can be used for a Tomcat-based service: + +* **response_time** - `(max(processing_time) - min(processing_time)) / ((count(processing_time) -1)*1000)` + +* **request_size** - `(max(bytes_received) - min(bytes_received)) / (count(bytes_received) - 1)` + +* **response_size** - `(max(bytes_sent) - min(bytes_sent)) / (count(bytes_sent) - 1)` + +Simply explained, since the Tomcat plugin measures these values as a continuously increasing measurement, we take the difference between +the maximum and the minimum value received in the time window and divide by the number of measurements received for the time window, which +basically gives us the average (response time also divided by 1000 to convert milliseconds to seconds). 
+ +To demonstrate this, let's say that the measurements received in the time window for **processing_time** are 21439394, 21439399 and 21439406 milliseconds. +Therefore, the average processing time would be (21439406 - 21439394) / ((3 - 1) * 1000) = 0.006 seconds. The same procedure is followed +for the request size and response size fields. + + +### Running a graph monitoring process + +There is a dedicated endpoint which starts an automated graph monitoring script, running in the background on CLMC, +constantly executing a full processing pipeline - build temporal graph, query for end-to-end delay, write results back in InfluxDB, delete +temporal graph. The pipeline uses the defined configuration to periodically build the temporal graph and query for the end-to-end delay +from all possible UEs to every deployed service function endpoint and writes the result back into a dedicated measurement in the time-series database (InfluxDB). +For more information on the graph monitoring pipeline, see the relevant section below. + +* `POST http://platform/clmc/clmc-service/graph/monitor` + +* Expected JSON body serving as the configuration of the graph monitoring script: + +```json +{ + "query_period": "<how often is the graph pipeline executed - defines the length of the time window mentioned above>", + "results_measurement_name": "<where to write the end-to-end delay measurements>", + "service_function_chain": "<SFC identifier>", + "service_function_chain_instance": "<SFC identifier>_1", + "service_functions": { + "<service function package>": { + "response_time_field": "<field measuring the service delay of a service function - as described above>", + "request_size_field": "<field measuring the request size of a service function - as described above>", + "response_size_field": "<field measuring the response size of a service function - as described above>", + "measurement_name": "<the name of the measurement which contains the fields above>" + }, + ... 
+ } +} +``` + +* Example request with curl: + +`curl -X POST -d <JSON body> http://platform/clmc/clmc-service/graph/monitor` + +* Example JSON body for the tomcat-based service described above: + +```json +{ + "query_period": 30, + "results_measurement_name": "graph_measurements", + "service_function_chain": "fms-sfc", + "service_function_chain_instance": "fms-sfc_1", + "service_functions": { + "fms-storage": { + "response_time_field": "(max(processing_time) - min(processing_time)) / ((count(processing_time) -1)*1000)", + "request_size_field": "(max(bytes_received) - min(bytes_received)) / (count(bytes_received) - 1)", + "response_size_field": "(max(bytes_sent) - min(bytes_sent)) / (count(bytes_sent) - 1)", + "measurement_name": "tomcat_connector" + } + } +} +``` + +An example response will look like this: + +```json +{ + "uuid": "75df6f8d-3829-4fd8-a3e6-b3e917010141", + "database": "fms-sfc" +} +``` + +The configuration described above will start a graph monitoring process executing every 30 seconds and writing the end-to-end delay results +in the measurement named **graph_measurements**, database **fms-sfc**. To stop the graph monitoring process, use the request ID received in +the response of the previous request: + +`curl -X DELETE http://platform/clmc/clmc-service/graph/monitor/75df6f8d-3829-4fd8-a3e6-b3e917010141` + +To view the status of the graph monitoring process, send the same request, but using a GET method rather than DELETE. + +`curl -X GET http://platform/clmc/clmc-service/graph/monitor/75df6f8d-3829-4fd8-a3e6-b3e917010141` + +Keep in mind that since this process is executing once in a given period, it is expected to see status **sleeping** in the response. +Example response: + +```json +{ + "status": "sleeping", + "msg": "Successfully fetched status of graph pipeline process." 
+} +``` + +### Graph monitoring pipeline - technical details + +In order for service graph-based monitoring to be possible, the network topology graph must be built with the relevant network link latencies. +This network graph can be created/updated/deleted by sending a POST/PUT/DELETE request to the **/clmc/clmc-service/graph/network** API endpoint: + +``` +curl -X POST http://platform/clmc/clmc-service/graph/network +curl -X PUT http://platform/clmc/clmc-service/graph/network +curl -X DELETE http://platform/clmc/clmc-service/graph/network +``` + +After the network graph is built, a graph monitoring process can execute the following steps: + +1) Build a temporal graph for a particular service function chain +2) Query the temporal graph for round-trip-time +3) Write results in the time-series database (InfluxDB) +4) Clean up and delete the temporal graph + + +#### Building a temporal graph + +The temporal graph could be built by sending a POST request to the **/clmc/clmc-service/graph/temporal** API endpoint. The request body +follows the same format as the one used to start an automated graph monitoring script described above with the only difference being that the +**from** and **to** timestamps must be specified thus defining the time window for which this temporal graph relates to - for example: + +```json +{ + "from": "<start of the time window, UNIX timestamp, e.g. 1549881060>", + "to": "<end of the time window, UNIX timestamp, e.g. 
1550151600>", + "service_function_chain": "<SFC identifier>", + "service_function_chain_instance": "<SFC identifier>_1", + "service_functions": { + "<service function package>": { + "response_time_field": "<field measuring the service delay of a service function - as described above>", + "request_size_field": "<field measuring the request size of a service function - as described above>", + "response_size_field": "<field measuring the response size of a service function - as described above>", + "measurement_name": "<the name of the measurement which contains the fields above>" + }, + ... + } +} +``` + +`curl -X POST -d <JSON body> http://platform/clmc/clmc-service/graph/temporal` + +The CLMC would then build the temporal graph in its graph database (Neo4j) and populate it with the time-series data valid for the defined time window. + + +#### Querying the temporal graph + +The temporal graph built in the previous step can be used to retrieve the end-to-end delay by sending a GET request to the +**/clmc/clmc-service/graph/temporal/{uuid}/round-trip-time?startpoint={ue, cluster or switch}&endpoint={service function endpoint}**. +This endpoint requires the UUID of the temporal graph received in the response from the previous step, as well as UE and service function endpoint identifiers. +The query is, thus, configured to return the end-to-end delay from a particular UE (User Equipment) to a particular service endpoint deployed on the FLAME platform. +For example: + +`curl -X GET http://platform/clmc/clmc-service/graph/temporal/ac2cd21c-9c36-44ea-a923-51ca3f72bf7a/round-trip-time?startpoint=ue20&endpoint=fms-storage-endpoint` + +The automated graph monitoring process (described in the previous sections) executes this query for every possible pair of a UE and a service function endpoint to +ensure that all metrics are collected. 
+ + +#### Writing results in InfluxDB + +The response of the previous requests will contain metrics such as round-trip-time, network delay and service delay. These are returned in JSON format +which must then be converted to the InfluxDB line protocol format. An example would look like: + +``` +graph_measurement,flame_server=DC3,flame_sfci=fms-sfc-1,flame_location=DC3,flame_sfe=fms-storage-second-endpoint,flame_sfp=fms-storage,flame_sfc=fms-sfc,flame_sf=fms-storage-ns,traffic_source=ue24 round_trip_time=0.029501264137931037,service_delay=0.0195,network_delay=0.005 1550499460000000000 +``` + +This measurement line could then be reported to InfluxDB with a POST request to **/clmc/influxdb/write?db={SFC identifier}**: + +`curl -X POST http://platform/clmc/influxdb/write?db=fms-sfc --data-binary <measurement line>` + + +#### Clean up + +Once the temporal graph is no longer used, or the time window it relates to is no longer viable, it can be deleted with a DELETE +request to **/clmc/clmc-service/graph/temporal/{uuid}**. The UUID parameter is the same as in the round-trip time query request, +i.e. the UUID received when building the temporal graph. For example: + +`curl -X DELETE http://platform/clmc/clmc-service/graph/temporal/ac2cd21c-9c36-44ea-a923-51ca3f72bf7a` + + +#### Summary + +The graph monitoring process described in the beginning of this document automates the steps described above. When defining a query period, e.g. 30 seconds, +the process will start executing the pipeline every 30 seconds, by defining non-overlapping, contiguous time windows. For each time window, a temporal graph is built, +then queried for end-to-end delay and finally deleted. 
\ No newline at end of file diff --git a/scripts/clmc-service/graph-network-topology.sh b/scripts/clmc-service/graph-network-topology.sh new file mode 100644 index 0000000000000000000000000000000000000000..57c58433ed4ae9582d3fccacb36288df4a6209ec --- /dev/null +++ b/scripts/clmc-service/graph-network-topology.sh @@ -0,0 +1,39 @@ +#!/bin/bash +#///////////////////////////////////////////////////////////////////////// +#// +#// (c) University of Southampton IT Innovation Centre, 2018 +#// +#// Copyright in this software belongs to University of Southampton +#// IT Innovation Centre of Gamma House, Enterprise Road, +#// Chilworth Science Park, Southampton, SO16 7NS, UK. +#// +#// This software may not be used, sold, licensed, transferred, copied +#// or reproduced in whole or in part in any manner or form or in or +#// on any media by any person other than in accordance with the terms +#// of the Licence Agreement supplied with the software, or otherwise +#// without the prior written consent of the copyright owners. +#// +#// This software is distributed WITHOUT ANY WARRANTY, without even the +#// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +#// PURPOSE, except where stated in the Licence Agreement supplied with +#// the software. +#// +#// Created By : Nikolay Stanchev +#// Created Date : 03/06/2019 +#// Created for Project : FLAME +#// +#///////////////////////////////////////////////////////////////////////// + + +CLMC_IP="localhost" + +while true +do + + echo "Updating graph network topology..." + curl -s -X PUT http://${CLMC_IP}/clmc-service/graph/network + + echo "Waiting 2 minutes before next update..." 
+ sleep $((2*60)) + +done diff --git a/scripts/clmc-service/graph-pipeline.sh b/scripts/clmc-service/graph-pipeline.sh index c59987bb2601a712b2a2e71b5df941257baa4491..7cc115cbaf54c5c2238af7234314c90666d8cde1 100644 --- a/scripts/clmc-service/graph-pipeline.sh +++ b/scripts/clmc-service/graph-pipeline.sh @@ -79,14 +79,16 @@ do local_tags=$(echo ${response} | jq -r '.local_tags | to_entries | map("\(.key)=\(.value|tostring)") | join (",")') echo "Local tags: ${local_tags}" - fields=$(echo ${response} | jq -r '. | "\(.round_trip_time) \(.response_time) \(.total_forward_latency)"') - read rtt service_delay network_delay <<< ${fields} + fields=$(echo ${response} | jq -r '. | "\(.round_trip_time) \(.request_size) \(.response_size) \(.response_time) \(.total_forward_latency)"') + read rtt request_size response_size service_delay network_delay <<< ${fields} echo "Round-trip-time: ${rtt}" echo "Service delay: ${service_delay}" echo "Network latency ${network_delay}" + echo "Request size: ${request_size}" + echo "Response size: ${response_size}" - measurement_line="${results_measurement},${global_tags},${local_tags} round_trip_time=${rtt},service_delay=${service_delay},network_delay=${network_delay} ${timestamp}" + measurement_line="${results_measurement},${global_tags},${local_tags} round_trip_time=${rtt},service_delay=${service_delay},network_delay=${network_delay},request_size=${request_size},response_size=${response_size} ${timestamp}" echo "Measurement line: ${measurement_line}" response=$(curl -si -X POST "http://${CLMC_IP}/influxdb/write?db=${db_name}" --data-binary "${measurement_line}") echo "InfluxDB response: ${response}" diff --git a/scripts/clmc-service/install-clmc-service.sh b/scripts/clmc-service/install-clmc-service.sh index 6b3afbe75b9ee2d44b1eb7bfc09001780be278f4..43fa9f5f716c6eff477174f01cdc026aa7d5c9e0 100755 --- a/scripts/clmc-service/install-clmc-service.sh +++ b/scripts/clmc-service/install-clmc-service.sh @@ -18,7 +18,7 @@ #// PURPOSE, except where 
stated in the Licence Agreement supplied with #// the software. #// -#// Created By : Michael Boniface, Nikolay Sanchev +#// Created By : Michael Boniface, Nikolay Stanchev #// Created Date : 13/12/2018 #// Created for Project : FLAME #// @@ -115,6 +115,14 @@ if [[ $? -ne 0 ]] ; then exit 1 fi +# move the graph pipeline script +cp ${REPO_ROOT}/scripts/clmc-service/graph-pipeline.sh /usr/local/bin/graph-pipeline.sh +chmod u+x /usr/local/bin/graph-pipeline.sh + +# move the graph network topology script +cp ${REPO_ROOT}/scripts/clmc-service/graph-network-topology.sh /usr/local/bin/graph-network-topology.sh +chmod u+x /usr/local/bin/graph-network-topology.sh + # Install clmc as systemctl service # ----------------------------------------------------------------------- mkdir -p /opt/flame/clmc @@ -157,7 +165,3 @@ done apt-get install nginx -y cp ${REPO_ROOT}/scripts/clmc-service/nginx.conf /etc/nginx/nginx.conf systemctl restart nginx # nginx is already started on installation, to read the new conf it needs to be restarted - -# move the graph pipeline script -cp ${REPO_ROOT}/scripts/clmc-service/graph-pipeline.sh /usr/local/bin/ -chmod u+x /usr/local/bin/graph-pipeline.sh \ No newline at end of file diff --git a/src/service/VERSION b/src/service/VERSION index b62a3e51a74a810ec8e8b760cce66cdbfac3c1d6..1f8197ce8a3b7dcfa3937a90ffee1db776aaace7 100644 --- a/src/service/VERSION +++ b/src/service/VERSION @@ -1 +1 @@ -__version__ = "2.1.2" \ No newline at end of file +__version__ = "2.2.0" \ No newline at end of file diff --git a/src/service/clmcservice/graphapi/tests.py b/src/service/clmcservice/graphapi/tests.py index 871dd5e617b302a99fb7fadc21fa7c4c5dba226b..596725c22f9f73c61f11d0db3ce150bd4a971bed 100644 --- a/src/service/clmcservice/graphapi/tests.py +++ b/src/service/clmcservice/graphapi/tests.py @@ -422,22 +422,23 @@ class TestGraphAPI(object): ("DC6", "nginx_1_ep2", [], [], 22.2, 35600, 6420, 22.2, {"flame_location": "DC6", "flame_sfe": "nginx_1_ep2", "flame_server": 
"DC6", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), ("127.0.0.6", "nginx_1_ep2", [0], [0], 22.2, 35600, 6420, 22.2, {"flame_location": "DC6", "flame_sfe": "nginx_1_ep2", "flame_server": "DC6", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), ("ue6", "nginx_1_ep2", [0, 0], [0, 0], 22.2, 35600, 6420, 22.2, {"flame_location": "DC6", "flame_sfe": "nginx_1_ep2", "flame_server": "DC6", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), - ("DC2", "nginx_1_ep2", [0, 7.5, 15, 4.5, 0], [0, 4.5, 15, 7.5, 0], 22.2, 35600, 6420, 78, {"flame_location": "DC6", "flame_sfe": "nginx_1_ep2", "flame_server": "DC6", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), - ("127.0.0.2", "nginx_1_ep2", [7.5, 15, 4.5, 0], [0, 4.5, 15, 7.5], 22.2, 35600, 6420, 78, {"flame_location": "DC6", "flame_sfe": "nginx_1_ep2", "flame_server": "DC6", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), - ("DC3", "nginx_1_ep1", [0, 12.5, 0], [0, 12.5, 0], 18.2, 2260, 9660, 38, {"flame_location": "DC4", "flame_sfe": "nginx_1_ep1", "flame_server": "DC4", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), - ("127.0.0.3", "nginx_1_ep1", [12.5, 0], [0, 12.5], 18.2, 2260, 9660, 38, {"flame_location": "DC4", "flame_sfe": "nginx_1_ep1", "flame_server": "DC4", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), - ("ue3", "nginx_1_ep1", [0, 12.5, 0], [0, 12.5, 0], 18.2, 2260, 9660, 38, {"flame_location": "DC4", "flame_sfe": "nginx_1_ep1", "flame_server": "DC4", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}) + ("DC2", "nginx_1_ep2", [0, 7.5, 15, 4.5, 0], [0, 4.5, 15, 7.5, 0], 
22.2, 35600, 6420, 76.2, {"flame_location": "DC6", "flame_sfe": "nginx_1_ep2", "flame_server": "DC6", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), + ("127.0.0.2", "nginx_1_ep2", [7.5, 15, 4.5, 0], [0, 4.5, 15, 7.5], 22.2, 35600, 6420, 76.2, {"flame_location": "DC6", "flame_sfe": "nginx_1_ep2", "flame_server": "DC6", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), + ("DC3", "nginx_1_ep1", [0, 12.5, 0], [0, 12.5, 0], 18.2, 2260, 9660, 43.2, {"flame_location": "DC4", "flame_sfe": "nginx_1_ep1", "flame_server": "DC4", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), + ("127.0.0.3", "nginx_1_ep1", [12.5, 0], [0, 12.5], 18.2, 2260, 9660, 43.2, {"flame_location": "DC4", "flame_sfe": "nginx_1_ep1", "flame_server": "DC4", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}), + ("ue3", "nginx_1_ep1", [0, 12.5, 0], [0, 12.5, 0], 18.2, 2260, 9660, 43.2, {"flame_location": "DC4", "flame_sfe": "nginx_1_ep1", "flame_server": "DC4", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_premium", "flame_sfp": "nginx", "flame_sf": "nginx_1"}) ): request = testing.DummyRequest() request.matchdict["graph_id"] = request_id request.params["endpoint"] = endpoint request.params["startpoint"] = startpoint response = GraphAPI(request).run_rtt_query() - # approximation is used to avoid long float numbers retrieved from influx, the test case ensures the results are different enough so that approximation of +-1 is good enough for testing - assert response.pop("round_trip_time") == pytest.approx(rtt, 1), "Incorrect RTT response" + # approximation is used to avoid incorrect float numbers comparison + assert response.pop("request_size") == pytest.approx(request_size, 0.01), "Incorrect request size in RTT response" + assert response.pop("response_size") == 
pytest.approx(response_size, 0.01), "Incorrect response size in RTT response" + assert response.pop("round_trip_time") == pytest.approx(rtt, 0.01), "Incorrect RTT response" assert response == {"forward_latencies": forward_latencies, "reverse_latencies": reverse_latencies, "total_forward_latency": sum(forward_latencies), "total_reverse_latency": sum(reverse_latencies), - "bandwidth": 104857600, "response_time": response_time, "local_tags": {"traffic_source": startpoint}, "global_tags": global_tags, - "request_size": request_size, "response_size": response_size}, "Incorrect RTT response" + "response_time": response_time, "local_tags": {"traffic_source": startpoint}, "global_tags": global_tags}, "Incorrect RTT response" # send a new request for a new service function chain to create a second subgraph to test response = next(responses) @@ -448,26 +449,26 @@ class TestGraphAPI(object): for startpoint, endpoint, forward_latencies, reverse_latencies, response_time, request_size, response_size, rtt, global_tags in ( ("DC5", "apache_1_ep1", [], [], 17.6, 1480, 7860, 17.6, {"flame_location": "DC5", "flame_sfe": "apache_1_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "apache", "flame_sf": "apache_1"}), ("127.0.0.5", "apache_1_ep1", [0], [0], 17.6, 1480, 7860, 17.6, {"flame_location": "DC5", "flame_sfe": "apache_1_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "apache", "flame_sf": "apache_1"}), - ("DC5", "minio_2_ep1", [], [], 7, 2998, 3610, 7, {"flame_location": "DC5", "flame_sfe": "minio_2_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "minio", "flame_sf": "minio_2"}), - ("127.0.0.5", "minio_2_ep1", [0], [0], 7, 2998, 3610, 7, {"flame_location": "DC5", "flame_sfe": "minio_2_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "minio", "flame_sf": 
"minio_2"}), - ("DC3", "apache_1_ep1", [0, 9, 15, 0], [0, 15, 9, 0], 17.6, 1480, 7860, 64, {"flame_location": "DC5", "flame_sfe": "apache_1_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "apache", "flame_sf": "apache_1"}), - ("127.0.0.3", "apache_1_ep1", [9, 15, 0], [0, 15, 9], 17.6, 1480, 7860, 64, {"flame_location": "DC5", "flame_sfe": "apache_1_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "apache", "flame_sf": "apache_1"}), - ("ue3", "apache_1_ep1", [0, 9, 15, 0], [0, 15, 9, 0], 17.6, 1480, 7860, 64, {"flame_location": "DC5", "flame_sfe": "apache_1_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "apache", "flame_sf": "apache_1"}), - ("DC2", "minio_2_ep1", [0, 7.5, 15, 0], [0, 15, 7.5, 0], 7, 2998, 3610, 53, {"flame_location": "DC5", "flame_sfe": "minio_2_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "minio", "flame_sf": "minio_2"}), - ("127.0.0.2", "minio_2_ep1", [7.5, 15, 0], [0, 15, 7.5], 7, 2998, 3610, 53, {"flame_location": "DC5", "flame_sfe": "minio_2_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "minio", "flame_sf": "minio_2"}), - ("ue2", "minio_2_ep1", [0, 7.5, 15, 0], [0, 15, 7.5, 0], 7, 2998, 3610, 53, {"flame_location": "DC5", "flame_sfe": "minio_2_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "minio", "flame_sf": "minio_2"}) + ("DC5", "minio_2_ep1", [], [], 7, 2998.33, 3610, 7, {"flame_location": "DC5", "flame_sfe": "minio_2_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "minio", "flame_sf": "minio_2"}), + ("127.0.0.5", "minio_2_ep1", [0], [0], 7, 2998.33, 3610, 7, {"flame_location": "DC5", "flame_sfe": "minio_2_ep1", "flame_server": "DC5", 
"flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "minio", "flame_sf": "minio_2"}), + ("DC3", "apache_1_ep1", [0, 9, 15, 0], [0, 15, 9, 0], 17.6, 1480, 7860, 65.6, {"flame_location": "DC5", "flame_sfe": "apache_1_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "apache", "flame_sf": "apache_1"}), + ("127.0.0.3", "apache_1_ep1", [9, 15, 0], [0, 15, 9], 17.6, 1480, 7860, 65.6, {"flame_location": "DC5", "flame_sfe": "apache_1_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "apache", "flame_sf": "apache_1"}), + ("ue3", "apache_1_ep1", [0, 9, 15, 0], [0, 15, 9, 0], 17.6, 1480, 7860, 65.6, {"flame_location": "DC5", "flame_sfe": "apache_1_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "apache", "flame_sf": "apache_1"}), + ("DC2", "minio_2_ep1", [0, 7.5, 15, 0], [0, 15, 7.5, 0], 7, 2998.33, 3610, 52, {"flame_location": "DC5", "flame_sfe": "minio_2_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "minio", "flame_sf": "minio_2"}), + ("127.0.0.2", "minio_2_ep1", [7.5, 15, 0], [0, 15, 7.5], 7, 2998.33, 3610, 52, {"flame_location": "DC5", "flame_sfe": "minio_2_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "minio", "flame_sf": "minio_2"}), + ("ue2", "minio_2_ep1", [0, 7.5, 15, 0], [0, 15, 7.5, 0], 7, 2998.33, 3610, 52, {"flame_location": "DC5", "flame_sfe": "minio_2_ep1", "flame_server": "DC5", "flame_sfc": "test_sfc", "flame_sfci": "test_sfc_non_premium", "flame_sfp": "minio", "flame_sf": "minio_2"}) ): request = testing.DummyRequest() request.matchdict["graph_id"] = request_id request.params["endpoint"] = endpoint request.params["startpoint"] = startpoint response = GraphAPI(request).run_rtt_query() - # approximation is used to avoid long float numbers retrieved from 
influx, the test case ensures the results are different enough so that approximation of +-1 is good enough for testing - assert response.pop("request_size") == pytest.approx(request_size, 1), "Incorrect RTT response" - assert response.pop("response_size") == pytest.approx(response_size, 1), "Incorrect RTT response" - assert response.pop("round_trip_time") == pytest.approx(rtt, 1), "Incorrect RTT response" + # approximation is used to avoid incorrect float numbers comparison + assert response.pop("request_size") == pytest.approx(request_size, 0.01), "Incorrect request size in RTT response" + assert response.pop("response_size") == pytest.approx(response_size, 0.01), "Incorrect response size in RTT response" + assert response.pop("round_trip_time") == pytest.approx(rtt, 0.01), "Incorrect RTT response" assert response == {"forward_latencies": forward_latencies, "reverse_latencies": reverse_latencies, "total_forward_latency": sum(forward_latencies), "total_reverse_latency": sum(reverse_latencies), - "bandwidth": 104857600, "response_time": response_time, "local_tags": {"traffic_source": startpoint}, "global_tags": global_tags}, "Incorrect RTT response" + "response_time": response_time, "local_tags": {"traffic_source": startpoint}, "global_tags": global_tags}, "Incorrect RTT response" @patch('clmcservice.graphapi.views.load') @patch('clmcservice.graphapi.views.open') diff --git a/src/service/clmcservice/graphapi/views.py b/src/service/clmcservice/graphapi/views.py index 9232e810ea545259af668de452bcefba99d1088f..9fb84e62548df3852fe71854b5526b8b908c5275 100644 --- a/src/service/clmcservice/graphapi/views.py +++ b/src/service/clmcservice/graphapi/views.py @@ -216,40 +216,44 @@ class GraphAPI(object): result["total_forward_latency"] = total_forward_latency total_reverse_latency = sum(result["reverse_latencies"]) result["total_reverse_latency"] = total_reverse_latency - bandwidth = self.request.registry.settings["network_bandwidth"] - result["bandwidth"] = bandwidth 
service_delay = result["response_time"] - request_size = result["request_size"] - response_size = result["response_size"] - round_trip_time = self.calculate_round_trip_time(total_forward_latency, total_reverse_latency, service_delay, request_size, response_size, bandwidth) + # the service delay is assumed to include the network data delay, so no need to include the bandwidth and the request/response size in the calculation + # bandwidth = self.request.registry.settings["network_bandwidth"] + # request_size = result["request_size"] + # response_size = result["response_size"] + # round_trip_time = self.calculate_round_trip_time(total_forward_latency, total_reverse_latency, service_delay, request_size, response_size, bandwidth) + + # the calculation in the new model is simple the network latency added to the service processing time (network data delay + any time the service spends on processing) + round_trip_time = total_forward_latency + service_delay + total_reverse_latency result["round_trip_time"] = round_trip_time return result - @staticmethod - def calculate_round_trip_time(forward_latency, reverse_latency, service_delay, request_size, response_size, bandwidth, packet_size=1500, packet_header_size=50): - """ - Calculates the round trip time given the list of arguments. 
- - :param forward_latency: network latency in forward direction (s) - :param reverse_latency: network latency in reverse direction (s) - :param service_delay: media service delay (s) - :param request_size: request size (bytes) - :param response_size: response size (bytes) - :param bandwidth: network bandwidth (Mb/s) - :param packet_size: size of packet (bytes) - :param packet_header_size: size of the header of the packet (bytes) - :return: the calculated round trip time - """ - - if forward_latency > 0 and reverse_latency > 0: - forward_data_delay = (8/10**6) * (request_size / bandwidth) * (packet_size / (packet_size - packet_header_size)) - reverse_data_delay = (8/10**6) * (response_size / bandwidth) * (packet_size / (packet_size - packet_header_size)) - else: - forward_data_delay, reverse_data_delay = 0, 0 - - return forward_latency + forward_data_delay + service_delay + reverse_latency + reverse_data_delay + # @staticmethod + # def calculate_round_trip_time(forward_latency, reverse_latency, service_delay, request_size, response_size, bandwidth, packet_size=1500, packet_header_size=50): + # """ + # Calculates the round trip time given the list of arguments. 
+ # + # (DEPRECATED - this is a very simplistic model which uses a fixed static bandwidth value and fixed packet / packet header size) + # + # :param forward_latency: network latency in forward direction (s) + # :param reverse_latency: network latency in reverse direction (s) + # :param service_delay: media service delay (s) + # :param request_size: request size (bytes) + # :param response_size: response size (bytes) + # :param bandwidth: network bandwidth (Mb/s) + # :param packet_size: size of packet (bytes) + # :param packet_header_size: size of the header of the packet (bytes) + # :return: the calculated round trip time + # """ + # + # forward_data_delay = (8/10**6) * (request_size / bandwidth) * (packet_size / (packet_size - packet_header_size)) + # reverse_data_delay = (8/10**6) * (response_size / bandwidth) * (packet_size / (packet_size - packet_header_size)) + # + # rtt = forward_latency + forward_data_delay + service_delay + reverse_latency + reverse_data_delay + # + # return rtt @view_config(route_name='graph_network_topology', request_method='POST') def build_network_topology(self): diff --git a/src/service/development.ini b/src/service/development.ini index e132f45f6e063f977e0decb8f93ed3bfbde1c25d..bfae568a67367e7e34cabfccd0236a62f0ad40d0 100644 --- a/src/service/development.ini +++ b/src/service/development.ini @@ -18,7 +18,7 @@ exclog.ignore = network_clusters_path = /opt/clmc/src/service/resources/GraphAPI/network_clusters.json network_ues_path = /opt/clmc/src/service/resources/GraphAPI/network_ues.json -# 10000 Mb/s = 10 Gb/s +# 10000 Mb/s = 10 Gb/s (static configuration of maximum theoretical bandwidth) network_bandwidth = 10000 # PostgreSQL connection url @@ -88,14 +88,16 @@ level = NOTSET formatter = generic [handler_filelog] -class = FileHandler -args = ('/var/log/flame/clmc/service.log','a') +# rotating file handler - uses the same file until the maximum size (40MB) is exceeded, then backup files are created +class = 
logging.handlers.RotatingFileHandler +# max size per file is 40MB with 5 backup files, hence up to 200MB of logging data is saved on a rotating basis +args = ('/var/log/flame/clmc/service.log', 'a', 40*1000*1000, 5) level = NOTSET formatter = generic [handler_exc_handler] -class = FileHandler -args = ('/var/log/flame/clmc/service-exceptions.log', 'a') +class = logging.handlers.RotatingFileHandler +args = ('/var/log/flame/clmc/service-exceptions.log', 'a', 40*1000*1000, 5) level = ERROR formatter = exc_formatter diff --git a/src/service/production.ini b/src/service/production.ini index c11a6346ac8553954538356f5e1c261a94bc3a74..eb5577e32d8261f0b05fa98109a1df42efa9e9b3 100644 --- a/src/service/production.ini +++ b/src/service/production.ini @@ -18,7 +18,7 @@ exclog.ignore = network_clusters_path = /opt/clmc/src/service/resources/GraphAPI/network_clusters.json network_ues_path = /opt/clmc/src/service/resources/GraphAPI/network_ues.json -# 10000 Mb/s = 10 Gb/s +# 10000 Mb/s = 10 Gb/s (static configuration of maximum theoretical bandwidth) network_bandwidth = 10000 # PostgreSQL connection url @@ -84,14 +84,16 @@ level = NOTSET formatter = generic [handler_filelog] -class = FileHandler -args = ('/var/log/flame/clmc/service.log','a') +# rotating file handler - uses the same file until the maximum size (40MB) is exceeded, then backup files are created +class = logging.handlers.RotatingFileHandler +# max size per file is 40MB with 5 backup files, hence up to 200MB of logging data is saved on a rotating basis +args = ('/var/log/flame/clmc/service.log', 'a', 40*1000*1000, 5) level = NOTSET formatter = generic [handler_exc_handler] -class = FileHandler -args = ('/var/log/flame/clmc/service-exceptions.log', 'a') +class = logging.handlers.RotatingFileHandler +args = ('/var/log/flame/clmc/service-exceptions.log', 'a', 40*1000*1000, 5) level = ERROR formatter = exc_formatter diff --git a/src/service/resources/GraphAPI/network_clusters.json 
b/src/service/resources/GraphAPI/network_clusters.json index 40fe2629a6ab664643591eb95533bfe7613520b8..11a44efef96ea34bf3485bc0f3afdd568da5e446 100644 --- a/src/service/resources/GraphAPI/network_clusters.json +++ b/src/service/resources/GraphAPI/network_clusters.json @@ -1,6 +1,6 @@ { - "172.20.231.11": "20-sr1-cluster1-cluster", - "172.20.231.18": "22-sr1-cluster1-cluster", - "172.20.231.17": "23-sr1-cluster1-cluster", - "172.20.231.2": "24-sr1-cluster1-cluster" + "172.20.231.7": "20-sr1-cluster1-cluster", + "172.20.231.12": "19-sr1-cluster1-cluster", + "172.20.231.17": "18-sr1-cluster1-cluster", + "172.20.231.15": "17-sr1-cluster1-cluster" } \ No newline at end of file diff --git a/src/service/resources/GraphAPI/network_ues.json b/src/service/resources/GraphAPI/network_ues.json index 40c1c973a03afcf29a8c3b2f42b9fdac85277831..8991ae6d450b08afc18eda771aa633851cfcb825 100644 --- a/src/service/resources/GraphAPI/network_ues.json +++ b/src/service/resources/GraphAPI/network_ues.json @@ -1,6 +1,6 @@ { - "172.20.231.3": "ue20", - "172.20.231.22": "ue22", - "172.20.231.7": "ue23", - "172.20.231.19": "ue24" + "172.20.231.14": "ue20", + "172.20.231.5": "ue19", + "172.20.231.9": "ue18", + "172.20.231.16": "ue17" } \ No newline at end of file diff --git a/src/test/VERSION b/src/test/VERSION index b62a3e51a74a810ec8e8b760cce66cdbfac3c1d6..1f8197ce8a3b7dcfa3937a90ffee1db776aaace7 100644 --- a/src/test/VERSION +++ b/src/test/VERSION @@ -1 +1 @@ -__version__ = "2.1.2" \ No newline at end of file +__version__ = "2.2.0" \ No newline at end of file