diff --git a/scripts/influx/telegraf_ipendpoint_template.conf b/scripts/influx/telegraf_ipendpoint_template.conf new file mode 100644 index 0000000000000000000000000000000000000000..0fe2cc5d992390b908eb87f65da515acf70089c7 --- /dev/null +++ b/scripts/influx/telegraf_ipendpoint_template.conf @@ -0,0 +1,122 @@ +# Telegraf configuration + +# Telegraf is entirely plugin driven. All metrics are gathered from the +# declared inputs, and sent to the declared outputs. + +# Plugins must be declared in here to be active. +# To deactivate a plugin, comment out the name and any variables. + +# Use 'telegraf -config telegraf.conf -test' to see what metrics a config +# file would generate. + +# Global tags can be specified here in key="value" format. +[global_tags] + # location of the data centre + location={{LOCATION}} + # media service template id + sfc={{SFC_ID}} + # media service instance + sfc_i={{SFC_ID_INSTANCE}} + # service function type + sf={{SF_ID}} + # service function instance id + sf_i={{SF_ID_INSTANCE}} + # ipendpoint id aka surrogate instance + ipendpoint={{IP_ENDPOINT_ID}} + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will cache metric_buffer_limit metrics for each output, and will + ## flush this buffer on a successful write. + metric_buffer_limit = 1000 + ## Flush the buffer whenever full, regardless of flush_interval. + flush_buffer_when_full = true + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. 
You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## Logging configuration: + ## Run telegraf in debug mode + debug = false + ## Run telegraf in quiet mode + quiet = false + ## Specify the log file name. The empty string means to log to stdout. + logfile = "G:/Telegraf/telegraf.log" + + ## Override default hostname, if empty use os.Hostname() + hostname = "" + + +############################################################################### +# OUTPUTS # +############################################################################### + +# Configuration for influxdb server to send metrics to +[[outputs.influxdb]] + # The full HTTP or UDP endpoint URL for your InfluxDB instance. + # Multiple urls can be specified but it is assumed that they are part of the same + # cluster, this means that only ONE of the urls will be written to each interval. + # urls = ["udp://127.0.0.1:8089"] # UDP endpoint example + urls = ["{{INFLUXDB_URL}}"] # required + # The target database for metrics (telegraf will create it if not exists) + database = "CLMCMetrics" # required + # Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h". + # note: using second precision greatly helps InfluxDB compression + precision = "s" + + ## Write timeout (for the InfluxDB client), formatted as a string. + ## If not provided, will default to 5s. 0s means no timeout (not recommended). 
+ timeout = "5s" + # username = "telegraf" + # password = "metricsmetricsmetricsmetrics" + # Set the user agent for HTTP POSTs (can be useful for log differentiation) + # user_agent = "telegraf" + # Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes) + # udp_payload = 512 +[[outputs.file]] + ## Files to write to, "stdout" is a specially handled file. + files = ["stdout", "/tmp/metrics.out"] + + ## Data format to output. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md + data_format = "influx" + + +############################################################################### +# INPUTS # +############################################################################### +# # Influx HTTP write listener +[[inputs.http_listener]] + ## Address and port to host HTTP listener on + service_address = ":8186" + + ## timeouts + read_timeout = "10s" + write_timeout = "10s" + + ## HTTPS + #tls_cert= "/etc/telegraf/cert.pem" + #tls_key = "/etc/telegraf/key.pem" + + ## MTLS + #tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"] + \ No newline at end of file diff --git a/src/mediaServiceSim/LineProtocolGenerator.py b/src/mediaServiceSim/LineProtocolGenerator.py index f57689a2a34d90871caf0a864b21c93a860bd2df..3d4b07736b3fa3b318754d411aaeb1d91aa2f537 100644 --- a/src/mediaServiceSim/LineProtocolGenerator.py +++ b/src/mediaServiceSim/LineProtocolGenerator.py @@ -18,7 +18,7 @@ def generate_network_report(recieved_bytes, sent_bytes, time): result += ' ' + str(_getNSTime(time)) # Measurement - # print('network'+result) + #print(result) return result @@ -42,13 +42,14 @@ def generate_vm_config(state, cpu, mem, storage, time): # Reports cpu usage, scaling on requests -def generate_cpu_report(cpu_useage, cpu_usage_system, time): - result = 'vm_host_cpu_usage' +def generate_cpu_report(cpu_usage, cpu_active_time, cpu_idle_time, time): + 
result = 'cpu_usage' # Tag result += ' ' # field - result += 'cpu_usage='+str(cpu_useage) - result += ',cpu_usage_system='+str(cpu_usage_system) + result += 'cpu_usage='+str(cpu_usage) + result += ',cpu_active_time='+str(cpu_active_time) + result += ',cpu_idle_time='+str(cpu_idle_time) result += ' ' # Time result += str(_getNSTime(time)) @@ -70,9 +71,24 @@ def generate_mpegdash_report(resource, requests, avg_response_time, peak_respons result += 'peak_response_time=' + str(peak_response_time) # Timestamp result += ' ' + str(_getNSTime(time)) - # print(result) + print(result) return result +#ipendpoint_route,ipendpoint_id,cont_nav=FQDN HTTP_REQUESTS_FQDN_M, NETWORK_FQDN_LATENCY timestamp +def generate_ipendpoint_route(resource, requests, latency, time): + # Measurement + result = 'ipendpoint_route' + # Tags + result += ',cont_nav=\"' + str(resource) + "\" " + # Fields + + # result += 'cont_rep=' + str(quality) + ',' + result += 'http_requests_fqdn_m=' + str(requests) + ',' + result += 'network_fqdn_latency=' + str(latency) + # Timestamp + result += ' ' + str(_getNSTime(time)) + #print(result) + return result # Influx needs strings to be quoted, this provides a utility interface to do this def quote_wrap(str): @@ -82,7 +98,9 @@ def quote_wrap(str): # InfluxDB likes to have time-stamps in nanoseconds def _getNSTime(time): # Convert to nano-seconds - return 1000000 * time + timestamp = int(1000000000*time) + #print("timestamp", timestamp) + return timestamp # DEPRICATED # ____________________________________________________________________________ diff --git a/src/mediaServiceSim/simulator_v2.py b/src/mediaServiceSim/simulator_v2.py index 9f06f9dc2ce226ff8d7956afc5cfaadf33791bc0..ac8b0c504686410846413520a1dfda989f9a7153 100644 --- a/src/mediaServiceSim/simulator_v2.py +++ b/src/mediaServiceSim/simulator_v2.py @@ -2,14 +2,23 @@ import LineProtocolGenerator as lp import time import urllib.parse import urllib.request -from random import random, randint +import sys +import 
random + +# Simulation parameters +TICK_TIME = 1 +DEFAULT_REQUEST_RATE_INC = 1 +DEFAULT_REQUEST_RATE_INC_PERIOD = 10 +SIMULATION_TIME_SEC = 60*60 + +# CLMC parameters +INFLUX_DB_URL = 'http://192.168.50.10:8086' +AGENT_URL1 = 'http://192.168.50.11:8186' +AGENT_URL2 = 'http://192.168.50.12:8186' # Simulator for services class sim: def __init__(self, influx_url): - # requests per second for different quality levels - self.quality_request_rate = {"DC1": [10, 20, 10], "DC2": [5, 30, 5]} - # We don't need this as the db is CLMC metrics self.influx_db = 'CLMCMetrics' self.influx_url = influx_url @@ -19,52 +28,156 @@ class sim: def run(self, simulation_length_seconds): - start_time = time.time() - current_time = int(time.time()) - surrogate_services = [{'agent_url': 'http://192.168.50.11:8186', 'location': 'DC1', 'cpu': 2, - 'mem': '8GB', 'storage': '1TB'}, - {'agent_url': 'http://192.168.50.12:8186', 'location': 'DC2', 'cpu': 4, - 'mem': '8GB', 'storage': '1TB'} - ] - # Simulate surrogate services being asserted - for service in surrogate_services: - self._sendInfluxData(service['agent_url'], lp.generate_vm_config('starting', service['cpu'], service['mem'], service['storage'], current_time)) - for service in surrogate_services: - self._sendInfluxData(service['agent_url'], lp.generate_vm_config('running', service['cpu'], service['mem'], service['storage'], current_time)) - - # Run simulation - for i in range(simulation_length_seconds): - for service in surrogate_services: - - # Scale CPU usage on number of requests, quality and cpu allocation - cpu_usage = self.quality_request_rate[service['location']][0] - cpu_usage += self.quality_request_rate[service['location']][1]*2 - cpu_usage += self.quality_request_rate[service['location']][2]*4 - cpu_usage = cpu_usage/service['cpu'] - cpu_usage = cpu_usage/100 # Transform into % - self._sendInfluxData(service['agent_url'], lp.generate_cpu_report( cpu_usage, cpu_usage, current_time)) - - # Scale SENT/REC bytes on requests and 
quality - bytes = self.quality_request_rate[service['location']][0] - bytes += self.quality_request_rate[service['location']][1]*2 - bytes += self.quality_request_rate[service['location']][2]*4 - bytes_sent = 1024*bytes - bytes_rec = 32*bytes - self._sendInfluxData(service['agent_url'], lp.generate_network_report(bytes_rec, bytes_sent, current_time)) - - # Scale MPEG Dash on requests, quality, cpu usage - avg_response_time = randint(0, 5 * self.quality_request_rate[service['location']][0]) - avg_response_time += randint(0, 10 * self.quality_request_rate[service['location']][1]) - avg_response_time += randint(0, 15 * self.quality_request_rate[service['location']][2]) - avg_response_time *= cpu_usage - peak_response_time = avg_response_time + randint(30, 60) - requests = sum(self.quality_request_rate[service['location']]) - self._sendInfluxData(service['agent_url'], lp.generate_mpegdash_report('https://Netflix.com/scream', requests, avg_response_time, peak_response_time, current_time)) - # Add a second to the clock - current_time += 1000 - end_time = time.time() + start_time = time.time()-SIMULATION_TIME_SEC + sim_time = start_time + + # segment_size : the length of video requested at a time + # bit_rate: MPEG-2 High 1080p 25fps = 80Mbps + ip_endpoints = [{'agent_url': AGENT_URL1, 'location': 'DC1', 'cpu': 16, + 'mem': '8GB', 'storage': '1TB', 'request_queue': 0, 'request_arrival_rate': 0, + 'segment_size': 2, 'video_bit_rate': 80, 'packet_size': 1500}, + {'agent_url': AGENT_URL2, 'location': 'DC2', 'cpu': 4, + 'mem': '8GB', 'storage': '1TB', 'request_queue': 0, 'request_arrival_rate': 0, + 'segment_size': 2, 'video_bit_rate': 80, 'packet_size': 1500} + ] + + # Simulate configuration of the ipendpoints + # endpoint state->mu, sigma, secs normal distribution + config_delay_dist = {"placing": [10, 0.68], "booting": [10, 0.68],"connecting": [10, 0.68]} + + # Place endpoints + max_delay = 0 + for ip_endpoint in ip_endpoints: + delay_time = self._changeVMState(sim_time, 
ip_endpoint, config_delay_dist['placing'][0], config_delay_dist['placing'][0]*config_delay_dist['placing'][1], 'placing', 'placed') + if delay_time > max_delay: + max_delay = delay_time + sim_time +=max_delay + + # Boot endpoints + max_delay = 0 + for ip_endpoint in ip_endpoints: + delay_time = self._changeVMState(sim_time, ip_endpoint, config_delay_dist['booting'][0], config_delay_dist['booting'][0]*config_delay_dist['booting'][1], 'booting', 'booted') + if delay_time > max_delay: + max_delay = delay_time + sim_time +=max_delay + + # Connect endpoints + max_delay = 0 + for ip_endpoint in ip_endpoints: + delay_time = self._changeVMState(sim_time, ip_endpoint, config_delay_dist['connecting'][0], config_delay_dist['connecting'][0]*config_delay_dist['connecting'][1], 'connecting', 'connected') + if delay_time > max_delay: + max_delay = delay_time + sim_time +=max_delay + + request_arrival_rate_inc = DEFAULT_REQUEST_RATE_INC + request_queue = 0 + inc_period_count = 0 + for i in range(simulation_length_seconds): + for ip_endpoint in ip_endpoints: + request_processing_time = 0 + cpu_time_available = 0 + requests_processed = 0 + max_requests_processed = 0 + cpu_active_time = 0 + cpu_idle_time = 0 + cpu_usage = 0 + cpu_load_time = 0 + avg_response_time = 0 + peak_response_time = 0 + + # linear inc to arrival rate + if inc_period_count >= DEFAULT_REQUEST_RATE_INC_PERIOD: + ip_endpoint['request_arrival_rate'] += request_arrival_rate_inc + inc_period_count = 0 + else: + inc_period_count += 1 + # add new requests to the queue + ip_endpoint['request_queue'] += ip_endpoint['request_arrival_rate'] + + # time to process one second of video (mS) in the current second + request_processing_time = int(random.normalvariate(10, 10*0.68)) + if request_processing_time <= 10: + request_processing_time = 10 + # time depends on the length of the segments in seconds + request_processing_time *= ip_endpoint['segment_size'] + + # amount of cpu time (mS) per tick + cpu_time_available = 
ip_endpoint['cpu']*TICK_TIME*1000 + max_requests_processed = int(cpu_time_available/request_processing_time) + # calc how many requests processed + if ip_endpoint['request_queue'] <= max_requests_processed: + # processed all of the requests + requests_processed = ip_endpoint['request_queue'] + else: + # processed the maximum number of requests + requests_processed = max_requests_processed + + # calculate cpu usage + cpu_active_time = int(requests_processed*request_processing_time) + cpu_idle_time = int(cpu_time_available-cpu_active_time) + cpu_usage = cpu_active_time/cpu_time_available + self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_cpu_report(cpu_usage, cpu_active_time, cpu_idle_time, sim_time)) + + # calc network usage metrics + bytes_rx = 2048*requests_processed + bytes_tx = int(ip_endpoint['video_bit_rate']/8*1000000*requests_processed*ip_endpoint['segment_size']) + self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_network_report(bytes_rx, bytes_tx, sim_time)) + + # time to process all of the requests in the queue + peak_response_time = ip_endpoint['request_queue']*request_processing_time/ip_endpoint['cpu'] + # mid-range + avg_response_time = (peak_response_time+request_processing_time)/2 + self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_mpegdash_report('https://Netflix.com/scream', ip_endpoint['request_arrival_rate'], avg_response_time, peak_response_time, sim_time)) + + # need to calculate this but sent at 5mS for now + network_request_delay = 0.005 + + # calculate network response delays (2km link, 100Mbps) + network_response_delay = self._calcNetworkDelay(2000, 100, ip_endpoint['packet_size'], ip_endpoint['video_bit_rate'], ip_endpoint['segment_size']) + + e2e_delay = network_request_delay + (avg_response_time/1000) + network_response_delay + + self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_ipendpoint_route('https://Netflix.com/scream', ip_endpoint['request_arrival_rate'], e2e_delay, sim_time)) + + # remove
requests processed off the queue + ip_endpoint['request_queue'] -= int(requests_processed) + + sim_time += TICK_TIME + end_time = sim_time print("Simulation Finished. Start time {0}. End time {1}. Total time {2}".format(start_time,end_time,end_time-start_time)) + # distance metres + # bandwidth Mbps + # package size bytes + # tx_video_bit_rate bp/sec + # segment size sec + def _calcNetworkDelay(self, distance, bandwidth, packet_size, tx_video_bit_rate, segment_size): + response_delay = 0 + + # propogation delay = distance/speed () (e.g 2000 metres * 2*10^8 for optical fibre) + propogation_delay = distance/(2*100000000) + # packetisation delay = ip packet size (bits)/tx rate (e.g. 100Mbp with 0% packet loss) + packetisation_delay = (packet_size*8)/(bandwidth*1000000) + # print('packetisation_delay:', packetisation_delay) + # total number of packets to be sent + packets = (tx_video_bit_rate*1000000)/(packet_size*8) + # print('packets:', packets) + response_delay = packets*(propogation_delay+packetisation_delay) + # print('response_delay:', response_delay) + + return response_delay + + def _changeVMState(self, sim_time, ip_endpoint, mu, sigma, transition_state, next_state): + delay_time = 0 + + self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_vm_config(transition_state, ip_endpoint['cpu'], ip_endpoint['mem'], ip_endpoint['storage'], sim_time)) + + delay_time = random.normalvariate(mu, sigma) + + self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_vm_config(next_state, ip_endpoint['cpu'], ip_endpoint['mem'], ip_endpoint['storage'], sim_time+delay_time)) + + return delay_time + def _createDB(self): self._sendInfluxQuery(self.influx_url, 'CREATE DATABASE ' + self.influx_db) @@ -83,9 +196,8 @@ class sim: data = data.encode() header = {'Content-Type': 'application/octet-stream'} req = urllib.request.Request(url + '/write?db=' + self.influx_db, data, header) - urllib.request.urlopen(req) - + urllib.request.urlopen(req) -simulator = 
sim('http://192.168.50.10:8086') -simulator.run(180) +simulator = sim(INFLUX_DB_URL) +simulator.run(SIMULATION_TIME_SEC)