Commit 7a1395f9 authored by Michael Boniface

Merge branch 'sim-extend' into 'integration'

Sim extend

See merge request FLAME/flame-clmc!5
parents 3e946e2d c8a24805
# Telegraf configuration
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared inputs, and sent to the declared outputs.
# Plugins must be declared in here to be active.
# To deactivate a plugin, comment out the name and any variables.
# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
# file would generate.
# Global tags can be specified here in key="value" format.
[global_tags]
# location of the data centre
location={{LOCATION}}
# media service template id
sfc={{SFC_ID}}
# media service instance
sfc_i={{SFC_ID_INSTANCE}}
# service function type
sf={{SF_ID}}
# service function instance id
sf_i={{SF_ID_INSTANCE}}
# ipendpoint id aka surrogate instance
ipendpoint={{IP_ENDPOINT_ID}}
# Configuration for telegraf agent
[agent]
## Default data collection interval for all inputs
interval = "10s"
## Rounds collection interval to 'interval'
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true
## Telegraf will cache metric_buffer_limit metrics for each output, and will
## flush this buffer on a successful write.
metric_buffer_limit = 1000
## Flush the buffer whenever full, regardless of flush_interval.
flush_buffer_when_full = true
## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
## This can be used to avoid many plugins querying things like sysfs at the
## same time, which can have a measurable effect on the system.
collection_jitter = "0s"
## Default flushing interval for all outputs. You shouldn't set this below
## interval. Maximum flush_interval will be flush_interval + flush_jitter
flush_interval = "10s"
## Jitter the flush interval by a random amount. This is primarily to avoid
## large write spikes for users running a large number of telegraf instances.
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"
## Logging configuration:
## Run telegraf in debug mode
debug = false
## Run telegraf in quiet mode
quiet = false
## Specify the log file name. The empty string means to log to stdout.
logfile = "G:/Telegraf/telegraf.log"
## Override default hostname, if empty use os.Hostname()
hostname = ""
###############################################################################
# OUTPUTS #
###############################################################################
# Configuration for influxdb server to send metrics to
[[outputs.influxdb]]
# The full HTTP or UDP endpoint URL for your InfluxDB instance.
# Multiple urls can be specified but it is assumed that they are part of the same
# cluster; this means that only ONE of the urls will be written to each interval.
# urls = ["udp://127.0.0.1:8089"] # UDP endpoint example
urls = ["{{INFLUXDB_URL}}"] # required
# The target database for metrics (telegraf will create it if not exists)
database = "CLMCMetrics" # required
# Precision of writes, valid values are "ns", "us" (or "µs"), "ms", "s", "m", "h".
# note: using second precision greatly helps InfluxDB compression
precision = "s"
## Write timeout (for the InfluxDB client), formatted as a string.
## If not provided, will default to 5s. 0s means no timeout (not recommended).
timeout = "5s"
# username = "telegraf"
# password = "metricsmetricsmetricsmetrics"
# Set the user agent for HTTP POSTs (can be useful for log differentiation)
# user_agent = "telegraf"
# Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes)
# udp_payload = 512
[[outputs.file]]
## Files to write to, "stdout" is a specially handled file.
files = ["stdout", "/tmp/metrics.out"]
## Data format to output.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
data_format = "influx"
###############################################################################
# INPUTS #
###############################################################################
# # Influx HTTP write listener
[[inputs.http_listener]]
## Address and port to host HTTP listener on
service_address = ":8186"
## timeouts
read_timeout = "10s"
write_timeout = "10s"
## HTTPS
#tls_cert= "/etc/telegraf/cert.pem"
#tls_key = "/etc/telegraf/key.pem"
## MTLS
#tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"]
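The http_listener input above exposes an InfluxDB-compatible /write endpoint on port 8186, and the influxdb output forwards whatever arrives there to the CLMCMetrics database. Below is a minimal sketch of pushing one line-protocol record into that pipeline; send_metric is a hypothetical helper, the agent address matches AGENT_URL1 used by the simulator later in this commit, and the metric values are illustrative only.

import urllib.request

def send_metric(agent_url, line):
    # POST one InfluxDB line-protocol record to the Telegraf http_listener
    data = line.encode()
    header = {'Content-Type': 'application/octet-stream'}
    req = urllib.request.Request(agent_url + '/write?db=CLMCMetrics', data, header)
    urllib.request.urlopen(req)

# illustrative values: 35% CPU usage reported at t = 1518696000 s
send_metric('http://192.168.50.11:8186',
            'cpu_usage cpu_usage=0.35,cpu_active_time=5600,cpu_idle_time=10400 1518696000000000000')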
@@ -18,7 +18,7 @@ def generate_network_report(recieved_bytes, sent_bytes, time):
result += ' ' + str(_getNSTime(time))
# Measurement
# print('network'+result)
#print(result)
return result
@@ -42,13 +42,14 @@ def generate_vm_config(state, cpu, mem, storage, time):
# Reports cpu usage, scaling on requests
def generate_cpu_report(cpu_useage, cpu_usage_system, time):
result = 'vm_host_cpu_usage'
def generate_cpu_report(cpu_usage, cpu_active_time, cpu_idle_time, time):
result = 'cpu_usage'
# Tag
result += ' '
# field
result += 'cpu_usage='+str(cpu_useage)
result += ',cpu_usage_system='+str(cpu_usage_system)
result += 'cpu_usage='+str(cpu_usage)
result += ',cpu_active_time='+str(cpu_active_time)
result += ',cpu_idle_time='+str(cpu_idle_time)
result += ' '
# Time
result += str(_getNSTime(time))
@@ -70,9 +71,24 @@ def generate_mpegdash_report(resource, requests, avg_response_time, peak_respons
result += 'peak_response_time=' + str(peak_response_time)
# Timestamp
result += ' ' + str(_getNSTime(time))
# print(result)
print(result)
return result
#ipendpoint_route,ipendpoint_id,cont_nav=FQDN HTTP_REQUESTS_FQDN_M, NETWORK_FQDN_LATENCY timestamp
def generate_ipendpoint_route(resource, requests, latency, time):
# Measurement
result = 'ipendpoint_route'
# Tags
result += ',cont_nav=\"' + str(resource) + "\" "
# Fields
# result += 'cont_rep=' + str(quality) + ','
result += 'http_requests_fqdn_m=' + str(requests) + ','
result += 'network_fqdn_latency=' + str(latency)
# Timestamp
result += ' ' + str(_getNSTime(time))
#print(result)
return result
# Influx needs strings to be quoted, this provides a utility interface to do this
def quote_wrap(str):
@@ -82,7 +98,9 @@ def quote_wrap(str):
# InfluxDB likes to have time-stamps in nanoseconds
def _getNSTime(time):
# Convert to nano-seconds
return 1000000 * time
timestamp = int(1000000000*time)
#print("timestamp", timestamp)
return timestamp
# DEPRICATED
# ____________________________________________________________________________
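The functions changed above all build InfluxDB line-protocol strings: a measurement name, optional comma-separated tags, a space, comma-separated fields, another space, and a nanosecond timestamp from _getNSTime. As a quick illustration (the values are made up, and the module is assumed to be importable as LineProtocolGenerator, which is how the simulator below imports it):

import LineProtocolGenerator as lp

# 35% CPU usage, 5600 ms active / 10400 ms idle, at t = 1518696000 s
print(lp.generate_cpu_report(0.35, 5600, 10400, 1518696000))
# cpu_usage cpu_usage=0.35,cpu_active_time=5600,cpu_idle_time=10400 1518696000000000000

# 10 requests/s against the FQDN with an end-to-end latency of 0.87 s
print(lp.generate_ipendpoint_route('https://Netflix.com/scream', 10, 0.87, 1518696000))
# ipendpoint_route,cont_nav="https://Netflix.com/scream" http_requests_fqdn_m=10,network_fqdn_latency=0.87 1518696000000000000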
@@ -2,14 +2,23 @@ import LineProtocolGenerator as lp
import time
import urllib.parse
import urllib.request
from random import random, randint
import sys
import random
# Simulation parameters
TICK_TIME = 1
DEFAULT_REQUEST_RATE_INC = 1
DEFAULT_REQUEST_RATE_INC_PERIOD = 10
SIMULATION_TIME_SEC = 60*60
# CLMC parameters
INFLUX_DB_URL = 'http://192.168.50.10:8086'
AGENT_URL1 = 'http://192.168.50.11:8186'
AGENT_URL2 = 'http://192.168.50.12:8186'
# Simulator for services
class sim:
def __init__(self, influx_url):
# requests per second for different quality levels
self.quality_request_rate = {"DC1": [10, 20, 10], "DC2": [5, 30, 5]}
# We don't need this as the db is CLMC metrics
self.influx_db = 'CLMCMetrics'
self.influx_url = influx_url
@@ -19,52 +28,156 @@ class sim:
def run(self, simulation_length_seconds):
start_time = time.time()
current_time = int(time.time())
surrogate_services = [{'agent_url': 'http://192.168.50.11:8186', 'location': 'DC1', 'cpu': 2,
'mem': '8GB', 'storage': '1TB'},
{'agent_url': 'http://192.168.50.12:8186', 'location': 'DC2', 'cpu': 4,
'mem': '8GB', 'storage': '1TB'}
]
# Simulate surrogate services being asserted
for service in surrogate_services:
self._sendInfluxData(service['agent_url'], lp.generate_vm_config('starting', service['cpu'], service['mem'], service['storage'], current_time))
for service in surrogate_services:
self._sendInfluxData(service['agent_url'], lp.generate_vm_config('running', service['cpu'], service['mem'], service['storage'], current_time))
# Run simulation
for i in range(simulation_length_seconds):
for service in surrogate_services:
# Scale CPU usage on number of requests, quality and cpu allocation
cpu_usage = self.quality_request_rate[service['location']][0]
cpu_usage += self.quality_request_rate[service['location']][1]*2
cpu_usage += self.quality_request_rate[service['location']][2]*4
cpu_usage = cpu_usage/service['cpu']
cpu_usage = cpu_usage/100 # Transform into %
self._sendInfluxData(service['agent_url'], lp.generate_cpu_report( cpu_usage, cpu_usage, current_time))
# Scale SENT/REC bytes on requests and quality
bytes = self.quality_request_rate[service['location']][0]
bytes += self.quality_request_rate[service['location']][1]*2
bytes += self.quality_request_rate[service['location']][2]*4
bytes_sent = 1024*bytes
bytes_rec = 32*bytes
self._sendInfluxData(service['agent_url'], lp.generate_network_report(bytes_rec, bytes_sent, current_time))
# Scale MPEG Dash on requests, quality, cpu usage
avg_response_time = randint(0, 5 * self.quality_request_rate[service['location']][0])
avg_response_time += randint(0, 10 * self.quality_request_rate[service['location']][1])
avg_response_time += randint(0, 15 * self.quality_request_rate[service['location']][2])
avg_response_time *= cpu_usage
peak_response_time = avg_response_time + randint(30, 60)
requests = sum(self.quality_request_rate[service['location']])
self._sendInfluxData(service['agent_url'], lp.generate_mpegdash_report('https://Netflix.com/scream', requests, avg_response_time, peak_response_time, current_time))
# Add a second to the clock
current_time += 1000
end_time = time.time()
start_time = time.time()-SIMULATION_TIME_SEC
sim_time = start_time
# segment_size : the length of video requested at a time
# bit_rate: MPEG-2 High 1080p 25fps = 80Mbps
ip_endpoints = [{'agent_url': AGENT_URL1, 'location': 'DC1', 'cpu': 16,
'mem': '8GB', 'storage': '1TB', 'request_queue': 0, 'request_arrival_rate': 0,
'segment_size': 2, 'video_bit_rate': 80, 'packet_size': 1500},
{'agent_url': AGENT_URL2, 'location': 'DC2', 'cpu': 4,
'mem': '8GB', 'storage': '1TB', 'request_queue': 0, 'request_arrival_rate': 0,
'segment_size': 2, 'video_bit_rate': 80, 'packet_size': 1500}
]
# Simulate configuration of the ipendpoints
# endpoint state->mu, sigma, secs normal distribution
config_delay_dist = {"placing": [10, 0.68], "booting": [10, 0.68],"connecting": [10, 0.68]}
# Place endpoints
max_delay = 0
for ip_endpoint in ip_endpoints:
delay_time = self._changeVMState(sim_time, ip_endpoint, config_delay_dist['placing'][0], config_delay_dist['placing'][0]*config_delay_dist['placing'][1], 'placing', 'placed')
if delay_time > max_delay:
max_delay = delay_time
sim_time +=max_delay
# Boot endpoints
max_delay = 0
for ip_endpoint in ip_endpoints:
delay_time = self._changeVMState(sim_time, ip_endpoint, config_delay_dist['booting'][0], config_delay_dist['booting'][0]*config_delay_dist['booting'][1], 'booting', 'booted')
if delay_time > max_delay:
max_delay = delay_time
sim_time +=max_delay
# Connect endpoints
max_delay = 0
for ip_endpoint in ip_endpoints:
delay_time = self._changeVMState(sim_time, ip_endpoint, config_delay_dist['connecting'][0], config_delay_dist['connecting'][0]*config_delay_dist['connecting'][1], 'connecting', 'connected')
if delay_time > max_delay:
max_delay = delay_time
sim_time +=max_delay
request_arrival_rate_inc = DEFAULT_REQUEST_RATE_INC
request_queue = 0
inc_period_count = 0
for i in range(simulation_length_seconds):
for ip_endpoint in ip_endpoints:
request_processing_time = 0
cpu_time_available = 0
requests_processed = 0
max_requests_processed = 0
cpu_active_time = 0
cpu_idle_time = 0
cpu_usage = 0
cpu_load_time = 0
avg_response_time = 0
peak_response_time = 0
# linear inc to arrival rate
if inc_period_count >= DEFAULT_REQUEST_RATE_INC_PERIOD:
ip_endpoint['request_arrival_rate'] += request_arrival_rate_inc
inc_period_count = 0
else:
inc_period_count += 1
# add new requests to the queue
ip_endpoint['request_queue'] += ip_endpoint['request_arrival_rate']
# time to process one second of video (mS) in the current second
request_processing_time = int(random.normalvariate(10, 10*0.68))
if request_processing_time <= 10:
request_processing_time = 10
# time depends on the length of the segments in seconds
request_processing_time *= ip_endpoint['segment_size']
# amount of cpu time (mS) per tick
cpu_time_available = ip_endpoint['cpu']*TICK_TIME*1000
max_requests_processed = int(cpu_time_available/request_processing_time)
# calc how many requests processed
if ip_endpoint['request_queue'] <= max_requests_processed:
# processed all of the requests
requests_processed = ip_endpoint['request_queue']
else:
# processed the maximum number of requests
requests_processed = max_requests_processed
# calculate cpu usage
cpu_active_time = int(requests_processed*request_processing_time)
cpu_idle_time = int(cpu_time_available-cpu_active_time)
cpu_usage = cpu_active_time/cpu_time_available
self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_cpu_report(cpu_usage, cpu_active_time, cpu_idle_time, sim_time))
# calc network usage metrics
bytes_rx = 2048*requests_processed
bytes_tx = int(ip_endpoint['video_bit_rate']/8*1000000*requests_processed*ip_endpoint['segment_size'])
self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_network_report(bytes_rx, bytes_tx, sim_time))
# time to process all of the requests in the queue
peak_response_time = ip_endpoint['request_queue']*request_processing_time/ip_endpoint['cpu']
# mid-range
avg_response_time = (peak_response_time+request_processing_time)/2
self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_mpegdash_report('https://Netflix.com/scream', ip_endpoint['request_arrival_rate'], avg_response_time, peak_response_time, sim_time))
# need to calculate this but sent at 5mS for now
network_request_delay = 0.005
# calculate network response delays (2km link, 100Mbps)
network_response_delay = self._calcNetworkDelay(2000, 100, ip_endpoint['packet_size'], ip_endpoint['video_bit_rate'], ip_endpoint['segment_size'])
e2e_delay = network_request_delay + (avg_response_time/1000) + network_response_delay
self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_ipendpoint_route('https://Netflix.com/scream', ip_endpoint['request_arrival_rate'], e2e_delay, sim_time))
# remove requests processed off the queue
ip_endpoint['request_queue'] -= int(requests_processed)
sim_time += TICK_TIME
end_time = sim_time
print("Simulation Finished. Start time {0}. End time {1}. Total time {2}".format(start_time,end_time,end_time-start_time))
# distance metres
# bandwidth Mbps
# package size bytes
# tx_video_bit_rate bp/sec
# segment size sec
def _calcNetworkDelay(self, distance, bandwidth, packet_size, tx_video_bit_rate, segment_size):
response_delay = 0
# propogation delay = distance/speed () (e.g 2000 metres * 2*10^8 for optical fibre)
propogation_delay = distance/(2*100000000)
# packetisation delay = ip packet size (bits)/tx rate (e.g. 100Mbp with 0% packet loss)
packetisation_delay = (packet_size*8)/(bandwidth*1000000)
# print('packetisation_delay:', packetisation_delay)
# total number of packets to be sent
packets = (tx_video_bit_rate*1000000)/(packet_size*8)
# print('packets:', packets)
response_delay = packets*(propogation_delay+packetisation_delay)
# print('response_delay:', response_delay)
return response_delay
def _changeVMState(self, sim_time, ip_endpoint, mu, sigma, transition_state, next_state):
delay_time = 0
self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_vm_config(transition_state, ip_endpoint['cpu'], ip_endpoint['mem'], ip_endpoint['storage'], sim_time))
delay_time = random.normalvariate(mu, sigma)
self._sendInfluxData(ip_endpoint['agent_url'], lp.generate_vm_config(next_state, ip_endpoint['cpu'], ip_endpoint['mem'], ip_endpoint['storage'], sim_time+delay_time))
return delay_time
def _createDB(self):
self._sendInfluxQuery(self.influx_url, 'CREATE DATABASE ' + self.influx_db)
@@ -83,9 +196,8 @@ class sim:
data = data.encode()
header = {'Content-Type': 'application/octet-stream'}
req = urllib.request.Request(url + '/write?db=' + self.influx_db, data, header)
urllib.request.urlopen(req)
urllib.request.urlopen(req)
simulator = sim('http://192.168.50.10:8086')
simulator.run(180)
simulator = sim(INFLUX_DB_URL)
simulator.run(SIMULATION_TIME_SEC)
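To make the new per-tick model concrete, here is a worked example for the DC1 endpoint (16 CPUs, 2 s segments, 80 Mbps video, 1500 B packets), assuming the arrival rate has ramped to 50 requests/s, the queue holds exactly those 50 requests, and the normally distributed processing time comes out at its mean of 10 ms per second of video. All numbers are illustrative, not outputs of the simulator.

request_processing_time = 10 * 2                        # 20 ms per 2 s segment request
cpu_time_available = 16 * 1 * 1000                      # 16000 ms of CPU time per tick
max_requests_processed = cpu_time_available // request_processing_time      # 800
requests_processed = min(50, max_requests_processed)    # 50, queue fully drained this tick
cpu_active_time = requests_processed * request_processing_time              # 1000 ms
cpu_idle_time = cpu_time_available - cpu_active_time                        # 15000 ms
cpu_usage = cpu_active_time / cpu_time_available                            # 0.0625
bytes_rx = 2048 * requests_processed                    # 102400 B received
bytes_tx = int(80 / 8 * 1000000 * requests_processed * 2)                   # 1e9 B sent
peak_response_time = 50 * request_processing_time / 16                      # 62.5 ms
avg_response_time = (peak_response_time + request_processing_time) / 2      # 41.25 ms
# _calcNetworkDelay(2000, 100, 1500, 80, 2):
propagation_delay = 2000 / (2 * 100000000)              # 1e-05 s over a 2 km fibre link
packetisation_delay = (1500 * 8) / (100 * 1000000)      # 0.00012 s per packet at 100 Mbps
packets = (80 * 1000000) / (1500 * 8)                   # ~6666.7 packets per second of video
network_response_delay = packets * (propagation_delay + packetisation_delay)   # ~0.867 s
e2e_delay = 0.005 + avg_response_time / 1000 + network_response_delay          # ~0.913 s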
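Once the simulator has run, the generated measurements can be queried back from InfluxDB to check the results. A minimal sketch, assuming the standard InfluxDB 1.x HTTP query API at the INFLUX_DB_URL above and the CLMCMetrics database created by the simulator; the query itself is only an example.

import json
import urllib.parse
import urllib.request

# count the cpu_usage samples written during the simulation
query = 'SELECT count(cpu_usage) FROM "cpu_usage"'
url = 'http://192.168.50.10:8086/query?' + urllib.parse.urlencode({'db': 'CLMCMetrics', 'q': query})
with urllib.request.urlopen(url) as resp:
    print(json.loads(resp.read().decode()))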