fastsnmp
fastsnmp copied to clipboard
How to gather metrics with less complexity?
Hello, As you can see in the following example, it's very expensive to get metrics with less complexity.
def masac_poller(device_type, timeout, retries, sqlitedbtable, banned):
""" Fetch SNMP metrics using fastsnmp module and send them to Carbon server """
# oids in group must be with same indexes
oid_group_main = {"1.3.6.1.2.1.2.2.1.2": "ifDescr", "1.3.6.1.2.1.2.2.1.10": "ifInOctets","1.3.6.1.2.1.2.2.1.16": "ifOutOctets",}
oid_group_in = {"1.3.6.1.2.1.2.2.1.10": "ifInOctets",}
oid_group_out = {"1.3.6.1.2.1.2.2.1.16": "ifOutOctets",}
oid_group = {"1.3.6.1.2.1.2.2.1.2": "ifDescr",}
tags = ("Vlan","unrouted","thernet")
if_desc_in = 'ifHCInOctets'
if_desc_out = 'ifHCOutOctets'
node = platform.node().replace('.', '-')
DELAY = 1
count = 0
for h in get_db_device_list(sqlitedbtable, device_type, banned):
timestamp = int(time.time()) # TODO Rather put timestamp inside the loop
###### Form a list of Interfaces and their indexes for a given DEVICE
hostnames = []
hostnames.append(h['ip'])
interfaces = []
e = {}
snmp_data = snmp_poller.poller(hostnames, (oid_group.keys(),), h['snmp_community'], 2, 0)
if len(list(snmp_data)) < 1 or h['snmp_community'] == '':
update_device_in_db(sqlitedbtable, h['login'], device_type, 1)
print("Banned " + h['ip'])
continue
snmp_data = snmp_poller.poller(hostnames, (oid_group.keys(),), h['snmp_community'], int(timeout), int(retries))
for d in snmp_data:
#print("host=%s oid=%s.%s value=%s" % (d[0], oid_group[d[1]], d[2], d[3]))
e = {'hostname':h['login'], 'device':d[0],'index':d[2], 'interface':d[3]}
interfaces.append(e)
# Form a list of OUT metrics
outoctets = []
e = {}
snmp_data = snmp_poller.poller(hostnames, (oid_group_out.keys(),), h['snmp_community'], int(timeout), int(retries))
for d in snmp_data:
e = {'device':d[0],'index':d[2], 'outoctets':d[3]}
outoctets.append(e)
###### Form a list of IN metrics for Interfaces and their indexes for a given DEVICE
inoctets = []
e = {}
snmp_data = snmp_poller.poller(hostnames, (oid_group_in.keys(),), h['snmp_community'], int(timeout), int(retries))
for d in snmp_data:
e = {'device':d[0],'index':d[2], 'inoctets':d[3]}
inoctets.append(e)
hostnames.pop() # Put that the last
# now form a coupling list
metrics = []
e= {}
for interface in interfaces:
for inoctet in inoctets:
for outoctet in outoctets:
if interface['index'] == inoctet['index'] and interface['index'] == outoctet['index']:
e= {'hostname':interface['hostname'], 'device':interface['device'], 'index':interface['index'], 'interface':interface['interface'], 'inoctets':inoctet['inoctets'], 'outoctets':outoctet['outoctets']}
if tags[2] in e['interface'] and tags[0] not in e['interface'] and tags[1] not in e['interface']:
metrics.append(e)
print("Processing: " + h['ip'])
lines= []
for ob in metrics:
identifier = ob['hostname'].replace(".","_")
if device_type == 'sdsl':
identifier = str(ob['hostname'].replace(".","_").replace("@","_at_")) + "_" +h['ip'].replace(".","_")
line_in = 'masac.test.%s.%s.%s %d %d' % (identifier, ob['interface'].replace("/","_").replace(" ","_"), if_desc_in, ob['inoctets'], timestamp)
line_out = 'masac.test.%s.%s.%s %d %d' % (identifier, ob['interface'].replace("/","_").replace(" ","_"), if_desc_out, ob['outoctets'], timestamp)
lines.append(line_in)
lines.append(line_out)
message = '\n'.join(lines) + '\n'
sock = socket.socket()
#sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect((CARBON_SERVER, CARBON_PORT))
sock.sendall(message.encode('UTF-8'))
sock.close()
#print(message)
count = count + 1
time.sleep(int(DELAY))
return count
- The CPU overhead is due to this snippet, I presume:
metrics = []
e= {}
for interface in interfaces:
for inoctet in inoctets:
for outoctet in outoctets:
if interface['index'] == inoctet['index'] and interface['index'] == outoctet['index']:
e= {'hostname':interface['hostname'], 'device':interface['device'], 'index':interface['index'], 'interface':interface['interface'], 'inoctets':inoctet['inoctets'], 'outoctets':outoctet['outoctets']}
if tags[2] in e['interface'] and tags[0] not in e['interface'] and tags[1] not in e['interface']:
metrics.append(e)
- Actually, I switched to that another way but I still do not know whether it's the best thing to do :
def masac_poller(device_type, timeout, retries, sqlitedbtable, banned):
""" Fetch SNMP metrics using fastsnmp module and send them to Carbon server """
# oids in group must be with same indexes
node = platform.node().replace('.', '-')
tags = ("Vlan","unrouted","thernet")
oid_group = {"1.3.6.1.2.1.2.2.1.2": "ifDescr", "1.3.6.1.2.1.2.2.1.10": "ifInOctets","1.3.6.1.2.1.2.2.1.16": "ifOutOctets",}
count = 0
for h in get_db_device_list(sqlitedbtable, device_type, banned):
timestamp = int(time.time())
hostnames = []
hostnames.append(h['ip'])
snmp_data = snmp_poller.poller(hostnames, (oid_group.keys(),), h['snmp_community'], int(timeout), int(retries))
if len(list(snmp_data)) < 1 or h['snmp_community'] == '':
update_device_in_db(sqlitedbtable, h['login'], device_type, 1)
print("Banned " + h['ip'])
continue
snmp_data = snmp_poller.poller(hostnames, (oid_group.keys(),), h['snmp_community'], int(timeout), int(retries))
e = {}
lines= []
for d in snmp_data:
metrics = []
oid = oid_group[d[1]]
host = d[0]
index = d[2]
value = d[3]
e['hostname'] = host
if "ifDescr" in oid:
e['ifDescr'] = value
e['index'] = index
if "ifInOctets" in oid:
pline = ("host=%s oid=%s.%s value=%s" % (host, oid, index, value))
e['ifHCInOctets'] = value
e['index'] = index
if "ifOutOctets" in oid:
e['ifHCOutOctets'] = value
e['index'] = index
metrics.insert(int(e['index']), e)
for device in metrics:
if len(device) == 5:
identity = h['login'].replace(".","_").replace("@","_at_")
if "sdsl" in device_type:
identity = str(h['hostname'].replace(".","_").replace("@","_at_")) + "_" + str(h['ip'])
for metric in ("ifHCInOctets","ifHCOutOctets"):
line = '%s.%s.%s.%s.%s %d %d' % (METRIC_PREFIX, device_type, identity, device['ifDescr'].replace("/","_").replace(" ","_"), metric, device[metric], timestamp)
if tags[0] not in device['ifDescr'] and tags[1] not in device['ifDescr'] and tags[2] in device['ifDescr']:
lines.append(line)
message = '\n'.join(lines) + '\n'
sock = socket.socket()
sock.connect((CARBON_SERVER, CARBON_PORT))
print(message)
sock.sendall(message.encode('UTF-8'))
sock.close()
hostnames.pop() # Pop the hostnames after fetching needed metrics
return count
Check this example https://github.com/gescheit/fastsnmp/blob/master/examples/graphite_sender.py
@gescheit Real thanks. I am going to check that soon and feedback. By the first read, I can tell this is some real python programming.
Hi, gesheit,
Do you have a benchmark for fastsnmp? Don't have to be accurate. For example, to poll one OID, how many hosts can it poll per second? 100, 1K, etc. To poll 4 OIDS, how many host can it poll per second?
Thanks
Hi! Main bottleneck of performance is decoding of SNMP-payload. My laptop is able to decode about 30k OIDs per second on localhost tests on 1 CPU. But in real life it must be considered with network parameters like delay, limited passthrough and packet-drops. Also network equipment typically have weak CPU and can answer very slowly or doesn't answer if it is on high load. So SNMP-poller must wait answer, resend query in case of timeout. Direct answer is next: in ideal world my laptop can pool 1k hosts with 4 OIDs in 1 second:) In real world, for example, 1k host would be polled in 1-30 seconds, 10k in 3-30 seconds, 100k 15-45...
Thank you for your answer.
I am very interested in your work. I may give it a try.
From: gescheit <[email protected]mailto:[email protected]> Reply-To: gescheit/fastsnmp <[email protected]mailto:[email protected]> Date: Monday, October 19, 2015 at 5:53 PM To: gescheit/fastsnmp <[email protected]mailto:[email protected]> Cc: Henry Lu <[email protected]mailto:[email protected]> Subject: Re: [fastsnmp] How to gather metrics with less complexity? (#2)
Hi! Main bottleneck of performance is decoding of SNMP-payload. My laptop is able to decode about 30k OIDs per second on localhost tests on 1 CPU. But in real life it must be considered with network parameters like delay, limited passthrough and packet-drops. Also network equipment typically have weak CPU and can answer very slowly or doesn't answer if it is on high load. So SNMP-poller must wait answer, resend query in case of timeout. Direct answer is next: in ideal world my laptop can pool 1k hosts with 4 OIDs in 1 second:) In real world, for example, 1k host would be polled in 1-30 seconds, 10k in 3-30 seconds.
Reply to this email directly or view it on GitHubhttps://github.com/gescheit/fastsnmp/issues/2#issuecomment-149358899.