pyArango
pyArango copied to clipboard
While running the sample code for performance analysis, memory usage soared to 100% within a few minutes.
My Environment
ArangoDB Version: 3.4.0; Storage Engine: RocksDB; Deployment Mode: Single Server; pyArango Version: 1.3.2; Operating System: Ubuntu 16.04; Hard disk capacity: 20G; RAM: 2G
Sample Code
#!/usr/bin/python
import json as json_mod
import sys
import random
import statsd
import pyArango
from datetime import datetime
from pyArango.connection import *
from pyArango.collection import *
import requests
from gevent import monkey
from gevent import GreenletExit
monkey.patch_all()
import gevent
import grequests
# Placeholder binding; the name is rebound to a statsd.StatsClient later in this script.
statsdc = {}
# Cached JWT; get_auth_token() stores the token here after the first successful login.
auth_token = None
# ArangoDB endpoints that get_auth_token() tries in order.
connection_urls = [
    "http://127.0.0.1:8529"
]
class JWTAuth(requests.auth.AuthBase):
    """requests auth hook that sends a JWT as a bearer token."""

    def __init__(self, token):
        """Keep the JWT that will be attached to outgoing requests."""
        self.token = token

    def __call__(self, r):
        """Set the ``Authorization`` header on request ``r`` and hand it back."""
        # Implement JWT authentication
        bearer = 'Bearer %s' % self.token
        r.headers['Authorization'] = bearer
        return r
def get_auth_token():
    """Return a JWT for the root user, logging in on first use.

    The token is cached in the module-level ``auth_token``. When no token is
    cached, each URL in ``connection_urls`` is tried in turn and the first
    successful, non-empty response supplies the ``jwt`` field.

    Returns:
        The cached or freshly fetched token, or ``None`` when no endpoint
        produced one.
    """
    global auth_token, connection_urls
    if auth_token:
        return auth_token

    login_body = '{"username":"%s","password":"%s"}' % ("root", "")
    for base_url in connection_urls:
        reply = requests.post('%s/_open/auth' % base_url, data=login_body)
        if not reply.ok:
            continue
        raw = reply.content
        if not raw:
            continue
        auth_token = json_mod.loads(raw.decode('utf-8')).get('jwt')
        # Stop at the first endpoint that answered with a body.
        break
    return auth_token
class AikidoSession(object):
    """HTTP session replacement that sends every request through grequests.

    Each verb method builds one grequests request, runs it with
    ``grequests.map`` and returns the single resulting response. The
    ``auth`` keyword is always overwritten with this session's auth object.
    """

    def __init__(self, *args, **kwargs):
        """Count the new connection in statsd and set up authentication.

        The first two positional arguments are taken as username and
        password. When a username is given, the password is used as a JWT.
        """
        statsdc.incr('conn')
        session_username, session_password = args[:2]
        if session_username:
            self.auth = JWTAuth(session_password)
        else:
            self.auth = None

    def _send(self, request_factory, url, **kwargs):
        """Build one request with ``request_factory``, run it, return its response."""
        kwargs['auth'] = self.auth
        return grequests.map([request_factory(url, **kwargs)])[0]

    def post(self, url, data=None, json=None, **kwargs):
        """Send a POST request; ``data`` and ``json`` are forwarded when not None."""
        if data is not None:
            kwargs['data'] = data
        if json is not None:
            kwargs['json'] = json
        return self._send(grequests.post, url, **kwargs)

    def get(self, url, **kwargs):
        """Send a GET request."""
        return self._send(grequests.get, url, **kwargs)

    def put(self, url, data=None, **kwargs):
        """Send a PUT request; ``data`` is forwarded when not None."""
        if data is not None:
            kwargs['data'] = data
        return self._send(grequests.put, url, **kwargs)

    def head(self, url, **kwargs):
        """Send a HEAD request."""
        # The previous version issued a PUT here, which could modify the resource.
        return self._send(grequests.head, url, **kwargs)

    def options(self, url, **kwargs):
        """Send an OPTIONS request."""
        return self._send(grequests.options, url, **kwargs)

    def patch(self, url, data=None, **kwargs):
        """Send a PATCH request; ``data`` is forwarded when not None."""
        if data is not None:
            kwargs['data'] = data
        return self._send(grequests.patch, url, **kwargs)

    def delete(self, url, **kwargs):
        """Send a DELETE request."""
        return self._send(grequests.delete, url, **kwargs)

    def disconnect(self):
        """Count the closed connection in statsd; nothing else is torn down."""
        statsdc.decr('conn')
# Monkey patch the connection object:
# rebinding the module attribute makes pyArango.connection use the
# grequests-backed AikidoSession class defined in this script.
pyArango.connection.AikidoSession = AikidoSession
def microsecs_to_millisec_string(microsecs):
    """Format a duration given in microseconds as a millisecond string.

    Args:
        microsecs: duration in microseconds (int or float).

    Returns:
        A string such as ``'12.050ms'``: whole milliseconds, a dot, then the
        remaining microseconds padded to three digits.
    """
    # %03d keeps leading zeros; without it 1005 us was rendered as "1.5ms".
    return '%d.%03dms' % (microsecs / 1000, microsecs % 1000)
def get_time_since(start_time, idstr):
    """Send the time elapsed since ``start_time`` to statsd and return it as text.

    Args:
        start_time: ``datetime`` to measure from; it is subtracted from
            ``datetime.now()``.
        idstr: statsd metric name the timing is reported under.

    Returns:
        The elapsed time formatted by ``microsecs_to_millisec_string``.
    """
    diff = datetime.now() - start_time
    # total_seconds() already includes the sub-second part; adding
    # diff.microseconds on top counted the fraction twice.
    microsecs = diff.total_seconds() * 1000 * 1000
    statsdc.timing(idstr, int(microsecs))
    return microsecs_to_millisec_string(microsecs)
# StatsD client that receives the counters and timings emitted by this script.
statsdc = statsd.StatsClient('127.0.0.1', '8125')
# The JWT from get_auth_token() is passed in the password slot.
# NOTE(review): presumably pyArango forwards username/password to AikidoSession,
# which wraps the password in JWTAuth - confirm against pyArango.connection.
conn = Connection(username="root", password=get_auth_token(), statsdClient=statsdc)
db = conn["_system"]
# JavaScript source of the server-side transaction that worker() passes to
# db.transaction(). It runs three queries and returns the differences between
# the Date.now() readings taken around them (tq1, tq2, tq3, all).
# NOTE(review): q1 filters on `user._key` although the loop variable is
# `oneUser`; the revised version later in this thread uses `oneUser._key`.
# NOTE(review): this version names the collections `userToGroups` and `groups`
# and filters on `e.counter`, while the data-initialisation script in this
# thread creates `userToGroup`, `group` and the edge field `groupRelationNo`.
# Confirm which names match the deployed data.
transaction = '''
function(params) {
var db = require('@arangodb').db;
var startOne = Date.now();
var q1 = db._query(
`FOR oneUser IN user
FILTER user._key == @userid
UPDATE {
_key: oneUser._key,
lastseen: @timestamp,
counter: oneUser.counter + 1
} IN user`,
{
userid: 'user_' + params.i,
timestamp: params.timestamp
});
var startTwo = Date.now();
var q2 = db._query(`FOR v, e IN 1..1 OUTBOUND @user userToGroups
FILTER e.counter == @i
UPDATE {
_key: v._key,
counter: v.counter + 1
} IN groups`,
{
user: 'user/user_' + params.i,
i: params.i % 10
});
var startThree = Date.now();
var q3 = db._query(`RETURN 1`);
var end = Date.now();
return {
tq1: startTwo - startOne,
tq2: startThree - startTwo,
tq3: end - startThree,
all: end - startOne
};
}
'''
def worker(i):
    """Simulate one client: look up user ``i``, then run the transaction.

    The ``clients`` counter is incremented for the duration of the call. Each
    entry of the transaction result is reported to statsd as a timing. Any
    ``Exception`` is counted under ``errors`` and printed, not propagated.

    Args:
        i: user number; the document key is ``'user_%d' % i``.

    Raises:
        GreenletExit: always, once the work (or its error handling) is done.
    """
    # add a bit of a variance to the startup
    gevent.sleep(0.1 * random.random())

    statsdc.incr('clients')
    start_time = datetime.now()
    # The query text is kept flush-left so the string stays byte-identical.
    user_lookup = '''
FOR user IN user FILTER user._key == @username RETURN user
'''
    try:
        db.AQLQuery(
            user_lookup,
            rawResults=True,
            batchSize=1,
            count=True,
            bindVars={'username': 'user_%d' % i},
        )
        reply = db.transaction(
            action=transaction,
            collections={"read": ['userToGroups'], "write": ['user', 'groups']},
            params={'i': i, 'timestamp': start_time.isoformat()},
        )
        timings = reply['result']
        for label in timings:
            statsdc.timing(label, timings[label])
    except Exception as e:
        statsdc.incr('errors')
        elapsed = get_time_since(start_time, 'errw')
        print('Error in worker %d: error: %s in %s' % (i, str(e), elapsed))

    statsdc.decr('clients')
    raise GreenletExit
# Command line: <start user no> <end user no> <requests per second>.
# Workers are spawned for user numbers start+1 .. end, one per interval.
print(sys.argv)
userrange_start = int(sys.argv[1])
userrange_end = int(sys.argv[2])
req_per_sec = float(sys.argv[3])

# The loop variable reuses the module-level name so that it ends up equal to
# userrange_end, as it did with the hand-incremented counter.
for userrange_start in range(userrange_start + 1, userrange_end + 1):
    gevent.spawn(worker, userrange_start)
    gevent.sleep(1.0 / req_per_sec)
This is a changed version of the code, which solved the high memory usage problem.
PS: only the parts that have changed are excerpted.
Initializing the data
#! /usr/bin/env python
from pyArango.connection import Connection
conn = Connection(username='root', password='')
db = conn['_system']


def _fresh_collection(database, name, class_name):
    """Return collection ``name`` emptied, creating it as ``class_name`` if missing."""
    if database.hasCollection(name):
        collection = database.collections[name]
    else:
        collection = database.createCollection(className=class_name, name=name)
    collection.truncate()
    return collection


# Two document collections and the edge collection that links them.
userCol = _fresh_collection(db, 'user', 'Collection')
groupCol = _fresh_collection(db, 'group', 'Collection')
userToGroupCol = _fresh_collection(db, 'userToGroup', 'Edges')
# Fill the collections: noUsers users, one group per ten users, and for each
# user i one edge to each of the groups 1 .. (i % 10).
noUsers = 100000
for i in range(1, noUsers + 1):
    userCol.createDocument({
        '_key': ('user_%d' % i),
        'foo': 'bar',
        'count': i,
        'counter': i,
        'visits': 0,
        'name': ("i am user no %d" % i),
        'somePayLoad': 'lorem Ipsem' * 10
    }).save()
userCol.ensureHashIndex(['count'], sparse=False)

# We have one group each 10 users
# Floor division keeps the count an integer; "/" yields a float on Python 3.
noGroups = noUsers // 10
for i in range(1, noGroups + 1):
    groupCol.createDocument({
        '_key': 'group_%d' % i,
        'counter': i,
        'name': "i am group no %d" % i
    }).save()

for i in range(1, noUsers + 1):
    # Users whose number is a multiple of ten get no edges (i % 10 == 0).
    for j in range(1, i % 10 + 1):
        userToGroupCol.createDocument({
            '_from': 'user/user_%d' % i,
            '_to': 'group/group_%d' % j,
            'groupRelationNo': j,
            'foo': 'bar',
            'name': "i am making user %d a member of group no %d" % (i, j)
        }).save()
The actual test code
# Revised JavaScript source of the server-side transaction.
# q1 updates `lastseen` and `counter` on the user whose key is 'user_' + params.i.
# q2 follows that user's outbound `userToGroup` edges with
# groupRelationNo == params.i % 10 and increments `counter` on the reached group.
# q3 is a trivial `RETURN 1`.
# The returned object holds the differences between the Date.now() readings
# taken around the queries (tq1, tq2, tq3, all).
transaction = '''
function(params) {
var db = require('@arangodb').db;
var startOne = Date.now();
var q1 = db._query(
`FOR oneUser IN user
FILTER oneUser._key == @userid
UPDATE {
_key: oneUser._key,
lastseen: @timestamp,
counter: oneUser.counter + 1
} IN user`,
{
userid: 'user_' + params.i,
timestamp: params.timestamp
});
var startTwo = Date.now();
var q2 = db._query(`FOR v, e IN 1..1 OUTBOUND @user userToGroup
FILTER e.groupRelationNo == @i
UPDATE {
_key: v._key,
counter: v.counter + 1
} IN group`,
{
user: 'user/user_' + params.i,
i: params.i % 10
});
var startThree = Date.now();
var q3 = db._query(`RETURN 1`);
var end = Date.now();
return {
tq1: startTwo - startOne,
tq2: startThree - startTwo,
tq3: end - startThree,
all: end - startOne
};
}
'''
So, is this the author's original intention? Or is there anything wrong with my code?
Hi @kaylee000,
Thanks for this. I am not sure I get everything; is there a reason why you reimplement parts of pyArango in your sample?
Hi @tariqdaouda, the sample code is from this website: https://www.arangodb.com/2017/10/performance-analysis-pyarango-usage-scenarios/. I made these changes just to get it running correctly.
q1
document user/user_101
query result
q2
one document from userToGroups collection
query result
As shown, neither query returns any results. However, the results changed after I changed the code.
q1 query result
q2 query result