sherlock
sherlock copied to clipboard
Using PID as owner value
Hi, why not use the process PID as `Lock._owner`?
It can be useful to use that value to check whether the process that acquired the lock is still running.
import os
import sys
import sherlock
from functools import wraps
from sherlock import RedisLock
# Select Redis as the sherlock backend; this runs as a module-level side
# effect as soon as the module is imported.
sherlock.configure(backend=sherlock.backends.REDIS)
def _default_cmdline_validator(lock_instance, cmdline):
"""
Check if all the words in self.cmdline_has (defaults to sys.argv) is in the process cmdline
it is needed in the case of another process starts running using the same PID (after a crash)
"""
for word in lock_instance.cmdline_has:
if not word in cmdline:
return False
return True
class PIDLock(RedisLock):
    """
    A sherlock Redis lock that stores the acquiring process PID as its owner.

    Before reporting whether it is locked, it vacuums the namespace: lock keys
    whose stored PID does not map to a running process accepted by
    ``cmdline_validator`` are deleted.
    """

    def __init__(self, lock_name, **kwargs):
        """
        :param lock_name: name of the lock, passed on to ``RedisLock``.
        :param cmdline_validator: optional callable ``(lock_instance, cmdline)``
            returning True when ``cmdline`` belongs to a legitimate lock
            holder (e.g. a regex based check); defaults to
            ``_default_cmdline_validator``.
        :param cmdline_has: optional list of words the default validator looks
            for in the process command line; defaults to an empty list.

        All keyword arguments are also forwarded to ``RedisLock.__init__``.
        """
        super(PIDLock, self).__init__(lock_name, **kwargs)
        # the lock can declare a different validator, (e.g using a regex if needed)
        self.cmdline_validator = kwargs.get('cmdline_validator', _default_cmdline_validator)
        # the decorator will set it to sys.argv by default
        self.cmdline_has = kwargs.get('cmdline_has', [])

    def pid_vacuum(self):
        """
        Ensure that all existing locks in the namespace refer to running processes.

        A key is deleted when its value is not a PID, when no process with
        that PID is running, or when the running process is rejected by
        ``self.cmdline_validator``.
        """
        # TODO : use psutil https://github.com/giampaolo/psutil
        # NOTE(review): every key in the namespace is validated against THIS
        # instance's cmdline_has, so a lock held by a live process that was
        # started with a different command line fails validation and is
        # deleted here - confirm this is intended.
        for key in self.client.keys(self.namespace + "*"):
            try:
                # int() accepts both text and bytes, so the path below is
                # built correctly whichever type the client returns.
                pid = int(self.client.get(key))
            except (TypeError, ValueError):
                # missing or non numeric value: should never happen
                # (unless you set a bad namespace)
                self.client.delete(key)
                continue
            try:  # check if process is running
                # Linux procfs only, see TODO above
                with open('/proc/{}/cmdline'.format(pid)) as proc_file:
                    cmdline = proc_file.read().split('\x00')
            except IOError:  # process is not running
                self.client.delete(key)
                continue
            # validate if that is the desired process, else delete the key
            if not self.cmdline_validator(self, cmdline):
                self.client.delete(key)

    def _acquire(self):
        """
        Try to take the lock with the current process PID as the owner value.

        :returns: ``True`` when the acquire function reports success (``1``),
            ``False`` otherwise.
        """
        owner = os.getpid()  # TODO: Check if it is needed to use getppid under cron task
        # -1 is sent when no expiry is configured; presumably the acquire
        # script treats it as "never expire" - verify against RedisLock.
        expire = -1 if self.expire is None else self.expire
        if self._acquire_func(keys=[self._key_name, owner, expire]) != 1:
            return False
        self._owner = owner
        return True

    @property
    def _locked(self):
        """Vacuum dead owners first, then report whether this lock's key still exists."""
        self.pid_vacuum()
        return self.client.get(self._key_name) is not None
def pid_locked(func):
    """
    Decorate functions and commands (click or flask scripts) so that only one
    process at a time runs them with a given set of arguments.

    The lock name is built from the function name and the call arguments, and
    the lock is a ``PIDLock`` in the ``"sherlock"`` namespace validated
    against ``sys.argv``.

    :param func: the callable to protect.
    :returns: the wrapped callable, which returns whatever ``func`` returns.
    :raises SystemExit: with the message ``"Locked"`` when the lock is held
        by another process.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        print(sys.argv)
        lock_name = "{func_name}|{args}|{kwargs}".format(
            func_name=func.__name__,
            args=",".join(map(str, args)),
            # sorted so that the same keyword arguments always produce the
            # same lock name, whatever the dict iteration order is
            kwargs=",".join("{}={}".format(*kv) for kv in sorted(kwargs.items())),
        )
        lock = PIDLock(lock_name, namespace="sherlock", cmdline_has=sys.argv)
        print("{} {}".format(lock, lock_name))
        # locked() is still called first because it triggers the PID vacuum
        if lock.locked():
            raise SystemExit("Locked")
        print("Not locked")
        # Non blocking: if another process took the lock between the check
        # above and this call, exit instead of waiting for it.
        # Acquiring outside the try block means release() is only ever
        # called on a lock this process really holds.
        if not lock.acquire(blocking=False):
            raise SystemExit("Locked")
        try:
            value = func(*args, **kwargs)
        except Exception as e:
            print(str(e))
            print("release lock")
            raise
        else:
            print("{} lock aquired".format(value))
            print("execution finished, releasing")
            return value
        finally:
            # finally also covers SystemExit and KeyboardInterrupt, which
            # "except Exception" does not catch (commands often sys.exit())
            lock.release()
    return wrapper
Hey @rochacbruno ,
That's a great idea. In distributed environments though, multiple processes can have the same PID. So this approach can work fine on a single box but might fail if you have processes running on multiple boxes, which should be more common IMHO.
Is my understanding correct? Would love to hear more and take this forward.
Something similar to this is now implemented using the filelock library as the backend.