mindmeld
mindmeld copied to clipboard
Enable the `Annotator` class to be serializable so it can be used with multiprocessing
I have been working on getting the `Annotator` class to work with multiprocessing, but ran into problems while implementing the code below.
from multiprocessing import Process
import multiprocessing
import mindmeld
from mindmeld.auto_annotator import BootstrapAnnotator
from mindmeld.components.nlp import NaturalLanguageProcessor
from flask import jsonify
import os
import sys
def auto_annotator():
    """Run a ``BootstrapAnnotator`` check in a child process and report the result.

    A daemon ``Process`` is started that builds a ``BootstrapAnnotator`` for the
    hard-coded agent path and processes one sample query. The child reports its
    outcome through a shared integer ``multiprocessing.Value``.

    Returns:
        A ``(response, status_code)`` tuple built with ``jsonify``: status 200
        when the shared flag is 1, status 400 when it is 0. ``None`` when the
        flag holds any other value.
    """
    agent_path = "/Users/bpurks/Documents/agent/askermo_stage__5fdc7bb07bd295f1db543b2a"
    # Shared state handed to the child: an int flag and a manager-backed list.
    ret = multiprocessing.Value('i', 0)
    manager = multiprocessing.Manager()
    expres_list = manager.list()

    # NOTE: this worker is a local function that closes over ``agent_path``; it
    # is the object named in the reported "Can't pickle local object" error.
    def autoAnnotator(ret, expres_list,):
        """Child-process body: set ``ret`` to 1 on success, 0 if the path is missing."""
        print(agent_path)
        if not os.path.exists(agent_path):
            print("agent path not found!")
            ret.value = 0
            return
        annotator = BootstrapAnnotator(app_path=agent_path)
        print(annotator.nlp.process("release management"))
        del annotator
        ret.value = 1

    print("After nested function")

    worker = Process(target=autoAnnotator, args=(ret, expres_list,))
    worker.daemon = True
    worker.start()
    worker.join()

    # Remove the agent's package (the last component of the absolute agent
    # path) from the parent's module cache if it is present there.
    package_name = os.path.basename(os.path.abspath(agent_path))
    if package_name in sys.modules:
        del sys.modules[package_name]

    if ret.value == 1:
        return jsonify({'status':'success','message':'Returned the expression list'}),200
    if ret.value == 0:
        return jsonify({"status": "failed", "reason": "agent path not found in the backend!"}), 400
    return None


auto_annotator()
I ran into the following errors:
The process has forked and you cannot use this CoreFoundation functionality safely. You MUST exec().
Break on __THE_PROCESS_HAS_FORKED_AND_YOU_CANNOT_USE_THIS_COREFOUNDATION_FUNCTIONALITY___YOU_MUST_EXEC__() to debug.
RuntimeError: context has already been set
AttributeError: Can't pickle local object 'auto_annotator.<locals>.autoAnnotator'
I later found out that the `Annotator` class is not currently designed to support multiprocessing. It would be great if the `Annotator` class could be used with multiprocessing, if possible.