Deepcopy of a Synset
Hello,
Scenario: I am developing a custom transformer that works within the scikit-learn (sklearn) framework. This transformer must be serializable, i.e. it must be possible to dump and load it through the sklearn framework. The transformer holds a reference to an inverted index that uses synsets as the domain of a mapping function to integers.
Problem:
import copy
from nltk.corpus import wordnet
synsets = wordnet.synsets("try")
synset = synsets[0]
copy.deepcopy(synset)
It fails with the following traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-95-022997bb6638> in <module>
2 synsets = wordnet.synsets("try")
3 synset = synsets[0]
----> 4 copy.deepcopy(synset)
~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
173
174 # If is its own copy, don't memoize.
~/.conda/envs/inforetrival/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
268 if state is not None:
269 if deep:
--> 270 state = deepcopy(state, memo)
271 if hasattr(y, '__setstate__'):
272 y.__setstate__(state)
~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
~/.conda/envs/inforetrival/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
~/.conda/envs/inforetrival/lib/python3.8/copy.py in <listcomp>(.0)
208
209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210 y = [deepcopy(a, memo) for a in x]
211 # We're not going to put the tuple in the memo, but it's still important we
212 # check for it, in case the tuple contains recursive mutable structures.
~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
~/.conda/envs/inforetrival/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
173
174 # If is its own copy, don't memoize.
~/.conda/envs/inforetrival/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
268 if state is not None:
269 if deep:
--> 270 state = deepcopy(state, memo)
271 if hasattr(y, '__setstate__'):
272 y.__setstate__(state)
~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
~/.conda/envs/inforetrival/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
~/.conda/envs/inforetrival/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
170 y = x
171 else:
--> 172 y = _reconstruct(x, memo, *rv)
173
174 # If is its own copy, don't memoize.
~/.conda/envs/inforetrival/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
268 if state is not None:
269 if deep:
--> 270 state = deepcopy(state, memo)
271 if hasattr(y, '__setstate__'):
272 y.__setstate__(state)
~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
144 copier = _deepcopy_dispatch.get(cls)
145 if copier is not None:
--> 146 y = copier(x, memo)
147 else:
148 if issubclass(cls, type):
~/.conda/envs/inforetrival/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
228 memo[id(x)] = y
229 for key, value in x.items():
--> 230 y[deepcopy(key, memo)] = deepcopy(value, memo)
231 return y
232 d[dict] = _deepcopy_dict
~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
159 reductor = getattr(x, "__reduce_ex__", None)
160 if reductor is not None:
--> 161 rv = reductor(4)
162 else:
163 reductor = getattr(x, "__reduce__", None)
TypeError: cannot pickle '_io.BufferedReader' object
So a custom deepcopy method would be very much appreciated.
Best regards,
Luca
@lucailvec this project is, as I understand, no longer active and I suggest you try https://github.com/goodmami/wn/ instead. I saw this issue and tried it out there, but sure enough deep copy didn't work because each object contained a pointer to a shared database connection. I've restructured the code (not yet released) so this is no longer the case:
>>> import copy
>>> import wn
>>> synsets = wn.synsets("try")
>>> synset = synsets[0]
>>> synset
Synset('ewn-02535833-v')
>>> synset_copy = copy.deepcopy(synset)
>>> synset_copy is synset
False
>>> synset_copy == synset
True
>>> import pickle
>>> pickle.loads(pickle.dumps(synset))
Synset('ewn-02535833-v')
While this works, it's still not portable across machines because these objects store and use a row identifier to the backend SQL database, and this identifier is not guaranteed to be the same for different builds of the database (see goodmami/wn#84). Can you explain your needs a bit? Is the dumped transformer model distributed to others, or is it used on a single machine?
I hope to either remove the need to store the rowids or to create a custom deepcopy method to get around it, but in the meantime the current code might solve your immediate problem.