"GraphDef cannot be larger than 2GB" error when running usage_token.py
Hello All,
When I try to run the usage_token.py example from bilm-tf (using the weights, options, and vocabulary files from bioelmo), I get the error below. Please find the code here:

```python
import tensorflow as tf
from bilm import TokenBatcher, BidirectionalLanguageModel, weight_layers, \
    dump_token_embeddings

raw_context = [
    'Pretrained biLMs compute representations useful for NLP tasks .',
    'They give state of the art performance for many tasks .'
]
tokenized_context = [sentence.split() for sentence in raw_context]

options_file = "biomed_elmo_options.json"
weight_file = "biomed_elmo_weights.hdf5"
vocab_file = "vocabulary.txt"
token_embedding_file = "bioelmo_token_embeddings.hdf5"

# Dump the token embeddings to a file, then reset the default graph.
dump_token_embeddings(vocab_file, options_file, weight_file, token_embedding_file)
tf.reset_default_graph()

batcher = TokenBatcher(vocab_file)

# Input placeholders to the biLM.
context_token_ids = tf.placeholder('int32', shape=(None, None))

# Build the biLM graph.
bilm = BidirectionalLanguageModel(
    options_file,
    weight_file,
    use_character_inputs=False,
    embedding_weight_file=token_embedding_file
)
context_embeddings_op = bilm(context_token_ids)

elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
elmo_context_output = weight_layers('output', context_embeddings_op, l2_coef=0.0)

with tf.Session() as sess:
    # It is necessary to initialize variables once before running inference.
    sess.run(tf.global_variables_initializer())

    context_ids = batcher.batch_sentences(tokenized_context)

    elmo_context_input_ = sess.run(
        [elmo_context_input['weighted_op']],
        feed_dict={context_token_ids: context_ids}
    )
```
Please find below the error message:
```
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/anaconda3/envs/bio_elmo_kernel/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    887     try:
    888       result = self._run(None, fetches, feed_dict, options_ptr,
--> 889                          run_metadata_ptr)
    890       if run_metadata:
    891         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~/anaconda3/envs/bio_elmo_kernel/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1118     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1119       results = self._do_run(handle, final_targets, final_fetches,
--> 1120                              feed_dict_tensor, options, run_metadata)
   1121     else:
   1122       results = []

~/anaconda3/envs/bio_elmo_kernel/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1315     if handle is None:
   1316       return self._do_call(_run_fn, self._session, feeds, fetches, targets,
--> 1317                            options, run_metadata)
   1318     else:
   1319       return self._do_call(_prun_fn, self._session, handle, feeds, fetches)

~/anaconda3/envs/bio_elmo_kernel/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1321   def _do_call(self, fn, *args):
   1322     try:
--> 1323       return fn(*args)
   1324     except errors.OpError as e:
   1325       message = compat.as_text(e.message)

~/anaconda3/envs/bio_elmo_kernel/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1291                 run_metadata):
   1292       # Ensure any changes to the graph are reflected in the runtime.
--> 1293       self._extend_graph()
   1294       with errors.raise_exception_on_not_ok_status() as status:
   1295         if self._created_with_new_api:

~/anaconda3/envs/bio_elmo_kernel/lib/python3.6/site-packages/tensorflow/python/client/session.py in _extend_graph(self)
   1347         graph_def, self._current_version = self._graph._as_graph_def(
   1348             from_version=self._current_version,
--> 1349             add_shapes=self._add_shapes)
   1350         # pylint: enable=protected-access
   1351

~/anaconda3/envs/bio_elmo_kernel/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _as_graph_def(self, from_version, add_shapes)
   2731           bytesize += op.node_def.ByteSize()
   2732           if bytesize >= (1 << 31) or bytesize < 0:
--> 2733             raise ValueError("GraphDef cannot be larger than 2GB.")
   2734       if self._functions:
   2735         for f in self._functions.values():

ValueError: GraphDef cannot be larger than 2GB.
```
From reading about this error online, it appears that TensorFlow limits the serialized GraphDef to 2 GB, so constants baked into the graph (such as a large embedding matrix) cannot exceed that size. URL: https://github.com/tensorflow/tensorflow/issues/5383
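For reference, the generic workaround discussed in that issue is to avoid embedding the large array in the graph as a constant and instead feed it in through a placeholder when initializing a variable. Here is a minimal sketch of that pattern (the embedding shape is made up, and this is not bilm-tf's actual API):

```python
import numpy as np
import tensorflow as tf

# Hypothetical embedding matrix; in my case this would be the bioelmo token embeddings.
embedding_values = np.random.rand(50000, 512).astype(np.float32)

# Create the variable from a placeholder so the values are never serialized
# into the GraphDef.
embedding_init = tf.placeholder(tf.float32, shape=embedding_values.shape)
embedding_var = tf.Variable(embedding_init, trainable=False)

with tf.Session() as sess:
    # The large array is passed via feed_dict at initialization time only.
    sess.run(embedding_var.initializer,
             feed_dict={embedding_init: embedding_values})
```

I am not sure how (or whether) this can be applied to the graph that BidirectionalLanguageModel builds internally when use_character_inputs=False, though.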
Any thoughts on how this error can be prevented?
Thanks!