When I run the last step using command1, the error "Dst tensor is not initialized" occurs. If I use command2 with an absolute path, the error "join() argument must be str or bytes, not 'NoneType'" occurs. Does anybody know how to solve it?
command1: python DAN_V2.py -ds 2 --data_dir=./test_images_pre_out/ --data_dir_test=./test_images/ -nlm 68 -mode eval
command2: python DAN_V2.py -ds 2 --data_dir=/home/.../DAN_V2/test_images_pre_out/ -nlm 68 -mode eval
command1 error:
2018-11-22 18:39:34.989433: W tensorflow/core/common_runtime/bfc_allocator.cc:279] **********************************************************************************xxxxxxxxxxx
2018-11-22 18:39:34.989452: W tensorflow/core/framework/op_kernel.cc:1318] OP_REQUIRES failed at assign_op.h:117 : Resource exhausted: OOM when allocating tensor with shape[25088,256] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
Traceback (most recent call last):
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1322, in _do_call
return fn(*args)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InternalError: Dst tensor is not initialized.
[[Node: save/RestoreV2/_555 = _Recvclient_terminated=false, recv_device="/job:localhost/replask:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnatiensor_name="edge_560_save/RestoreV2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/dPU:0"]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "DAN_V2.py", line 143, in <module>
tf.app.run(argv=sys.argv)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "DAN_V2.py", line 139, in main
dan_run_loop.dan_main(flags,vgg16_model_fn,input_function)
File "/home.../DAN_V2/dan_run_loop.py", line 194, in dan_main
eval_results = estimator.evaluate(input_fn=input_fn_eval,steps=flags.max_train_steps)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 460, in evaluate
output_dir=self.eval_dir(name))
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 1386, in _evaluate_run
config=self._session_config)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/training/evaluation.py", line 209, in _evaluate_once
session_creator=session_creator, hooks=hooks) as session:
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py", line 826, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py", line 549, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py", line 1012, in __init__
_WrappedSession.__init__(self, self._create_session())
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py", line 1017, in _create_session
return self._sess_creator.create_session()
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py", line 706, in create_session
self.tf_sess = self._session_creator.create_session()
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py", line 477, in create_session
init_fn=self._scaffold.init_fn)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/training/session_manager.py", line 281, in prepare_session
config=config)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/training/session_manager.py", line 195, in _restore_checkpoint
saver.restore(sess, checkpoint_filename_with_path)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1752, in restore
{self.saver_def.filename_tensor_name: save_path})
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 900, in run
run_metadata_ptr)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
run_metadata)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: Dst tensor is not initialized.
[[Node: save/RestoreV2/_555 = _Recvclient_terminated=false, recv_device="/job:localhost/replask:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnatiensor_name="edge_560_save/RestoreV2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/dPU:0"]]
command2: python DAN_V2.py -ds 2 --data_dir=/home/.../DAN_V2/test_images_pre_out/ -nlm 68 -mode eval
error2:
INFO:tensorflow:Using config: {'_master': '', '_num_worker_replicas': 1, '_device_fn': None, '_service': None, '_save_checkpoints_secs': 1000000000.0, '_session_config': allow_soft_placement: true
, '_is_chief': True, '_evaluation_master': '', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff281278048>, '_keep_checkpoint_every_n_hours': 10000, '_global_id_in_cluster': 0, '_num_ps_replicas': 0, '_model_dir': './model_dir', '_keep_checkpoint_max': 5, '_task_type': 'worker', '_tf_random_seed': None, '_log_step_count_steps': 100, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_train_distribute': None, '_task_id': 0}
Traceback (most recent call last):
File "DAN_V2.py", line 143, in <module>
tf.app.run(argv=sys.argv)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/platform/app.py", line 125, in run
_sys.exit(main(argv))
File "DAN_V2.py", line 139, in main
dan_run_loop.dan_main(flags,vgg16_model_fn,input_function)
File "/home/.../DAN_V2/dan_run_loop.py", line 194, in dan_main
eval_results = estimator.evaluate(input_fn=input_fn_eval,steps=flags.max_train_steps)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 453, in evaluate
input_fn, hooks, checkpoint_path)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 1346, in _evaluate_build_graph
model_fn_lib.ModeKeys.EVAL))
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 985, in _get_features_and_labels_from_input_fn
result = self._call_input_fn(input_fn, mode)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/site-packages/tensorflow/python/estimator/estimator.py", line 1074, in _call_input_fn
return input_fn(**kwargs)
File "/home/.../DAN_V2/dan_run_loop.py", line 186, in input_fn_eval
1, flags.num_parallel_calls, flags.multi_gpu)
File "DAN_V2.py", line 52, in vgg16_input_fn
img_path,pts_path = get_filenames(data_dir)
File "DAN_V2.py", line 37, in get_filenames
p = os.path.join(data_dir, ext)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/posixpath.py", line 89, in join
genericpath._check_arg_types('join', a, *p)
File "/usr/local/public/anaconda2/envs/tf1.4_py35/lib/python3.5/genericpath.py", line 143, in _check_arg_types
(funcname, s.__class__.__name__)) from None
TypeError: join() argument must be str or bytes, not 'NoneType'