io
io copied to clipboard
With TF 1.15, reading an OSS file through tensorflow-io returns empty lines instead of the file's data.
in code:
….
# Value passed as `num_parallel_calls` to both `map` stages below.
_NUM_PARALLEL_CALLS = 8
# Batching happens BEFORE parsing, so `_parse_csv` is mapped over batches of
# raw lines rather than over single lines.
dataset = dataset.batch(self._data_config.batch_size)
# Decode each batch of CSV lines into a dict of field tensors.
dataset = dataset.map(self._parse_csv, num_parallel_calls=_NUM_PARALLEL_CALLS)
# Apply `self._preprocess` to the parsed batches.
dataset = dataset.map(map_func=self._preprocess, num_parallel_calls=_NUM_PARALLEL_CALLS)
# Prefetch up to `self._prefetch_size` already-processed elements.
dataset = dataset.prefetch(buffer_size=self._prefetch_size)
…
def _parse_csv(self, line):
    """Decode a batch of CSV lines into a dict of per-field tensors.

    Args:
        line: string tensor holding raw CSV rows. The sanity check indexes
            ``line[0]``, so a batched tensor is expected (the pipeline
            batches before mapping this function).

    Returns:
        dict mapping input field name to its decoded tensor, restricted to
        the fields listed in ``self._effective_fids`` and
        ``self._label_fids``.

    Raises:
        AssertionError: raised inside the ``py_func`` check when the graph
            runs (TensorFlow reports it as an op error) if the first row
            of the batch does not split into the expected number of
            fields, e.g. because the separator is wrong or the row is
            empty.
    """
    if self._field_names:
        # Decode by CSV header: one default per header column. Columns that
        # are not configured inputs are decoded as strings and dropped
        # further down.
        input_pos = {}
        for idx, name in enumerate(self._input_fields):
            # Keep the first occurrence, matching list.index().
            input_pos.setdefault(name, idx)
        record_defaults = []
        for field_name in self._field_names:
            tid = input_pos.get(field_name)
            if tid is None:
                record_defaults.append('')
            else:
                record_defaults.append(
                    self.get_type_defaults(self._input_field_types[tid],
                                           self._input_field_defaults[tid]))
    else:
        record_defaults = [
            self.get_type_defaults(t, v)
            for t, v in zip(self._input_field_types,
                            self._input_field_defaults)
        ]

    separator = self._data_config.separator
    num_required = len(record_defaults)

    def _check_data(batch_lines):
        # Encode only when the separator is not already bytes; the original
        # `type(sep) != type(str)` test was always true and therefore broke
        # on a bytes separator.
        sep = separator
        if not isinstance(sep, bytes):
            sep = sep.encode('utf-8')
        # Only the first row of the batch is inspected.
        field_num = len(batch_lines[0].split(sep))
        # Raise explicitly instead of `assert` so the check survives -O.
        if field_num != num_required:
            raise AssertionError(
                'sep[%s] maybe invalid: field_num=%d, required_num=%d \n%s' %
                (sep, field_num, num_required, batch_lines))
        return True

    check_op = tf.py_func(_check_data, [line], Tout=tf.bool)
    # Force the field-count check to run before the rows are decoded.
    with tf.control_dependencies([check_op]):
        fields = tf.decode_csv(
            line,
            field_delim=separator,
            record_defaults=record_defaults,
            name='decode_csv')

    if self._field_names:
        # Reorder from header order to the configured input-field order.
        fields = [
            fields[self._field_names.index(x)] for x in self._input_fields
        ]

    # filter only valid fields
    inputs = {self._input_fields[x]: fields[x] for x in self._effective_fids}
    # filter only valid labels
    for x in self._label_fids:
        inputs[self._input_fields[x]] = fields[x]
    return inputs
File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 480, in evaluate name=name) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 522, in _actual_eval return _evaluate() File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 511, in _evaluate output_dir=self.eval_dir(name)) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1619, in _evaluate_run config=self._session_config) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/evaluation.py", line 272, in _evaluate_once session.run(eval_ops, feed_dict) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 754, in run run_metadata=run_metadata) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 1259, in run run_metadata=run_metadata) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 1360, in run raise six.reraise(*original_exc_info) File "/usr/lib/python3/dist-packages/six.py", line 693, in reraise raise value File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 1345, in run return self._sess.run(*args, **kwargs) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 1418, in run run_metadata=run_metadata) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 1176, in run return self._sess.run(*args, **kwargs) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 956, in run run_metadata_ptr) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1180, in _run feed_dict_tensor, options, run_metadata) File 
"/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1359, in _do_run run_metadata) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1384, in _do_call raise type(e)(node_def, op, message) tensorflow.python.framework.errors_impl.UnknownError: AssertionError: sep[b','] maybe invalid: field_num=1, required_num=20, [b'' b'' b'' b'' b'' b'' b'' b'' b'' b''] Traceback (most recent call last):