Keras Example with custom dataset_reader does not work: tf__parse_from_example_list() got multiple values for argument 'list_size'
Starting from the following Keras example, I changed the dataset reader in dataset_hparams to use a custom function, dataset_reader=get_dataset (instead of tf.data.TFRecordDataset). get_dataset reads the text files with a parsing function. Training then fails at a later stage, in train_and_validate. See the code HERE; a rough sketch of the change is below.
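(The linked code is not inlined here, but a minimal sketch of the swap being described might look like the following, assuming the text files are read line by line. get_dataset, parse_line, and all paths are placeholders, not the actual code.)

import tensorflow as tf
import tensorflow_ranking as tfr

def parse_line(line):
  # Placeholder for the custom parsing function; the real logic is in the
  # linked code.
  return tf.strings.split(line)

def get_dataset(file_paths):
  # Drop-in replacement for tf.data.TFRecordDataset: read plain text files
  # and parse each line.
  return tf.data.TextLineDataset(file_paths).map(parse_line)

dataset_hparams = tfr.keras.pipeline.DatasetHparams(
    train_input_pattern="/path/to/train.txt",   # placeholder path
    valid_input_pattern="/path/to/valid.txt",   # placeholder path
    train_batch_size=32,
    valid_batch_size=32,
    dataset_reader=get_dataset)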
$ ranking_pipeline.train_and_validate(verbose=1)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-31-7026184516c0> in <module>
----> 1 ranking_pipeline.train_and_validate(verbose=1)
/usr/local/lib/python3.8/site-packages/tensorflow_ranking/python/keras/pipeline.py in train_and_validate(self, verbose)
593 # Otherwise, MultiWorkerMirroredStrategy will fail.
594 train_dataset, valid_dataset = (
--> 595 self._dataset_builder.build_train_dataset(),
596 self._dataset_builder.build_valid_dataset())
597 model.fit(
/usr/local/lib/python3.8/site-packages/tensorflow_ranking/python/keras/pipeline.py in build_train_dataset(self)
961 """See `AbstractDatasetBuilder`."""
962 train_list_size = self._hparams.list_size
--> 963 return self._build_dataset(
964 file_pattern=self._hparams.train_input_pattern,
965 batch_size=self._hparams.train_batch_size,
/usr/local/lib/python3.8/site-packages/tensorflow_ranking/python/keras/pipeline.py in _build_dataset(self, file_pattern, batch_size, list_size, randomize_input, num_epochs)
930 """
931 # TODO: Remove defaults common in Estimator pipeline and here.
--> 932 dataset = data.build_ranking_dataset(
933 file_pattern=file_pattern,
934 data_format=data.ELWC,
/usr/local/lib/python3.8/site-packages/tensorflow_ranking/python/data.py in build_ranking_dataset(file_pattern, data_format, batch_size, context_feature_spec, example_feature_spec, list_size, size_feature_name, mask_feature_name, shuffle_examples, seed, **kwargs)
1013 seed=seed)
1014
-> 1015 return build_ranking_dataset_with_parsing_fn(
1016 file_pattern, parsing_fn=parsing_fn, batch_size=batch_size, **kwargs)
1017
/usr/local/lib/python3.8/site-packages/tensorflow_ranking/python/data.py in build_ranking_dataset_with_parsing_fn(file_pattern, parsing_fn, batch_size, reader, reader_args, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed, prefetch_buffer_size, reader_num_threads, sloppy_ordering, drop_final_batch, num_parser_threads)
957
958 # Parse a batch.
--> 959 dataset = dataset.map(parsing_fn, num_parallel_calls=num_parser_threads)
960
961 # Prefetching allows for data fetching to happen on host while model runs
/usr/local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in map(self, map_func, num_parallel_calls, deterministic)
1925 return MapDataset(self, map_func, preserve_cardinality=True)
1926 else:
-> 1927 return ParallelMapDataset(
1928 self,
1929 map_func,
/usr/local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, input_dataset, map_func, num_parallel_calls, deterministic, use_inter_op_parallelism, preserve_cardinality, use_legacy_function)
4520 self._input_dataset = input_dataset
4521 self._use_inter_op_parallelism = use_inter_op_parallelism
-> 4522 self._map_func = StructuredFunctionWrapper(
4523 map_func,
4524 self._transformation_name(),
/usr/local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, func, transformation_name, dataset, input_classes, input_shapes, input_types, input_structure, add_to_graph, use_legacy_function, defun_kwargs)
3710 resource_tracker = tracking.ResourceTracker()
3711 with tracking.resource_tracker_scope(resource_tracker):
-> 3712 self._function = fn_factory()
3713 # There is no graph to add in eager mode.
3714 add_to_graph &= not context.executing_eagerly()
/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in get_concrete_function(self, *args, **kwargs)
3132 or `tf.Tensor` or `tf.TensorSpec`.
3133 """
-> 3134 graph_function = self._get_concrete_function_garbage_collected(
3135 *args, **kwargs)
3136 graph_function._garbage_collector.release() # pylint: disable=protected-access
/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_garbage_collected(self, *args, **kwargs)
3098 args, kwargs = None, None
3099 with self._lock:
-> 3100 graph_function, _ = self._maybe_define_function(args, kwargs)
3101 seen_names = set()
3102 captured = object_identity.ObjectIdentitySet(
/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3442
3443 self._function_cache.missed.add(call_context_key)
-> 3444 graph_function = self._create_graph_function(args, kwargs)
3445 self._function_cache.primary[cache_key] = graph_function
3446
/usr/local/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3277 arg_names = base_arg_names + missing_arg_names
3278 graph_function = ConcreteFunction(
-> 3279 func_graph_module.func_graph_from_py_func(
3280 self._name,
3281 self._python_function,
/usr/local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
997 _, original_func = tf_decorator.unwrap(python_func)
998
--> 999 func_outputs = python_func(*func_args, **func_kwargs)
1000
1001 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in wrapped_fn(*args)
3685 attributes=defun_kwargs)
3686 def wrapped_fn(*args): # pylint: disable=missing-docstring
-> 3687 ret = wrapper_helper(*args)
3688 ret = structure.to_tensor_list(self._output_structure, ret)
3689 return [ops.convert_to_tensor(t) for t in ret]
/usr/local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in wrapper_helper(*args)
3615 if not _should_unpack(nested_args):
3616 nested_args = (nested_args,)
-> 3617 ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
3618 if _should_pack(ret):
3619 ret = tuple(ret)
/usr/local/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
693 except Exception as e: # pylint:disable=broad-except
694 if hasattr(e, 'ag_error_metadata'):
--> 695 raise e.ag_error_metadata.to_exception(e)
696 else:
697 raise
TypeError: in user code:
TypeError: tf__parse_from_example_list() got multiple values for argument 'list_size'
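(For reference, the final TypeError looks like a plain Python positional/keyword collision: tfr.data binds list_size onto the ELWC parser as a keyword argument when it builds parsing_fn, and dataset.map unpacks tuple-valued elements into multiple positional arguments, as seen in wrapper_helper above. So if the custom reader emits, say, (features, labels) tuples instead of serialized protos, the second element lands on list_size. A standalone reproduction of just the collision, using a stand-in parser rather than the real one:)

import functools

def parse_from_example_list(serialized, list_size=None):
  # Stand-in with the same signature shape as the real parser: one
  # positional batch of serialized protos, then keyword configuration.
  return serialized, list_size

# parsing_fn is built with list_size already bound as a keyword.
parsing_fn = functools.partial(parse_from_example_list, list_size=10)

parsing_fn("batch")               # OK: a single positional argument.
parsing_fn("features", "labels")  # TypeError: got multiple values for
                                  # argument 'list_size'.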
@dazcona Thank you for your issue. Based on my reproduction, the error comes from the fact that our pipeline only works on data saved in the ELWC format (handled inside tfr.data). For your test data in the LibSVM format, you need to generate an ELWC copy of the data and then train on that; see my example here.
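(The linked example is not reproduced here; as a minimal sketch of the conversion being described, assuming the standard LibSVM ranking format "<relevance> qid:<qid> <index>:<value> ..." with rows for the same query stored contiguously. The feature names below are hypothetical.)

import tensorflow as tf
from tensorflow_serving.apis import input_pb2

def libsvm_line_to_example(line):
  # One LibSVM row -> (query id, tf.train.Example).
  tokens = line.strip().split()
  relevance = int(tokens[0])
  qid = tokens[1].split(":")[1]
  example = tf.train.Example()
  example.features.feature["relevance_label"].int64_list.value.append(relevance)
  for token in tokens[2:]:
    index, value = token.split(":")
    example.features.feature["feature_" + index].float_list.value.append(
        float(value))
  return qid, example

def write_elwc(libsvm_path, tfrecord_path):
  # Group consecutive rows with the same qid into one ELWC per query and
  # serialize the result as a TFRecord file that tfr.data can parse.
  with tf.io.TFRecordWriter(tfrecord_path) as writer, open(libsvm_path) as f:
    elwc, current_qid = None, None
    for line in f:
      qid, example = libsvm_line_to_example(line)
      if qid != current_qid:
        if elwc is not None:
          writer.write(elwc.SerializeToString())
        elwc, current_qid = input_pb2.ExampleListWithContext(), qid
      elwc.examples.add().CopyFrom(example)
    if elwc is not None:
      writer.write(elwc.SerializeToString())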