deepdiff
deepdiff copied to clipboard
DeepDiff cannot compare two zero-dimensional `ndarrays`
Describe the bug
DeepDiff cannot compare two zero-dimensional ndarrays (i.e., arrays whose shape is `()`); the comparison raises `TypeError: iteration over a 0-d array`.
To Reproduce
import numpy as np
from deepdiff import DeepDiff
a1 = np.array((1))
a2 = np.array((2))
DeepDiff(a1, a2)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[37], line 1
----> 1 DeepDiff(a1, a2)
File python3.11/site-packages/deepdiff/diff.py:324, in DeepDiff.__init__(self, t1, t2, cache_purge_level, cache_size, cache_tuning_sample_size, custom_operators, cutoff_distance_for_pairs, cutoff_intersection_for_pairs, encodings, exclude_obj_callback, exclude_obj_callback_strict, exclude_paths, include_obj_callback, include_obj_callback_strict, include_paths, exclude_regex_paths, exclude_types, get_deep_distance, group_by, group_by_sort_key, hasher, hashes, ignore_encoding_errors, ignore_nan_inequality, ignore_numeric_type_changes, ignore_order, ignore_order_func, ignore_private_variables, ignore_string_case, ignore_string_type_changes, ignore_type_in_groups, ignore_type_subclasses, iterable_compare_func, zip_ordered_iterables, log_frequency_in_sec, math_epsilon, max_diffs, max_passes, number_format_notation, number_to_string_func, progress_logger, report_repetition, significant_digits, truncate_datetime, verbose_level, view, _original_type, _parameters, _shared_parameters, **kwargs)
320 root = DiffLevel(t1, t2, verbose_level=self.verbose_level)
321 # _original_type is only used to pass the original type of the data. Currently only used for numpy arrays.
322 # The reason is that we convert the numpy array to python list and then later for distance calculations
323 # we convert only the the last dimension of it into numpy arrays.
--> 324 self._diff(root, parents_ids=frozenset({id(t1)}), _original_type=_original_type)
326 if get_deep_distance and view in {TEXT_VIEW, TREE_VIEW}:
327 self.tree['deep_distance'] = self._get_rough_distance()
File python3.11/site-packages/deepdiff/diff.py:1643, in DeepDiff._diff(self, level, parents_ids, _original_type, local_tree)
1640 self._diff_set(level, local_tree=local_tree)
1642 elif isinstance(level.t1, np_ndarray):
-> 1643 self._diff_numpy_array(level, parents_ids, local_tree=local_tree)
1645 elif isinstance(level.t1, PydanticBaseModel):
1646 self._diff_obj(level, parents_ids, local_tree=local_tree)
File python3.11/site-packages/deepdiff/diff.py:1518, in DeepDiff._diff_numpy_array(self, level, parents_ids, local_tree)
1506 for (t1_path, t1_row), (t2_path, t2_row) in zip(
1507 get_numpy_ndarray_rows(level.t1, shape),
1508 get_numpy_ndarray_rows(level.t2, shape)):
1510 new_level = level.branch_deeper(
1511 t1_row,
1512 t2_row,
(...)
1515 child_relationship_param2=t2_path,
1516 )
-> 1518 self._diff_iterable_in_order(new_level, parents_ids, _original_type=_original_type, local_tree=local_tree)
File python3.11/site-packages/deepdiff/diff.py:806, in DeepDiff._diff_iterable_in_order(self, level, parents_ids, _original_type, local_tree)
804 self.tree[report_type] |= levels
805 else:
--> 806 self._diff_by_forming_pairs_and_comparing_one_by_one(
807 level,
808 parents_ids=parents_ids,
809 _original_type=_original_type,
810 child_relationship_class=child_relationship_class,
811 local_tree=local_tree,
812 )
File python3.11/site-packages/deepdiff/diff.py:835, in DeepDiff._diff_by_forming_pairs_and_comparing_one_by_one(self, level, local_tree, parents_ids, _original_type, child_relationship_class, t1_from_index, t1_to_index, t2_from_index, t2_to_index)
828 def _diff_by_forming_pairs_and_comparing_one_by_one(
829 self, level, local_tree, parents_ids=frozenset(),
830 _original_type=None, child_relationship_class=None,
831 t1_from_index=None, t1_to_index=None,
832 t2_from_index=None, t2_to_index=None,
833 ):
--> 835 for (i, j), (x, y) in self._get_matching_pairs(
836 level,
837 t1_from_index=t1_from_index, t1_to_index=t1_to_index,
838 t2_from_index=t2_from_index, t2_to_index=t2_to_index
839 ):
840 if self._count_diff() is StopIteration:
841 return # pragma: no cover. This is already covered for addition.
File python3.11/site-packages/deepdiff/diff.py:714, in DeepDiff._get_matching_pairs(self, level, t1_from_index, t1_to_index, t2_from_index, t2_to_index)
702 """
703 Given a level get matching pairs. This returns list of two tuples in the form:
704 [
(...)
709 Default it to compare in order
710 """
712 if self.iterable_compare_func is None:
713 # Match in order if there is no compare function provided
--> 714 return self._compare_in_order(
715 level,
716 t1_from_index=t1_from_index, t1_to_index=t1_to_index,
717 t2_from_index=t2_from_index, t2_to_index=t2_to_index,
718 )
719 try:
720 matches = []
File python3.11/site-packages/deepdiff/diff.py:688, in DeepDiff._compare_in_order(self, level, t1_from_index, t1_to_index, t2_from_index, t2_to_index)
682 """
683 Default compare if `iterable_compare_func` is not provided.
684 This will compare in sequence order.
685 """
686 if t1_from_index is None:
687 return [((i, i), (x, y)) for i, (x, y) in enumerate(
--> 688 zip_longest(
689 level.t1, level.t2, fillvalue=ListItemRemovedOrAdded))]
690 else:
691 t1_chunk = level.t1[t1_from_index:t1_to_index]
TypeError: iteration over a 0-d array
Expected behavior
Comparing two arrays with shape () should be possible out-of-the-box with default arguments to DeepDiff.
OS, DeepDiff version and Python version (please complete the following information):
- OS: macOS
- OS Version: 13.6.7
- Python 3.11.7 [Clang 15.0.0 (clang-1500.1.0.2.5)] on darwin
- DeepDiff Version 7.0.1
- Numpy Version 1.24.4
Additional context
Similar to https://github.com/seperman/deepdiff/issues/153 and https://github.com/seperman/deepdiff/issues/332 .
Hi @dantp-ai, interesting. I was not aware of scalars in NumPy; I have only used NumPy for arrays. I will take a look when I have a chance. PRs are always very welcome too!
@seperman I've updated the terminology to avoid confusion with NumPy scalars. DeepDiff throws the error when comparing zero-dimensional arrays, each of which contains a single scalar value. It works perfectly well for one-dimensional arrays containing a single element:
>>> x1 = np.array([1])
>>> x2 = np.array([2])
>>> x1.shape, x2.shape
((1,), (1,))
>>> DeepDiff(x1, x2)
{'values_changed': {'root[0]': {'new_value': 2, 'old_value': 1}}}
>>> x1 = np.array((1))
>>> x2 = np.array((2))
>>> x1.shape, x2.shape
((), ())
>>> DeepDiff(x1, x2)
...
TypeError: iteration over a 0-d array