deepdiff icon indicating copy to clipboard operation
deepdiff copied to clipboard

DeepDiff cannot compare two zero-dimensional `ndarrays`

Open dantp-ai opened this issue 1 year ago • 2 comments

Describe the bug: DeepDiff cannot compare two zero-dimensional ndarrays (i.e., arrays whose shape is ()); the comparison raises `TypeError: iteration over a 0-d array`.

To Reproduce

import numpy as np
from deepdiff import DeepDiff

a1 = np.array((1))

a2 = np.array((2))

DeepDiff(a1, a2)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[37], line 1
----> 1 DeepDiff(a1, a2)

File python3.11/site-packages/deepdiff/diff.py:324, in DeepDiff.__init__(self, t1, t2, cache_purge_level, cache_size, cache_tuning_sample_size, custom_operators, cutoff_distance_for_pairs, cutoff_intersection_for_pairs, encodings, exclude_obj_callback, exclude_obj_callback_strict, exclude_paths, include_obj_callback, include_obj_callback_strict, include_paths, exclude_regex_paths, exclude_types, get_deep_distance, group_by, group_by_sort_key, hasher, hashes, ignore_encoding_errors, ignore_nan_inequality, ignore_numeric_type_changes, ignore_order, ignore_order_func, ignore_private_variables, ignore_string_case, ignore_string_type_changes, ignore_type_in_groups, ignore_type_subclasses, iterable_compare_func, zip_ordered_iterables, log_frequency_in_sec, math_epsilon, max_diffs, max_passes, number_format_notation, number_to_string_func, progress_logger, report_repetition, significant_digits, truncate_datetime, verbose_level, view, _original_type, _parameters, _shared_parameters, **kwargs)
    320 root = DiffLevel(t1, t2, verbose_level=self.verbose_level)
    321 # _original_type is only used to pass the original type of the data. Currently only used for numpy arrays.
    322 # The reason is that we convert the numpy array to python list and then later for distance calculations
    323 # we convert only the the last dimension of it into numpy arrays.
--> 324 self._diff(root, parents_ids=frozenset({id(t1)}), _original_type=_original_type)
    326 if get_deep_distance and view in {TEXT_VIEW, TREE_VIEW}:
    327     self.tree['deep_distance'] = self._get_rough_distance()

File python3.11/site-packages/deepdiff/diff.py:1643, in DeepDiff._diff(self, level, parents_ids, _original_type, local_tree)
   1640     self._diff_set(level, local_tree=local_tree)
   1642 elif isinstance(level.t1, np_ndarray):
-> 1643     self._diff_numpy_array(level, parents_ids, local_tree=local_tree)
   1645 elif isinstance(level.t1, PydanticBaseModel):
   1646     self._diff_obj(level, parents_ids, local_tree=local_tree)

File python3.11/site-packages/deepdiff/diff.py:1518, in DeepDiff._diff_numpy_array(self, level, parents_ids, local_tree)
   1506 for (t1_path, t1_row), (t2_path, t2_row) in zip(
   1507         get_numpy_ndarray_rows(level.t1, shape),
   1508         get_numpy_ndarray_rows(level.t2, shape)):
   1510     new_level = level.branch_deeper(
   1511         t1_row,
   1512         t2_row,
   (...)
   1515         child_relationship_param2=t2_path,
   1516     )
-> 1518     self._diff_iterable_in_order(new_level, parents_ids, _original_type=_original_type, local_tree=local_tree)

File python3.11/site-packages/deepdiff/diff.py:806, in DeepDiff._diff_iterable_in_order(self, level, parents_ids, _original_type, local_tree)
    804             self.tree[report_type] |= levels
    805 else:
--> 806     self._diff_by_forming_pairs_and_comparing_one_by_one(
    807         level,
    808         parents_ids=parents_ids,
    809         _original_type=_original_type,
    810         child_relationship_class=child_relationship_class,
    811         local_tree=local_tree,
    812     )

File python3.11/site-packages/deepdiff/diff.py:835, in DeepDiff._diff_by_forming_pairs_and_comparing_one_by_one(self, level, local_tree, parents_ids, _original_type, child_relationship_class, t1_from_index, t1_to_index, t2_from_index, t2_to_index)
    828 def _diff_by_forming_pairs_and_comparing_one_by_one(
    829     self, level, local_tree, parents_ids=frozenset(),
    830     _original_type=None, child_relationship_class=None,
    831     t1_from_index=None, t1_to_index=None,
    832     t2_from_index=None, t2_to_index=None,
    833 ):
--> 835     for (i, j), (x, y) in self._get_matching_pairs(
    836         level, 
    837         t1_from_index=t1_from_index, t1_to_index=t1_to_index,
    838         t2_from_index=t2_from_index, t2_to_index=t2_to_index
    839     ):
    840         if self._count_diff() is StopIteration:
    841             return  # pragma: no cover. This is already covered for addition.

File python3.11/site-packages/deepdiff/diff.py:714, in DeepDiff._get_matching_pairs(self, level, t1_from_index, t1_to_index, t2_from_index, t2_to_index)
    702 """
    703 Given a level get matching pairs. This returns list of two tuples in the form:
    704 [
   (...)
    709 Default it to compare in order
    710 """
    712 if self.iterable_compare_func is None:
    713     # Match in order if there is no compare function provided
--> 714     return self._compare_in_order(
    715         level,
    716         t1_from_index=t1_from_index, t1_to_index=t1_to_index,
    717         t2_from_index=t2_from_index, t2_to_index=t2_to_index,
    718     )
    719 try:
    720     matches = []

File python3.11/site-packages/deepdiff/diff.py:688, in DeepDiff._compare_in_order(self, level, t1_from_index, t1_to_index, t2_from_index, t2_to_index)
    682 """
    683 Default compare if `iterable_compare_func` is not provided.
    684 This will compare in sequence order.
    685 """
    686 if t1_from_index is None:
    687     return [((i, i), (x, y)) for i, (x, y) in enumerate(
--> 688         zip_longest(
    689             level.t1, level.t2, fillvalue=ListItemRemovedOrAdded))]
    690 else:
    691     t1_chunk = level.t1[t1_from_index:t1_to_index]

TypeError: iteration over a 0-d array

Expected behavior: Comparing two arrays with shape () should work out of the box with DeepDiff's default arguments.

OS, DeepDiff version and Python version (please complete the following information):

  • OS: macOS
  • Version 13.6.7
  • Python 3.11.7 [Clang 15.0.0 (clang-1500.1.0.2.5)] on darwin
  • DeepDiff Version 7.0.1
  • Numpy Version 1.24.4

Additional context

Similar to https://github.com/seperman/deepdiff/issues/153 and https://github.com/seperman/deepdiff/issues/332 .

dantp-ai avatar Jun 10 '24 13:06 dantp-ai

Hi @dantp-ai Interesting. I was not aware of scalars in Numpy. I have only used Numpy for arrays. I will take a look when I have a chance. PRs are always very welcome too!

seperman avatar Jun 11 '24 07:06 seperman

@seperman I've updated the terminology to avoid confusion with NumPy scalars. DeepDiff raises the error when comparing zero-dimensional arrays (shape `()`) that each contain a single scalar value. It works perfectly well for one-dimensional arrays containing a single element:

>>> x1 = np.array([1])
>>> x2 = np.array([2])
>>> x1.shape, x2.shape
((1,), (1,))
>>> DeepDiff(x1, x2)
{'values_changed': {'root[0]': {'new_value': 2, 'old_value': 1}}}
>>> x1 = np.array((1))
>>> x2 = np.array((2))
>>> x1.shape, x2.shape
((), ())
>>> DeepDiff(x1, x2)
...
TypeError: iteration over a 0-d array

dantp-ai avatar Jun 11 '24 07:06 dantp-ai