datatable icon indicating copy to clipboard operation
datatable copied to clipboard

Several groupby tests are failing for debug build on Windows

Open oleksiyskononenko opened this issue 3 years ago • 0 comments

Several tests fail on Windows, but only with the following configuration:

  • Python 3.8.10 (not every Python 3.8 release is affected; for instance, 3.8.0 doesn't fail);
  • datatable debug build (release build doesn't fail);
  • the /Od compiler flag is supplied, i.e. optimization is disabled (other optimization levels do not fail).

Here are the failing tests (https://ci.appveyor.com/project/h2oops/datatable/builds/44409622/job/9dg7sfhyqo0bu3x9?fullLog=true):

_____________________________ test_group_boolean ______________________________
    def test_group_boolean():
        DT = dt.Frame(A=[True, None, False, False, True, True, False, True])
        DTR = DT[:, count(), by(f.A)]
>       assert_equals(DTR, dt.Frame(A=[None, False, True], count=[1, 3, 4],
                                    stypes={"count": dt.int64}))
DT         = <Frame#2cc0da0dcf0 8x1>
DTR        = <Frame#2cc0da0dde0 3x2>
tests\test-groups.py:247: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
frame1 = <Frame#2cc0da0dde0 3x2>, frame2 = <Frame#2cc0da0dd20 3x2>
rel_tol = 1e-07, abs_tol = 1e-07
    def assert_equals(frame1, frame2, rel_tol = 1e-7, abs_tol = None):
        """
        Helper function to assert that 2 frames are equal to each other.
        """
        frame_integrity_check(frame1)
        frame_integrity_check(frame2)
    
        # The default value of `abs_tol` is set to `rel_tol`
        if abs_tol is None:
            abs_tol = rel_tol
    
        assert frame1.shape == frame2.shape, (
            "The left frame has shape %r, while the right has shape %r"
            % (frame1.shape, frame2.shape))
    
        assert frame1.names == frame2.names, (
            "The left frame has names %r, while the right has names %r"
            % (frame1.names, frame2.names))
        assert frame1.types == frame2.types, (
            "The left frame has types %r, while the right has types %r"
            % (frame1.types, frame2.types))
        data1 = frame1.to_list()
        data2 = frame2.to_list()
        assert len(data1) == len(data2)  # shape check should ensure this
        for i in range(len(data1)):
            col1 = data1[i]
            col2 = data2[i]
            assert len(col1) == len(col2)
            for j in range(len(col1)):
                val1 = col1[j]
                val2 = col2[j]
                if val1 == val2: continue
                if isinstance(val1, float) and isinstance(val2, float):
                    if math.isclose(val1, val2, rel_tol = rel_tol, abs_tol = abs_tol): continue
                if len(col1) > 16:
                    arr1 = repr(col1[:16])[:-1] + ", ...]"
                    arr2 = repr(col2[:16])[:-1] + ", ...]"
                else:
                    arr1 = repr(col1)
                    arr2 = repr(col2)
>               raise AssertionError(
                    "The frames have different data in column %d `%s` at "
                    "index %d: LHS has %r, and RHS has %r\n"
                    "  LHS = %s\n"
                    "  RHS = %s\n"
                    % (i, frame1.names[i], j, val1, val2, arr1, arr2))
E               AssertionError: The frames have different data in column 0 `A` at index 0: LHS has False, and RHS has None
E                 LHS = [False, False, True]
E                 RHS = [None, False, True]
abs_tol    = 1e-07
arr1       = '[False, False, True]'
arr2       = '[None, False, True]'
col1       = [False, False, True]
col2       = [None, False, True]
data1      = [[False, False, True], [1, 3, 4]]
data2      = [[None, False, True], [1, 3, 4]]
frame1     = <Frame#2cc0da0dde0 3x2>
frame2     = <Frame#2cc0da0dd20 3x2>
i          = 0
j          = 0
rel_tol    = 1e-07
val1       = False
val2       = None
tests\__init__.py:140: AssertionError
_____________________________ test_group_boolean2 _____________________________
    def test_group_boolean2():
        DT = dt.Frame(A=[True, False, False] * 500 + [None, True])
        DTR = DT[:, count(), by(f.A)]
>       assert_equals(DTR, dt.Frame(A=[None, False, True], count=[1, 1000, 501],
                                    stypes={"count": dt.int64}))
DT         = <Frame#2cc0d9f65a0 1502x1>
DTR        = <Frame#2cc0d9f6600 3x2>
tests\test-groups.py:253: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
frame1 = <Frame#2cc0d9f6600 3x2>, frame2 = <Frame#2cc0d9f65d0 3x2>
rel_tol = 1e-07, abs_tol = 1e-07
    def assert_equals(frame1, frame2, rel_tol = 1e-7, abs_tol = None):
        """
        Helper function to assert that 2 frames are equal to each other.
        """
        frame_integrity_check(frame1)
        frame_integrity_check(frame2)
    
        # The default value of `abs_tol` is set to `rel_tol`
        if abs_tol is None:
            abs_tol = rel_tol
    
        assert frame1.shape == frame2.shape, (
            "The left frame has shape %r, while the right has shape %r"
            % (frame1.shape, frame2.shape))
    
        assert frame1.names == frame2.names, (
            "The left frame has names %r, while the right has names %r"
            % (frame1.names, frame2.names))
        assert frame1.types == frame2.types, (
            "The left frame has types %r, while the right has types %r"
            % (frame1.types, frame2.types))
        data1 = frame1.to_list()
        data2 = frame2.to_list()
        assert len(data1) == len(data2)  # shape check should ensure this
        for i in range(len(data1)):
            col1 = data1[i]
            col2 = data2[i]
            assert len(col1) == len(col2)
            for j in range(len(col1)):
                val1 = col1[j]
                val2 = col2[j]
                if val1 == val2: continue
                if isinstance(val1, float) and isinstance(val2, float):
                    if math.isclose(val1, val2, rel_tol = rel_tol, abs_tol = abs_tol): continue
                if len(col1) > 16:
                    arr1 = repr(col1[:16])[:-1] + ", ...]"
                    arr2 = repr(col2[:16])[:-1] + ", ...]"
                else:
                    arr1 = repr(col1)
                    arr2 = repr(col2)
>               raise AssertionError(
                    "The frames have different data in column %d `%s` at "
                    "index %d: LHS has %r, and RHS has %r\n"
                    "  LHS = %s\n"
                    "  RHS = %s\n"
                    % (i, frame1.names[i], j, val1, val2, arr1, arr2))
E               AssertionError: The frames have different data in column 0 `A` at index 0: LHS has False, and RHS has None
E                 LHS = [False, False, True]
E                 RHS = [None, False, True]
abs_tol    = 1e-07
arr1       = '[False, False, True]'
arr2       = '[None, False, True]'
col1       = [False, False, True]
col2       = [None, False, True]
data1      = [[False, False, True], [1, 1000, 501]]
data2      = [[None, False, True], [1, 1000, 501]]
frame1     = <Frame#2cc0d9f6600 3x2>
frame2     = <Frame#2cc0d9f65d0 3x2>
i          = 0
j          = 0
rel_tol    = 1e-07
val1       = False
val2       = None
tests\__init__.py:140: AssertionError
________________________ test_count_dt_groupby_integer ________________________
    def test_count_dt_groupby_integer():
        df_in = dt.Frame([9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1])
>       df_reduce = df_in[:, [count(f.C0), count()], "C0"]
E       AssertionError: Assertion 'ri.max() < arg.nrows()' failed in src\core\column\view.cc, line 109
df_in      = <Frame#2cc0deef3c0 13x1>
tests\test-reduce.py:86: AssertionError
______________________ test_count_2d_dt_groupby_integer _______________________
    def test_count_2d_dt_groupby_integer():
        df_in = dt.Frame([[9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1],
                          [0, 1, 0, 5, 3, 8, 1, 0, 2, 5, None, 8, 1]])
>       df_reduce = df_in[:, [count(f.C0), count(f.C1), count()], "C0"]
E       AssertionError: Assertion 'ri.max() < arg.nrows()' failed in src\core\column\view.cc, line 109
df_in      = <Frame#2cc0deb75a0 13x2>
tests\test-reduce.py:115: AssertionError
________________________ test_count_dt_groupby_string _________________________
    def test_count_dt_groupby_string():
        df_in = dt.Frame([None, "blue", "green", "indico", None, None, "orange",
                          "red", "violet", "yellow", "green", None, "blue"])
>       df_reduce = df_in[:, [count(f.C0), count()], "C0"]
E       AssertionError: Assertion 'ri.max() < arg.nrows()' failed in src\core\column\view.cc, line 109
df_in      = <Frame#2cc0dc6a450 13x1>
tests\test-reduce.py:145: AssertionError

oleksiyskononenko avatar Aug 08 '22 22:08 oleksiyskononenko