datatable
datatable copied to clipboard
Several groupby tests are failing for debug build on Windows
Several tests are failing on Windows for the following profile:
- python `3.8.10` (not all the `3.8` pythons are failing, for instance, `3.8.0` doesn't fail);
- datatable debug build (release build doesn't fail);
- `/Od` compiler flag is supplied, i.e. no optimization (other optimization levels do not fail).
Here are the failing tests (https://ci.appveyor.com/project/h2oops/datatable/builds/44409622/job/9dg7sfhyqo0bu3x9?fullLog=true):
_____________________________ test_group_boolean ______________________________
def test_group_boolean():
DT = dt.Frame(A=[True, None, False, False, True, True, False, True])
DTR = DT[:, count(), by(f.A)]
> assert_equals(DTR, dt.Frame(A=[None, False, True], count=[1, 3, 4],
stypes={"count": dt.int64}))
DT = <Frame#2cc0da0dcf0 8x1>
DTR = <Frame#2cc0da0dde0 3x2>
tests\test-groups.py:247:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
frame1 = <Frame#2cc0da0dde0 3x2>, frame2 = <Frame#2cc0da0dd20 3x2>
rel_tol = 1e-07, abs_tol = 1e-07
def assert_equals(frame1, frame2, rel_tol = 1e-7, abs_tol = None):
"""
Helper function to assert that 2 frames are equal to each other.
"""
frame_integrity_check(frame1)
frame_integrity_check(frame2)
# The default value of `abs_tol` is set to `rel_tol`
if abs_tol is None:
abs_tol = rel_tol
assert frame1.shape == frame2.shape, (
"The left frame has shape %r, while the right has shape %r"
% (frame1.shape, frame2.shape))
assert frame1.names == frame2.names, (
"The left frame has names %r, while the right has names %r"
% (frame1.names, frame2.names))
assert frame1.types == frame2.types, (
"The left frame has types %r, while the right has types %r"
% (frame1.types, frame2.types))
data1 = frame1.to_list()
data2 = frame2.to_list()
assert len(data1) == len(data2) # shape check should ensure this
for i in range(len(data1)):
col1 = data1[i]
col2 = data2[i]
assert len(col1) == len(col2)
for j in range(len(col1)):
val1 = col1[j]
val2 = col2[j]
if val1 == val2: continue
if isinstance(val1, float) and isinstance(val2, float):
if math.isclose(val1, val2, rel_tol = rel_tol, abs_tol = abs_tol): continue
if len(col1) > 16:
arr1 = repr(col1[:16])[:-1] + ", ...]"
arr2 = repr(col2[:16])[:-1] + ", ...]"
else:
arr1 = repr(col1)
arr2 = repr(col2)
> raise AssertionError(
"The frames have different data in column %d `%s` at "
"index %d: LHS has %r, and RHS has %r\n"
" LHS = %s\n"
" RHS = %s\n"
% (i, frame1.names[i], j, val1, val2, arr1, arr2))
E AssertionError: The frames have different data in column 0 `A` at index 0: LHS has False, and RHS has None
E LHS = [False, False, True]
E RHS = [None, False, True]
abs_tol = 1e-07
arr1 = '[False, False, True]'
arr2 = '[None, False, True]'
col1 = [False, False, True]
col2 = [None, False, True]
data1 = [[False, False, True], [1, 3, 4]]
data2 = [[None, False, True], [1, 3, 4]]
frame1 = <Frame#2cc0da0dde0 3x2>
frame2 = <Frame#2cc0da0dd20 3x2>
i = 0
j = 0
rel_tol = 1e-07
val1 = False
val2 = None
tests\__init__.py:140: AssertionError
_____________________________ test_group_boolean2 _____________________________
def test_group_boolean2():
DT = dt.Frame(A=[True, False, False] * 500 + [None, True])
DTR = DT[:, count(), by(f.A)]
> assert_equals(DTR, dt.Frame(A=[None, False, True], count=[1, 1000, 501],
stypes={"count": dt.int64}))
DT = <Frame#2cc0d9f65a0 1502x1>
DTR = <Frame#2cc0d9f6600 3x2>
tests\test-groups.py:253:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
frame1 = <Frame#2cc0d9f6600 3x2>, frame2 = <Frame#2cc0d9f65d0 3x2>
rel_tol = 1e-07, abs_tol = 1e-07
def assert_equals(frame1, frame2, rel_tol = 1e-7, abs_tol = None):
"""
Helper function to assert that 2 frames are equal to each other.
"""
frame_integrity_check(frame1)
frame_integrity_check(frame2)
# The default value of `abs_tol` is set to `rel_tol`
if abs_tol is None:
abs_tol = rel_tol
assert frame1.shape == frame2.shape, (
"The left frame has shape %r, while the right has shape %r"
% (frame1.shape, frame2.shape))
assert frame1.names == frame2.names, (
"The left frame has names %r, while the right has names %r"
% (frame1.names, frame2.names))
assert frame1.types == frame2.types, (
"The left frame has types %r, while the right has types %r"
% (frame1.types, frame2.types))
data1 = frame1.to_list()
data2 = frame2.to_list()
assert len(data1) == len(data2) # shape check should ensure this
for i in range(len(data1)):
col1 = data1[i]
col2 = data2[i]
assert len(col1) == len(col2)
for j in range(len(col1)):
val1 = col1[j]
val2 = col2[j]
if val1 == val2: continue
if isinstance(val1, float) and isinstance(val2, float):
if math.isclose(val1, val2, rel_tol = rel_tol, abs_tol = abs_tol): continue
if len(col1) > 16:
arr1 = repr(col1[:16])[:-1] + ", ...]"
arr2 = repr(col2[:16])[:-1] + ", ...]"
else:
arr1 = repr(col1)
arr2 = repr(col2)
> raise AssertionError(
"The frames have different data in column %d `%s` at "
"index %d: LHS has %r, and RHS has %r\n"
" LHS = %s\n"
" RHS = %s\n"
% (i, frame1.names[i], j, val1, val2, arr1, arr2))
E AssertionError: The frames have different data in column 0 `A` at index 0: LHS has False, and RHS has None
E LHS = [False, False, True]
E RHS = [None, False, True]
abs_tol = 1e-07
arr1 = '[False, False, True]'
arr2 = '[None, False, True]'
col1 = [False, False, True]
col2 = [None, False, True]
data1 = [[False, False, True], [1, 1000, 501]]
data2 = [[None, False, True], [1, 1000, 501]]
frame1 = <Frame#2cc0d9f6600 3x2>
frame2 = <Frame#2cc0d9f65d0 3x2>
i = 0
j = 0
rel_tol = 1e-07
val1 = False
val2 = None
tests\__init__.py:140: AssertionError
________________________ test_count_dt_groupby_integer ________________________
def test_count_dt_groupby_integer():
df_in = dt.Frame([9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1])
> df_reduce = df_in[:, [count(f.C0), count()], "C0"]
E AssertionError: Assertion 'ri.max() < arg.nrows()' failed in src\core\column\view.cc, line 109
df_in = <Frame#2cc0deef3c0 13x1>
tests\test-reduce.py:86: AssertionError
______________________ test_count_2d_dt_groupby_integer _______________________
def test_count_2d_dt_groupby_integer():
df_in = dt.Frame([[9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1],
[0, 1, 0, 5, 3, 8, 1, 0, 2, 5, None, 8, 1]])
> df_reduce = df_in[:, [count(f.C0), count(f.C1), count()], "C0"]
E AssertionError: Assertion 'ri.max() < arg.nrows()' failed in src\core\column\view.cc, line 109
df_in = <Frame#2cc0deb75a0 13x2>
tests\test-reduce.py:115: AssertionError
________________________ test_count_dt_groupby_string _________________________
def test_count_dt_groupby_string():
df_in = dt.Frame([None, "blue", "green", "indico", None, None, "orange",
"red", "violet", "yellow", "green", None, "blue"])
> df_reduce = df_in[:, [count(f.C0), count()], "C0"]
E AssertionError: Assertion 'ri.max() < arg.nrows()' failed in src\core\column\view.cc, line 109
df_in = <Frame#2cc0dc6a450 13x1>
tests\test-reduce.py:145: AssertionError