python-bigquery-pandas icon indicating copy to clipboard operation
python-bigquery-pandas copied to clipboard

tests.system.test_read_gbq: test_default_dtypes[scalar-types-nonnull-normal-range-False] failed

Open flaky-bot[bot] opened this issue 3 years ago • 1 comments

This test failed!

To configure my behavior, see the Flaky Bot documentation.

If I'm commenting on this issue too often, add the flakybot: quiet label and I will stop commenting.


commit: 439d5888550f1ad11418b65fe84f3f48434d3515 buildURL: Build Status, Sponge status: failed

Test output
read_gbq = functools.partial(, project_id='precise-truck-742', credentials=)
query = '\nSELECT\n  bools.row_num AS row_num,\n  bool_col,\n  bytes_col,\n  date_col,\n  datetime_col,\n  float_col,\n  int64...bools`.row_num = `times`.row_num\n  AND `bools`.row_num = `timestamps`.row_num\nORDER BY row_num ASC\n                '
expected =    row_num  bool_col  ...         time_col                    timestamp_col
0        1      True  ...         00:00:00...0:01:02.345678+00:00
2        3      True  ...  23:59:58.999271 2018-04-11 23:59:59.999999+00:00

[3 rows x 11 columns] use_bqstorage_apis = {False, True}, use_bqstorage_api = False

@pytest.mark.parametrize(["use_bqstorage_api"], [(True,), (False,)])
@pytest.mark.parametrize(
    ["query", "expected", "use_bqstorage_apis"],
    [
        pytest.param(
            *QueryTestCase(
                query="""
SELECT
  bools.row_num AS row_num,
  bool_col,
  bytes_col,
  date_col,
  datetime_col,
  float_col,
  int64_col,
  numeric_col,
  string_col,
  time_col,
  timestamp_col
FROM
  UNNEST([
      STRUCT(1 AS row_num, TRUE AS bool_col),
      STRUCT(2 AS row_num, FALSE AS bool_col),
      STRUCT(3 AS row_num, TRUE AS bool_col) ]) AS `bools`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST('C00010FF' AS BYTES FORMAT 'HEX') AS bytes_col),
      STRUCT(2 AS row_num, CAST('F1AC' AS BYTES FORMAT 'HEX') AS bytes_col),
      STRUCT(3 AS row_num, CAST('FFBADD11' AS BYTES FORMAT 'HEX') AS bytes_col) ]) AS `bytes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, DATE(1998, 9, 4) AS date_col),
      STRUCT(2 AS row_num, DATE(2011, 10, 1) AS date_col),
      STRUCT(3 AS row_num, DATE(2018, 4, 11) AS date_col) ]) AS `dates`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, DATETIME('1998-09-04 12:34:56.789101') AS datetime_col),
      STRUCT(2 AS row_num, DATETIME('2011-10-01 00:01:02.345678') AS datetime_col),
      STRUCT(3 AS row_num, DATETIME('2018-04-11 23:59:59.999999') AS datetime_col) ]) AS `datetimes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, 1.125 AS float_col),
      STRUCT(2 AS row_num, -2.375 AS float_col),
      STRUCT(3 AS row_num, 0.0 AS float_col) ]) AS `floats`
INNER JOIN
  UNNEST([
      -- 2 ^ 63 - 1, but in hex to avoid intermediate overlfow.
      STRUCT(1 AS row_num, 0x7fffffffffffffff AS int64_col),
      STRUCT(2 AS row_num, -1 AS int64_col),
      -- -2 ^ 63, but in hex to avoid intermediate overlfow.
      STRUCT(3 AS row_num, -0x8000000000000000 AS int64_col) ]) AS `ints`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST('123.456789' AS NUMERIC) AS numeric_col),
      STRUCT(2 AS row_num, CAST('-123.456789' AS NUMERIC) AS numeric_col),
      STRUCT(3 AS row_num, CAST('999.999999' AS NUMERIC) AS numeric_col) ]) AS `numerics`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, 'abcdefghijklmnopqrstuvwxyz' AS string_col),
      STRUCT(2 AS row_num, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' AS string_col),
      STRUCT(3 AS row_num, 'こんにちは' AS string_col) ]) AS `strings`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST('00:00:00.000000' AS TIME) AS time_col),
      STRUCT(2 AS row_num, CAST('09:08:07.654321' AS TIME) AS time_col),
      STRUCT(3 AS row_num, CAST('23:59:59.999999' AS TIME) AS time_col) ]) AS `times`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, TIMESTAMP('1998-09-04 12:34:56.789101') AS timestamp_col),
      STRUCT(2 AS row_num, TIMESTAMP('2011-10-01 00:01:02.345678') AS timestamp_col),
      STRUCT(3 AS row_num, TIMESTAMP('2018-04-11 23:59:59.999999') AS timestamp_col) ]) AS `timestamps`
WHERE
  `bools`.row_num = `dates`.row_num
  AND `bools`.row_num = `bytes`.row_num
  AND `bools`.row_num = `datetimes`.row_num
  AND `bools`.row_num = `floats`.row_num
  AND `bools`.row_num = `ints`.row_num
  AND `bools`.row_num = `numerics`.row_num
  AND `bools`.row_num = `strings`.row_num
  AND `bools`.row_num = `times`.row_num
  AND `bools`.row_num = `timestamps`.row_num
ORDER BY row_num ASC
                """,
                expected=pandas.DataFrame(
                    {
                        "row_num": pandas.Series([1, 2, 3], dtype="Int64"),
                        "bool_col": pandas.Series(
                            [True, False, True],
                            dtype="boolean"
                            if FEATURES.pandas_has_boolean_dtype
                            else "bool",
                        ),
                        "bytes_col": [
                            bytes.fromhex("C00010FF"),
                            bytes.fromhex("F1AC"),
                            bytes.fromhex("FFBADD11"),
                        ],
                        "date_col": pandas.Series(
                            [
                                datetime.date(1998, 9, 4),
                                datetime.date(2011, 10, 1),
                                datetime.date(2018, 4, 11),
                            ],
                            dtype=db_dtypes.DateDtype(),
                        ),
                        "datetime_col": pandas.Series(
                            [
                                "1998-09-04 12:34:56.789101",
                                "2011-10-01 00:01:02.345678",
                                "2018-04-11 23:59:59.999999",
                            ],
                            dtype="datetime64[ns]",
                        ),
                        "float_col": [1.125, -2.375, 0.0],
                        "int64_col": pandas.Series(
                            [(2**63) - 1, -1, -(2**63)], dtype="Int64"
                        ),
                        "numeric_col": [
                            decimal.Decimal("123.456789"),
                            decimal.Decimal("-123.456789"),
                            decimal.Decimal("999.999999"),
                        ],
                        "string_col": [
                            "abcdefghijklmnopqrstuvwxyz",
                            "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                            "こんにちは",
                        ],
                        "time_col": pandas.Series(
                            ["00:00:00.000000", "09:08:07.654321", "23:59:59.999999"],
                            dtype=db_dtypes.TimeDtype(),
                        ),
                        "timestamp_col": pandas.Series(
                            [
                                "1998-09-04 12:34:56.789101",
                                "2011-10-01 00:01:02.345678",
                                "2018-04-11 23:59:59.999999",
                            ],
                            dtype="datetime64[ns]",
                        ).dt.tz_localize(datetime.timezone.utc),
                    }
                ),
            ),
            id="scalar-types-nonnull-normal-range",
        ),
        pytest.param(
            *QueryTestCase(
                query="""
SELECT
  bools.row_num AS row_num,
  bool_col,
  bytes_col,
  date_col,
  datetime_col,
  float_col,
  int64_col,
  numeric_col,
  string_col,
  time_col,
  timestamp_col
FROM
  UNNEST([
      STRUCT(1 AS row_num, TRUE AS bool_col),
      STRUCT(2 AS row_num, FALSE AS bool_col),
      STRUCT(3 AS row_num, NULL AS bool_col) ]) AS `bools`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, NULL AS bytes_col),
      STRUCT(2 AS row_num, CAST('F1AC' AS BYTES FORMAT 'HEX') AS bytes_col),
      STRUCT(3 AS row_num, CAST('' AS BYTES FORMAT 'HEX') AS bytes_col) ]) AS `bytes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, DATE(1970, 1, 1) AS date_col),
      STRUCT(2 AS row_num, NULL AS date_col),
      STRUCT(3 AS row_num, DATE(2018, 4, 11) AS date_col) ]) AS `dates`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, DATETIME('1970-01-01 00:00:00.000000') AS datetime_col),
      STRUCT(2 AS row_num, DATETIME('2011-10-01 00:01:02.345678') AS datetime_col),
      STRUCT(3 AS row_num, NULL AS datetime_col) ]) AS `datetimes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, NULL AS float_col),
      STRUCT(2 AS row_num, -2.375 AS float_col),
      STRUCT(3 AS row_num, 0.0 AS float_col) ]) AS `floats`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, -1 AS int64_col),
      STRUCT(2 AS row_num, NULL AS int64_col),
      STRUCT(3 AS row_num, 0 AS int64_col) ]) AS `int64s`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST('123.456789' AS NUMERIC) AS numeric_col),
      STRUCT(2 AS row_num, NULL AS numeric_col),
      STRUCT(3 AS row_num, CAST('999.999999' AS NUMERIC) AS numeric_col) ]) AS `numerics`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, '' AS string_col),
      STRUCT(2 AS row_num, 'こんにちは' AS string_col),
      STRUCT(3 AS row_num, NULL AS string_col) ]) AS `strings`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, NULL AS time_col),
      STRUCT(2 AS row_num, CAST('00:00:00.000000' AS TIME) AS time_col),
      STRUCT(3 AS row_num, CAST('23:59:59.999999' AS TIME) AS time_col) ]) AS `times`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, TIMESTAMP('1970-01-01 00:00:00.000000') AS timestamp_col),
      STRUCT(2 AS row_num, NULL AS timestamp_col),
      STRUCT(3 AS row_num, TIMESTAMP('2018-04-11 23:59:59.999999') AS timestamp_col) ]) AS `timestamps`
WHERE
  `bools`.row_num = `dates`.row_num
  AND `bools`.row_num = `bytes`.row_num
  AND `bools`.row_num = `datetimes`.row_num
  AND `bools`.row_num = `floats`.row_num
  AND `bools`.row_num = `int64s`.row_num
  AND `bools`.row_num = `numerics`.row_num
  AND `bools`.row_num = `strings`.row_num
  AND `bools`.row_num = `times`.row_num
  AND `bools`.row_num = `timestamps`.row_num
ORDER BY row_num ASC
            """,
                expected=pandas.DataFrame(
                    {
                        "row_num": pandas.Series([1, 2, 3], dtype="Int64"),
                        "bool_col": pandas.Series(
                            [True, False, None],
                            dtype="boolean"
                            if FEATURES.pandas_has_boolean_dtype
                            else "object",
                        ),
                        "bytes_col": [None, bytes.fromhex("F1AC"), b""],
                        "date_col": pandas.Series(
                            [
                                datetime.date(1970, 1, 1),
                                None,
                                datetime.date(2018, 4, 11),
                            ],
                            dtype=db_dtypes.DateDtype(),
                        ),
                        "datetime_col": pandas.Series(
                            [
                                "1970-01-01 00:00:00.000000",
                                "2011-10-01 00:01:02.345678",
                                None,
                            ],
                            dtype="datetime64[ns]",
                        ),
                        "float_col": [None, -2.375, 0.0],
                        "int64_col": pandas.Series([-1, None, 0], dtype="Int64"),
                        "numeric_col": [
                            decimal.Decimal("123.456789"),
                            None,
                            decimal.Decimal("999.999999"),
                        ],
                        "string_col": ["", "こんにちは", None],
                        "time_col": pandas.Series(
                            [None, "00:00:00", "23:59:59.999999"],
                            dtype=db_dtypes.TimeDtype(),
                        ),
                        "timestamp_col": pandas.Series(
                            [
                                "1970-01-01 00:00:00.000000",
                                None,
                                "2018-04-11 23:59:59.999999",
                            ],
                            dtype="datetime64[ns]",
                        ).dt.tz_localize(datetime.timezone.utc),
                    }
                ),
            ),
            id="scalar-types-nullable-normal-range",
        ),
        pytest.param(
            *QueryTestCase(
                query="""
SELECT
  bools.row_num AS row_num,
  bool_col,
  bytes_col,
  date_col,
  datetime_col,
  float_col,
  int64_col,
  numeric_col,
  string_col,
  time_col,
  timestamp_col
FROM
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS BOOL) AS bool_col) ]) AS `bools`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS BYTES) AS bytes_col) ]) AS `bytes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS DATE) AS date_col) ]) AS `dates`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS DATETIME) AS datetime_col) ]) AS `datetimes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS FLOAT64) AS float_col) ]) AS `floats`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS INT64) AS int64_col) ]) AS `int64s`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS NUMERIC) AS numeric_col) ]) AS `numerics`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS STRING) AS string_col) ]) AS `strings`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS TIME) AS time_col) ]) AS `times`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS TIMESTAMP) AS timestamp_col) ]) AS `timestamps`
WHERE
  `bools`.row_num = `dates`.row_num
  AND `bools`.row_num = `bytes`.row_num
  AND `bools`.row_num = `datetimes`.row_num
  AND `bools`.row_num = `floats`.row_num
  AND `bools`.row_num = `int64s`.row_num
  AND `bools`.row_num = `numerics`.row_num
  AND `bools`.row_num = `strings`.row_num
  AND `bools`.row_num = `times`.row_num
  AND `bools`.row_num = `timestamps`.row_num
ORDER BY row_num ASC
            """,
                expected=pandas.DataFrame(
                    {
                        "row_num": pandas.Series([1], dtype="Int64"),
                        "bool_col": pandas.Series(
                            [None],
                            dtype="boolean"
                            if FEATURES.pandas_has_boolean_dtype
                            else "object",
                        ),
                        "bytes_col": [None],
                        "date_col": pandas.Series(
                            [None],
                            dtype=db_dtypes.DateDtype(),
                        ),
                        "datetime_col": pandas.Series(
                            [None],
                            dtype="datetime64[ns]",
                        ),
                        "float_col": pandas.Series([None], dtype="float64"),
                        "int64_col": pandas.Series([None], dtype="Int64"),
                        "numeric_col": [None],
                        "string_col": [None],
                        "time_col": pandas.Series(
                            [None],
                            dtype=db_dtypes.TimeDtype(),
                        ),
                        "timestamp_col": pandas.Series(
                            [None],
                            dtype="datetime64[ns]",
                        ).dt.tz_localize(datetime.timezone.utc),
                    }
                ),
            ),
            id="scalar-types-null",
        ),
        pytest.param(
            *QueryTestCase(
                query="""
SELECT
  bignumerics.row_num AS row_num,
  bignumeric_col,
  nullable_col,
  null_col
FROM
  UNNEST([
      STRUCT(1 AS row_num, CAST('123456789.123456789' AS BIGNUMERIC) AS bignumeric_col),
      STRUCT(2 AS row_num, CAST('-123456789.123456789' AS BIGNUMERIC) AS bignumeric_col),
      STRUCT(3 AS row_num, CAST('987654321.987654321' AS BIGNUMERIC) AS bignumeric_col) ]) AS `bignumerics`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST('123456789.123456789' AS BIGNUMERIC) AS nullable_col),
      STRUCT(2 AS row_num, NULL AS nullable_col),
      STRUCT(3 AS row_num, CAST('987654321.987654321' AS BIGNUMERIC) AS nullable_col) ]) AS `nullables`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, CAST(NULL AS BIGNUMERIC) AS null_col),
      STRUCT(2 AS row_num, CAST(NULL AS BIGNUMERIC) AS null_col),
      STRUCT(3 AS row_num, CAST(NULL AS BIGNUMERIC) AS null_col) ]) AS `nulls`
WHERE
  `bignumerics`.row_num = `nullables`.row_num
  AND `bignumerics`.row_num = `nulls`.row_num
ORDER BY row_num ASC
            """,
                expected=pandas.DataFrame(
                    {
                        "row_num": pandas.Series([1, 2, 3], dtype="Int64"),
                        # TODO: Support a special (nullable) dtype for decimal data.
                        # https://github.com/googleapis/python-db-dtypes-pandas/issues/49
                        "bignumeric_col": [
                            decimal.Decimal("123456789.123456789"),
                            decimal.Decimal("-123456789.123456789"),
                            decimal.Decimal("987654321.987654321"),
                        ],
                        "nullable_col": [
                            decimal.Decimal("123456789.123456789"),
                            None,
                            decimal.Decimal("987654321.987654321"),
                        ],
                        "null_col": [None, None, None],
                    }
                ),
            ),
            id="bignumeric-normal-range",
            marks=pytest.mark.skipif(
                not FEATURES.bigquery_has_bignumeric,
                reason="BIGNUMERIC not supported in this version of google-cloud-bigquery",
            ),
        ),
        pytest.param(
            *QueryTestCase(
                query="""
SELECT
  dates.row_num AS row_num,
  date_col,
  datetime_col,
  timestamp_col
FROM
  UNNEST([
      STRUCT(1 AS row_num, DATE(1, 1, 1) AS date_col),
      STRUCT(2 AS row_num, DATE(9999, 12, 31) AS date_col),
      STRUCT(3 AS row_num, DATE(2262, 4, 12) AS date_col) ]) AS `dates`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, DATETIME('0001-01-01 00:00:00.000000') AS datetime_col),
      STRUCT(2 AS row_num, DATETIME('9999-12-31 23:59:59.999999') AS datetime_col),
      STRUCT(3 AS row_num, DATETIME('2262-04-11 23:47:16.854776') AS datetime_col) ]) AS `datetimes`
INNER JOIN
  UNNEST([
      STRUCT(1 AS row_num, TIMESTAMP('0001-01-01 00:00:00.000000') AS timestamp_col),
      STRUCT(2 AS row_num, TIMESTAMP('9999-12-31 23:59:59.999999') AS timestamp_col),
      STRUCT(3 AS row_num, TIMESTAMP('2262-04-11 23:47:16.854776') AS timestamp_col) ]) AS `timestamps`
WHERE
  `dates`.row_num = `datetimes`.row_num
  AND `dates`.row_num = `timestamps`.row_num
ORDER BY row_num ASC
            """,
                expected=pandas.DataFrame(
                    {
                        "row_num": pandas.Series([1, 2, 3], dtype="Int64"),
                        "date_col": pandas.Series(
                            [
                                datetime.date(1, 1, 1),
                                datetime.date(9999, 12, 31),
                                datetime.date(2262, 4, 12),
                            ],
                            dtype="object",
                        ),
                        "datetime_col": pandas.Series(
                            [
                                datetime.datetime(1, 1, 1, 0, 0, 0, 0),
                                datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
                                # One microsecond more than pandas.Timestamp.max.
                                datetime.datetime(2262, 4, 11, 23, 47, 16, 854776),
                            ],
                            dtype="object",
                        ),
                        "timestamp_col": pandas.Series(
                            [
                                datetime.datetime(
                                    1, 1, 1, 0, 0, 0, 0, tzinfo=datetime.timezone.utc
                                ),
                                datetime.datetime(
                                    9999,
                                    12,
                                    31,
                                    23,
                                    59,
                                    59,
                                    999999,
                                    tzinfo=datetime.timezone.utc,
                                ),
                                # One microsecond more than pandas.Timestamp.max.
                                datetime.datetime(
                                    2262,
                                    4,
                                    11,
                                    23,
                                    47,
                                    16,
                                    854776,
                                    tzinfo=datetime.timezone.utc,
                                ),
                            ],
                            dtype="object",
                        ),
                    }
                ),
                use_bqstorage_apis={True, False}
                if FEATURES.bigquery_has_accurate_timestamp
                else {True},
            ),
            id="issue365-extreme-datetimes",
        ),
    ],
)
def test_default_dtypes(
    read_gbq, query, expected, use_bqstorage_apis, use_bqstorage_api
):
    """Verify the default pandas dtypes that ``read_gbq`` produces.

    Each parametrized case runs a self-contained BigQuery query (built from
    UNNEST'ed STRUCT literals, so no table fixtures are needed) and compares
    the resulting DataFrame against a hand-constructed expected frame,
    covering non-null values, NULLs mixed with values, all-NULL columns,
    BIGNUMERIC, and datetimes outside the ``datetime64[ns]`` range.

    Args:
        read_gbq: fixture partial of ``pandas_gbq.read_gbq`` bound to the
            test project and credentials.
        query: SQL text to execute.
        expected: DataFrame with the dtypes the result must match exactly.
        use_bqstorage_apis: set of API modes this case supports.
        use_bqstorage_api: API mode for this run; skipped if unsupported
            by the case (e.g. extreme datetimes need the Storage API).
    """
    if use_bqstorage_api not in use_bqstorage_apis:
        pytest.skip(f"use_bqstorage_api={use_bqstorage_api} not supported.")
    result = read_gbq(query, use_bqstorage_api=use_bqstorage_api)
    pandas.testing.assert_frame_equal(result, expected)

tests/system/test_read_gbq.py:551:


.nox/prerelease/lib/python3.8/site-packages/pandas/_testing/asserters.py:851: in assert_extension_array_equal _testing.assert_almost_equal( pandas/_libs/testing.pyx:52: in pandas._libs.testing.assert_almost_equal ???


??? E AssertionError: ExtensionArray are different E
E ExtensionArray values are different (66.66667 %) E [index]: [0, 1, 2] E [left]: [00:00:00, 09:08:07.654321, 23:59:59.999999] E [right]: [00:00:00, 09:08:07.001485, 23:59:58.999271]

pandas/_libs/testing.pyx:167: AssertionError

flaky-bot[bot] avatar Jun 14 '22 10:06 flaky-bot[bot]

Looks like this issue is flaky. :worried:

I'm going to leave this open and stop commenting.

A human should fix and close this.


When run at the same commit (439d5888550f1ad11418b65fe84f3f48434d3515), this test passed in one build (Build Status, Sponge) and failed in another build (Build Status, Sponge).

flaky-bot[bot] avatar Jun 14 '22 13:06 flaky-bot[bot]

Fixed by https://github.com/googleapis/python-db-dtypes-pandas/pull/148

Closing.

chalmerlowe avatar Sep 28 '22 16:09 chalmerlowe