tableone
tableone copied to clipboard
TypeError: Int64: converted = maybe_downcast_numeric(result, dtype, do_round)
When run in a Colab notebook (specifically https://github.com/MIT-LCP/hack-aotearoa/blob/main/03_summary_statistics.ipynb), the following chunk raises a `TypeError` related to columns that use the pandas nullable `Int64` dtype.
from tableone import TableOne
%%bigquery cohort
SELECT p.unitadmitsource, p.gender, p.age, p.ethnicity, p.admissionweight,
p.unittype, p.unitstaytype, a.acutephysiologyscore,
a.apachescore, a.actualiculos, a.actualhospitalmortality,
a.unabridgedunitlos, a.unabridgedhosplos
FROM `physionet-data.eicu_crd_demo.patient` p
INNER JOIN `physionet-data.eicu_crd_demo.apachepatientresult` a
ON p.patientunitstayid = a.patientunitstayid
WHERE apacheversion LIKE 'IVa'
# also acutephysiologyscore
TableOne(cohort, columns=['apachescore'], groupby='actualhospitalmortality', label_suffix=True, limit=4)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
[<ipython-input-28-e91ff0667d39>](https://localhost:8080/#) in <module>
----> 1 TableOne(cohort, columns=['apachescore'], rename={'agenum': 'age'},
2 groupby='actualhospitalmortality',
3 label_suffix=True, limit=4)
4 frames
[/usr/local/lib/python3.9/dist-packages/tableone/tableone.py](https://localhost:8080/#) in __init__(self, data, columns, categorical, groupby, nonnormal, min_max, pval, pval_adjust, htest_name, pval_test_name, htest, isnull, missing, ddof, labels, rename, sort, limit, order, remarks, label_suffix, decimals, smd, overall, row_percent, display_all, dip_test, normal_test, tukey_test)
410
411 if self._continuous:
--> 412 self.cont_describe = self._create_cont_describe(data,
413 self._groupby)
414
[/usr/local/lib/python3.9/dist-packages/tableone/tableone.py](https://localhost:8080/#) in _create_cont_describe(self, data, groupby)
919
920 # group and aggregate data
--> 921 df_cont = pd.pivot_table(cont_data,
922 columns=[groupby],
923 aggfunc=aggfuncs)
[/usr/local/lib/python3.9/dist-packages/pandas/core/reshape/pivot.py](https://localhost:8080/#) in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed, sort)
74 keys = []
75 for func in aggfunc:
---> 76 _table = __internal_pivot_table(
77 data,
78 values=values,
[/usr/local/lib/python3.9/dist-packages/pandas/core/reshape/pivot.py](https://localhost:8080/#) in __internal_pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed, sort)
185 # agged.columns is a MultiIndex and 'v' is indexing only
186 # on its first level.
--> 187 agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)
188
189 table = agged
[/usr/local/lib/python3.9/dist-packages/pandas/core/dtypes/cast.py](https://localhost:8080/#) in maybe_downcast_to_dtype(result, dtype)
275 if not isinstance(dtype, np.dtype):
276 # enforce our signature annotation
--> 277 raise TypeError(dtype) # pragma: no cover
278
279 converted = maybe_downcast_numeric(result, dtype, do_round)
TypeError: Int64
We found this error in https://github.com/MIT-LCP/hack-aotearoa/blob/master/03_summary_statistics.ipynb. Looking at it a little more closely, the following chunk is fine:
import pandas as pd
from tableone import TableOne, load_dataset
d = {'col1': [1, 2, 4, 5],
'col2': [3, 4, 5, 6],
'outcome': [0,1,1,0]}
df = pd.DataFrame(data=d)
df.dtypes
# col1 int64
# col2 int64
# outcome int64
# dtype: object
TableOne(df, columns=['col1', 'col2'], groupby='outcome')
# works fine
Converting one of the `int64` columns to the nullable `Int64` dtype raises the error:
df2 = df.astype({"col1": "Int64"})
TableOne(df2, columns=['col1', 'col2'], groupby='outcome')
@ngphubinh
The error is raised for `pandas==1.4.3`. When running `pandas>=2.0.0`, the error is not raised and everything seems to work fine.