TypeError: cannot safely cast non-equivalent float64 to int64

Open guyjansen opened this issue 2 years ago • 0 comments

Hey,

I'm new to coding and I'm trying AutoClean on a dataset, but I keep getting this error: `TypeError: cannot safely cast non-equivalent float64 to int64`. According to ChatGPT, this error typically occurs when you try to convert a floating-point number to an integer using the `int()` function or a similar method, but the float is not a whole number, which would cause a loss of precision. But it must be possible to use floats as well, right? So I'm curious why I might get this error. My code is provided below. Thanks a lot for any help!

```python
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
```

```python
df = pd.read_csv('/Users/guyjansen/Desktop/Python/Housing Prices Data Science Project/train.csv')
from AutoClean.autoclean import AutoClean
pipeline = AutoClean(df)
pipeline.output
```

This raises the error:

TypeError Traceback (most recent call last) ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in safe_cast(values, dtype, copy) 119 try: --> 120 return values.astype(dtype, casting="safe", copy=copy) 121 except TypeError as err:

TypeError: Cannot cast array data from dtype('float64') to dtype('int64') according to the rule 'safe'

The above exception was the direct cause of the following exception:

TypeError Traceback (most recent call last) /var/folders/wc/2vn5bk3x4hq0b0_hdn9tjzkm0000gn/T/ipykernel_45950/1120075175.py in <module> 1 from AutoClean.autoclean import AutoClean ----> 2 pipeline = AutoClean(df) 3 pipeline.output

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/autoclean.py in __init__(self, input_data, mode, duplicates, missing_num, missing_categ, encode_categ, extract_datetime, outliers, outlier_param, logfile, verbose) 80 81 # initialize our class and start the autoclean process ---> 82 self.output = self._clean_data(output_data, input_data) 83 84 end = timer()

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/autoclean.py in _clean_data(self, df, input_data) 141 df = Duplicates.handle(self, df) 142 df = MissingValues.handle(self, df) --> 143 df = Outliers.handle(self, df) 144 df = Adjust.convert_datetime(self, df) 145 df = EncodeCateg.handle(self, df)

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/modules.py in handle(self, df) 272 273 if self.outliers in ['auto', 'winz']: --> 274 df = Outliers._winsorization(self, df) 275 elif self.outliers == 'delete': 276 df = Outliers._delete(self, df)

~/opt/anaconda3/lib/python3.9/site-packages/AutoClean/modules.py in _winsorization(self, df) 300 else: 301 if (df[feature].fillna(-9999) % 1 == 0).all(): --> 302 df.loc[row_index, feature] = upper_bound 303 df[feature] = df[feature].astype(int) 304 else:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in __setitem__(self, key, value) 714 715 iloc = self if self.name == "iloc" else self.obj.iloc --> 716 iloc._setitem_with_indexer(indexer, value, self.name) 717 718 def _validate_key(self, key, axis: int):

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_with_indexer(self, indexer, value, name) 1689 if take_split_path: 1690 # We have to operate column-wise -> 1691 self._setitem_with_indexer_split_path(indexer, value, name) 1692 else: 1693 self._setitem_single_block(indexer, value, name)

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_with_indexer_split_path(self, indexer, value, name) 1782 # scalar value 1783 for loc in ilocs: -> 1784 self._setitem_single_column(loc, value, pi) 1785 1786 def _setitem_with_indexer_2d_value(self, indexer, value):

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_single_column(self, loc, value, plane_indexer) 1888 1889 orig_values = ser._values -> 1890 ser._mgr = ser._mgr.setitem((pi,), value) 1891 1892 if ser._values is orig_values:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in setitem(self, indexer, value) 335 For SingleBlockManager, this backs s[indexer] = value 336 """ --> 337 return self.apply("setitem", indexer=indexer, value=value) 338 339 def putmask(self, mask, new, align: bool = True):

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, ignore_failures, **kwargs) 302 applied = b.apply(f, **kwargs) 303 else: --> 304 applied = getattr(b, f)(**kwargs) 305 except (TypeError, NotImplementedError): 306 if not ignore_failures:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/internals/blocks.py in setitem(self, indexer, value) 1620 1621 check_setitem_lengths(indexer, value, self.values) -> 1622 self.values[indexer] = value 1623 return self 1624

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/masked.py in __setitem__(self, key, value) 222 if _is_scalar: 223 value = [value] --> 224 value, mask = self._coerce_to_array(value) 225 226 if _is_scalar:

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in _coerce_to_array(self, value) 334 335 def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: --> 336 return coerce_to_array(value, dtype=self.dtype) 337 338 @overload

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in coerce_to_array(values, dtype, mask, copy) 228 values = values.astype(dtype, copy=copy) 229 else: --> 230 values = safe_cast(values, dtype, copy=False) 231 232 return values, mask

~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/integer.py in safe_cast(values, dtype, copy) 124 return casted 125 --> 126 raise TypeError( 127 f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}" 128 ) from err

TypeError: cannot safely cast non-equivalent float64 to int64

Feb 27 '23 01:02 guyjansen