error in 03_Grouping/Alcohol_Consumption/Exercise_with_solutions.ipynb
Hi,
03_Grouping/Alcohol_Consumption/Exercise_with_solutions.ipynb
error in ### Step 6. Print the mean alcohol consumption per continent for every column
I thought something was wrong in my own code, but then I opened and executed the Exercise_with_solutions.ipynb file, and the error can be replicated there as well:
drinks.groupby('continent').mean()
=================================================================
TypeError Traceback (most recent call last) File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\groupby\groupby.py:1942, in GroupBy._agg_py_fallback(self, how, values, ndim, alt) 1941 try: -> 1942 res_values = self._grouper.agg_series(ser, alt, preserve_dtype=True) 1943 except Exception as err:
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\groupby\ops.py:863, in BaseGrouper.agg_series(self, obj, func, preserve_dtype) 861 preserve_dtype = True --> 863 result = self._aggregate_series_pure_python(obj, func) 865 npvalues = lib.maybe_convert_objects(result, try_float=False)
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\groupby\ops.py:884, in BaseGrouper._aggregate_series_pure_python(self, obj, func) 883 for i, group in enumerate(splitter): --> 884 res = func(group) 885 res = extract_result(res)
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\groupby\groupby.py:2454, in GroupBy.mean.<locals>.<lambda>(x)
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\series.py:6409, in Series.mean(self, axis, skipna, numeric_only, **kwargs) 6401 @doc(make_doc("mean", ndim=1)) 6402 def mean( 6403 self, (...) 6407 **kwargs, 6408 ): -> 6409 return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\generic.py:12407, in NDFrame.mean(self, axis, skipna, numeric_only, **kwargs) 12400 def mean( 12401 self, 12402 axis: Axis | None = 0, (...) 12405 **kwargs, 12406 ) -> Series | float:
-> 12407 return self._stat_function( 12408 "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs 12409 )
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\generic.py:12364, in NDFrame._stat_function(self, name, func, axis, skipna, numeric_only, **kwargs) 12362 validate_bool_kwarg(skipna, "skipna", none_allowed=False)
-> 12364 return self._reduce( 12365 func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only 12366 )
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\series.py:6317, in Series._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds) 6313 raise TypeError( 6314 f"Series.{name} does not allow {kwd_name}={numeric_only} " 6315 "with non-numeric dtypes." 6316 ) -> 6317 return op(delegate, skipna=skipna, **kwds)
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\nanops.py:147, in bottleneck_switch.__call__.<locals>.f
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\nanops.py:404, in _datetimelike_compat.<locals>.new_func
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\nanops.py:720, in nanmean(values, axis, skipna, mask) 719 the_sum = values.sum(axis, dtype=dtype_sum) --> 720 the_sum = _ensure_numeric(the_sum) 722 if axis is not None and getattr(the_sum, "ndim", False):
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\nanops.py:1701, in _ensure_numeric(x) 1699 if isinstance(x, str): 1700 # GH#44008, GH#36703 avoid casting e.g. strings to numeric -> 1701 raise TypeError(f"Could not convert string '{x}' to numeric") 1702 try:
TypeError: Could not convert string 'AlgeriaAngolaBeninBotswanaBurkina FasoBurundiCote d'IvoireCabo VerdeCameroonCentral African RepublicChadComorosCongoDR CongoDjiboutiEgyptEquatorial GuineaEritreaEthiopiaGabonGambiaGhanaGuineaGuinea-BissauKenyaLesothoLiberiaLibyaMadagascarMalawiMaliMauritaniaMauritiusMoroccoMozambiqueNamibiaNigerNigeriaRwandaSao Tome & PrincipeSenegalSeychellesSierra LeoneSomaliaSouth AfricaSudanSwazilandTogoTunisiaUgandaTanzaniaZambiaZimbabwe' to numeric
The above exception was the direct cause of the following exception:
TypeError Traceback (most recent call last) Cell In[5], line 1 ----> 1 drinks.groupby('continent').mean()
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\groupby\groupby.py:2452, in GroupBy.mean(self, numeric_only, engine, engine_kwargs) 2445 return self._numba_agg_general( 2446 grouped_mean, 2447 executor.float_dtype_mapping, 2448 engine_kwargs, 2449 min_periods=0, 2450 ) 2451 else: -> 2452 result = self._cython_agg_general( 2453 "mean", 2454 alt=lambda x: Series(x, copy=False).mean(numeric_only=numeric_only), 2455 numeric_only=numeric_only, 2456 ) 2457 return result.finalize(self.obj, method="groupby")
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\groupby\groupby.py:1998, in GroupBy._cython_agg_general(self, how, alt, numeric_only, min_count, **kwargs) 1995 result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt) 1996 return result -> 1998 new_mgr = data.grouped_reduce(array_func) 1999 res = self._wrap_agged_manager(new_mgr) 2000 if how in ["idxmin", "idxmax"]:
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\internals\managers.py:1470, in BlockManager.grouped_reduce(self, func) 1466 if blk.is_object: 1467 # split on object-dtype blocks bc some columns may raise 1468 # while others do not. 1469 for sb in blk._split(): -> 1470 applied = sb.apply(func) 1471 result_blocks = extend_blocks(applied, result_blocks) 1472 else:
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\internals\blocks.py:392, in Block.apply(self, func, **kwargs) 386 @final 387 def apply(self, func, **kwargs) -> list[Block]: 388 """ 389 apply the function to my values; return a block if we are not 390 one 391 """ --> 392 result = func(self.values, **kwargs) 394 result = maybe_coerce_values(result) 395 return self._split_op_result(result)
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\groupby\groupby.py:1995, in GroupBy._cython_agg_general.<locals>.array_func(values)
File ~\OneDrive\Documents\pandas_exercises\venv\Lib\site-packages\pandas\core\groupby\groupby.py:1946, in GroupBy._agg_py_fallback(self, how, values, ndim, alt) 1944 msg = f"agg function failed [how->{how},dtype->{ser.dtype}]" 1945 # preserve the kind of exception that raised -> 1946 raise type(err)(msg) from err 1948 if ser.dtype == object: 1949 res_values = res_values.astype(object, copy=False)
TypeError: agg function failed [how->mean,dtype->object]
Thanks
me too
Issue
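In 03_Grouping/Alcohol_Consumption/Exercise_with_solutions.ipynb, Step 6 (drinks.groupby('continent').mean()) raises a TypeError in modern pandas versions (e.g., 2.x) because it attempts to compute the mean of the non-numeric country column. Since pandas 2.0 the numeric_only argument defaults to False, so non-numeric columns are no longer silently dropped from the aggregation.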
Fix
Updated the code to drinks.groupby('continent').mean(numeric_only=True) to exclude non-numeric columns, ensuring compatibility with pandas 2.x.
Impact
This fixes the error and aligns the solution with current pandas behavior, improving the exercise for learners.
I created a pull request; please review it!