lifelines icon indicating copy to clipboard operation
lifelines copied to clipboard

KaplanMeierFitter: Index Error when adding at_risk_counts

Open tobiasweede opened this issue 2 years ago • 5 comments

Python 3.8 (conda env) lifelines-0.27.1

Using the intro on the docs website: https://lifelines.readthedocs.io/en/latest/Survival%20analysis%20with%20lifelines.html

kmf = KaplanMeierFitter().fit(T, E, label="all_regimes")
kmf.plot_survival_function(at_risk_counts=True)
plt.tight_layout()
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Input In [88], in <cell line: 2>()
      1 kmf = KaplanMeierFitter().fit(T, E, label="all_regimes")
----> 2 kmf.plot_survival_function(at_risk_counts=True)
      3 plt.tight_layout()

File ~/.conda/envs/survival/lib/python3.8/site-packages/lifelines/fitters/kaplan_meier_fitter.py:453, in KaplanMeierFitter.plot_survival_function(self, **kwargs)
    451 """Alias of ``plot``"""
    452 if not CensoringType.is_interval_censoring(self):
--> 453     return _plot_estimate(self, estimate="survival_function_", **kwargs)
    454 else:
    455     # hack for now.
    456     def safe_pop(dict, key):

File ~/.conda/envs/survival/lib/python3.8/site-packages/lifelines/plotting.py:961, in _plot_estimate(cls, estimate, loc, iloc, show_censors, censor_styles, ci_legend, ci_force_lines, ci_only_lines, ci_no_lines, ci_alpha, ci_show, at_risk_counts, logx, ax, **kwargs)
    950         plot_estimate_config.ax.fill_between(
    951             x,
    952             lower,
   (...)
    957             step=step,
    958         )
    960 if at_risk_counts:
--> 961     add_at_risk_counts(cls, ax=plot_estimate_config.ax)
    962     plt.tight_layout()
    964 return plot_estimate_config.ax

File ~/.conda/envs/survival/lib/python3.8/site-packages/lifelines/plotting.py:512, in add_at_risk_counts(labels, rows_to_show, ypos, xticks, ax, at_risk_count_from_start_of_period, *fitters, **kwargs)
    505     event_table_slice = f.event_table.assign(at_risk=lambda x: x.at_risk - x.removed)
    507 event_table_slice = (
    508     event_table_slice.loc[:tick, ["at_risk", "censored", "observed"]]
    509     .agg({"at_risk": lambda x: x.tail(1).values, "censored": "sum", "observed": "sum"})  # see #1385
    510     .rename({"at_risk": "At risk", "censored": "Censored", "observed": "Events"})
    511 )
--> 512 tmp = [int(c) for c in event_table_slice.loc[rows_to_show]]
    513 print(tmp)
    514 counts.extend([int(c) for c in event_table_slice.loc[rows_to_show]])

File ~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:879, in _LocationIndexer.__getitem__(self, key)
    876 axis = self.axis or 0
    878 maybe_callable = com.apply_if_callable(key, self.obj)
--> 879 return self._getitem_axis(maybe_callable, axis=axis)

File ~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1099, in _LocIndexer._getitem_axis(self, key, axis)
   1096     if hasattr(key, "ndim") and key.ndim > 1:
   1097         raise ValueError("Cannot index with multidimensional key")
-> 1099     return self._getitem_iterable(key, axis=axis)
   1101 # nested tuple slicing
   1102 if is_nested_tuple(key, labels):

File ~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1037, in _LocIndexer._getitem_iterable(self, key, axis)
   1034 self._validate_key(key, axis)
   1036 # A collection of keys
-> 1037 keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
   1038 return self.obj._reindex_with_indexers(
   1039     {axis: [keyarr, indexer]}, copy=True, allow_dups=True
   1040 )

File ~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1254, in _LocIndexer._get_listlike_indexer(self, key, axis, raise_missing)
   1251 else:
   1252     keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
-> 1254 self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
   1255 return keyarr, indexer

File ~/.local/lib/python3.8/site-packages/pandas/core/indexing.py:1298, in _LocIndexer._validate_read_indexer(self, key, indexer, axis, raise_missing)
   1296 if missing == len(indexer):
   1297     axis_name = self.obj._get_axis_name(axis)
-> 1298     raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   1300 # We (temporarily) allow for some missing keys with .loc, except in
   1301 # some cases (e.g. setting) in which "raise_missing" will be False
   1302 if raise_missing:

KeyError: "None of [Index(['At risk', 'Censored', 'Events'], dtype='object')] are in the [index]"

tobiasweede avatar Aug 23 '22 07:08 tobiasweede

Maybe related to #1452, which will be fixed in the next release

CamDavidsonPilon avatar Aug 25 '22 19:08 CamDavidsonPilon

I applied the fix to my private fork and the error remained (python 3.9.4, pandas 1.1.5). This indexing is what causes the issue: https://github.com/CamDavidsonPilon/lifelines/blob/aa019b18f28909f3658496d6f819f41077e6e060/lifelines/plotting.py#L514

juancq avatar Sep 28 '22 02:09 juancq

Hm, I'm not able to repro. on the latest lifelines, with pandas 1.5. Could it be a Pandas issue?

CamDavidsonPilon avatar Sep 28 '22 16:09 CamDavidsonPilon

Yes, when I downgrade pandas to 1.1.5, I encounter this problem.

CamDavidsonPilon avatar Sep 28 '22 17:09 CamDavidsonPilon

I used the solution as in here https://github.com/CamDavidsonPilon/lifelines/commit/9b36d87410c2fea79f5780bb4d58e8c8153be8f2, changing the indexing and the rename (adding "columns=") to get it to work with pandas 1.1.5 (due to project restrictions).

  if not event_table_slice.loc[:tick].empty:
      event_table_slice = (
          event_table_slice.loc[:tick, ["at_risk", "censored", "observed"]]
          .agg({"at_risk": lambda x: x.tail(1).values, "censored": "sum", "observed": "sum"})  # see #1385
          .rename(columns={"at_risk": "At risk", "censored": "Censored", "observed": "Events"})
          .fillna(0)
      )
      counts.extend([int(event_table_slice[c]) for c in event_table_slice[rows_to_show]])
  else:
      counts.extend([0 for _ in range(n_rows)])

The easiest solution would be recommending to people to upgrade to the latest pandas.

juancq avatar Oct 05 '22 05:10 juancq