aequitas icon indicating copy to clipboard operation
aequitas copied to clipboard

[Error] get_disparity_predefined_groups() raises AttributeError

Open LiFaytheGoblin opened this issue 4 years ago • 3 comments

I tried executing the following code:

bdf = b.get_disparity_predefined_groups(xtab, original_df=df, 
                                        ref_groups_dict={'race':'Caucasian'}, 
                                        alpha=0.05, check_significance=True, 
                                        mask_significance=False)
bdf.style

but it raises an AttributeError with the following details:

get_disparity_predefined_group()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-22-8a5ae26f1e35> in <module>
      2                                         ref_groups_dict={'race':'Caucasian'},
      3                                         alpha=0.05, check_significance=True,
----> 4                                         mask_significance=False)
      5 bdf.style

C:\Program_Files\Anaconda3\lib\site-packages\aequitas\bias.py in get_disparity_predefined_groups(self, df, original_df, ref_groups_dict, key_columns, input_group_metrics, fill_divbyzero, check_significance, alpha, mask_significance, selected_significance)
    439             self._get_statistical_significance(
    440                 original_df, df, ref_dict=full_ref_dict, score_thresholds=None,
--> 441                 attr_cols=None, alpha=5e-2, selected_significance=selected_significance)
    442 
    443             # if specified, apply T/F mask to significance columns

C:\Program_Files\Anaconda3\lib\site-packages\aequitas\bias.py in _get_statistical_significance(cls, original_df, disparity_df, ref_dict, score_thresholds, attr_cols, alpha, selected_significance)
    745                 for name, func in binary_col_functions.items():
    746                     func = func(thres_unit, 'label_value', thres_val)
--> 747                     original_df.loc[:, name] = original_df.apply(func, axis=1)
    748 
    749         # add columns for error-based significance

C:\Program_Files\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
   6485                          args=args,
   6486                          kwds=kwds)
-> 6487         return op.get_result()
   6488 
   6489     def applymap(self, func):

C:\Program_Files\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
    149             return self.apply_raw()
    150 
--> 151         return self.apply_standard()
    152 
    153     def apply_empty_result(self):

C:\Program_Files\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
    255 
    256         # compute the result using the series generator
--> 257         self.apply_series_generator()
    258 
    259         # wrap results

C:\Program_Files\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
    284             try:
    285                 for i, v in enumerate(series_gen):
--> 286                     results[i] = self.f(v)
    287                     keys.append(v.name)
    288             except Exception as e:

C:\Program_Files\Anaconda3\lib\site-packages\aequitas\bias.py in <lambda>(x)
    734 
    735         binary_score = lambda rank_col, label_col, thres: lambda x: (
--> 736                 x[rank_col] <= thres).astype(int)
    737 
    738         binary_col_functions = {'binary_score': binary_score,

AttributeError: ("'bool' object has no attribute 'astype'", 'occurred at index 0')

It works if I set check_significance=False.

My data frame:

entity_id        int64
race            object
score          float64
label_value    float64
rank_abs         int32
rank_pct       float64
dtype: object

Any ideas why this happens? I have the up-to-date Aequitas version this time.

LiFaytheGoblin avatar Jul 09 '20 09:07 LiFaytheGoblin

I also have the same problem. My data is as follows: score (0/1), label value (0/1), sex (object), race (object), age_category (object).

kalikhademi avatar Mar 16 '21 21:03 kalikhademi

Hi

Can you provide a minimal working example?

The following code runs fine for me on the latest aequitas:

import random
import numpy as np
import pandas as pd

n_samples = 1000

df = pd.DataFrame({
    'label_value': (np.random.random((n_samples,)) > 0.95).astype(int),
    'score': (np.random.random((n_samples,)) > 0.90).astype(int),
    'gender': np.array(['M' if random.random() > 0.5 else 'F' for _ in range(n_samples)]),
    'race': np.array(['Caucasian' if random.random() > 0.2 else 'Non-Caucasian' for _ in range(n_samples)]),
    'age_category': np.array([int(random.random() * 4) for _ in range(n_samples)]).astype(str),
})
df.dtypes

from aequitas.group import Group
from aequitas.bias import Bias

attr_cols = list(set(df.columns) - {
    'entity_id', 'score', 'label_value', 'as_of_date'
})

# Initialize aequitas objects
g = Group()
b = Bias()

# Get confusion matrix and metrics for each individual group and attribute
confusion_matrix_metrics, _ = g.get_crosstabs(
    df, attr_cols=attr_cols,
)


bdf = b.get_disparity_predefined_groups(
    confusion_matrix_metrics, original_df=df, 
    ref_groups_dict={
        'race': 'Caucasian',
        'gender': 'M',
        'age_category': '1',
    }, 
    alpha=0.05, check_significance=True, 
    mask_significance=False,
)
bdf.style

AndreFCruz avatar Mar 19 '21 10:03 AndreFCruz