aequitas
aequitas copied to clipboard
[Error] get_disparity_predefined_groups() raises AttributeError
I tried executing the following code:
bdf = b.get_disparity_predefined_groups(xtab, original_df=df,
ref_groups_dict={'race':'Caucasian'},
alpha=0.05, check_significance=True,
mask_significance=False)
bdf.style
but it raises an AttributeError with the following details:
get_disparity_predefined_groups()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-22-8a5ae26f1e35> in <module>
2 ref_groups_dict={'race':'Caucasian'},
3 alpha=0.05, check_significance=True,
----> 4 mask_significance=False)
5 bdf.style
C:\Program_Files\Anaconda3\lib\site-packages\aequitas\bias.py in get_disparity_predefined_groups(self, df, original_df, ref_groups_dict, key_columns, input_group_metrics, fill_divbyzero, check_significance, alpha, mask_significance, selected_significance)
439 self._get_statistical_significance(
440 original_df, df, ref_dict=full_ref_dict, score_thresholds=None,
--> 441 attr_cols=None, alpha=5e-2, selected_significance=selected_significance)
442
443 # if specified, apply T/F mask to significance columns
C:\Program_Files\Anaconda3\lib\site-packages\aequitas\bias.py in _get_statistical_significance(cls, original_df, disparity_df, ref_dict, score_thresholds, attr_cols, alpha, selected_significance)
745 for name, func in binary_col_functions.items():
746 func = func(thres_unit, 'label_value', thres_val)
--> 747 original_df.loc[:, name] = original_df.apply(func, axis=1)
748
749 # add columns for error-based significance
C:\Program_Files\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6485 args=args,
6486 kwds=kwds)
-> 6487 return op.get_result()
6488
6489 def applymap(self, func):
C:\Program_Files\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
149 return self.apply_raw()
150
--> 151 return self.apply_standard()
152
153 def apply_empty_result(self):
C:\Program_Files\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
255
256 # compute the result using the series generator
--> 257 self.apply_series_generator()
258
259 # wrap results
C:\Program_Files\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
284 try:
285 for i, v in enumerate(series_gen):
--> 286 results[i] = self.f(v)
287 keys.append(v.name)
288 except Exception as e:
C:\Program_Files\Anaconda3\lib\site-packages\aequitas\bias.py in <lambda>(x)
734
735 binary_score = lambda rank_col, label_col, thres: lambda x: (
--> 736 x[rank_col] <= thres).astype(int)
737
738 binary_col_functions = {'binary_score': binary_score,
AttributeError: ("'bool' object has no attribute 'astype'", 'occurred at index 0')
It works if I set check_significance=False.
My data frame:
entity_id int64
race object
score float64
label_value float64
rank_abs int32
rank_pct float64
dtype: object
Any ideas why this happens? I have the up-to-date Aequitas version this time.
I also have the same problem. My data is as follows: score: 0/1, label value: 0/1, sex: object, race: object, age_category: object.
Hi
Can you provide a minimal working example?
The following code runs fine for me on the latest aequitas:
import random
import numpy as np
import pandas as pd
n_samples = 1000
df = pd.DataFrame({
'label_value': (np.random.random((n_samples,)) > 0.95).astype(int),
'score': (np.random.random((n_samples,)) > 0.90).astype(int),
'gender': np.array(['M' if random.random() > 0.5 else 'F' for _ in range(n_samples)]),
'race': np.array(['Caucasian' if random.random() > 0.2 else 'Non-Caucasian' for _ in range(n_samples)]),
'age_category': np.array([int(random.random() * 4) for _ in range(n_samples)]).astype(str),
})
df.dtypes
from aequitas.group import Group
from aequitas.bias import Bias
attr_cols = list(set(df.columns) - {
'entity_id', 'score', 'label_value', 'as_of_date'
})
# Initialize aequitas objects
g = Group()
b = Bias()
# Get confusion matrix and metrics for each individual group and attribute
confusion_matrix_metrics, _ = g.get_crosstabs(
df, attr_cols=attr_cols,
)
bdf = b.get_disparity_predefined_groups(
confusion_matrix_metrics, original_df=df,
ref_groups_dict={
'race': 'Caucasian',
'gender': 'M',
'age_category': '1',
},
alpha=0.05, check_significance=True,
mask_significance=False,
)
bdf.style