pydqc
pydqc copied to clipboard
Selected KDE bandwidth is 0. Cannot estimate density.
Selected KDE bandwidth is 0. Cannot estimate density.
A RuntimeError is thrown when trying to plot the KDE of data with the following characteristics:
default_df['overdue_accts'].value_counts()
0 43408 1 5382 2 1241 3 361 4 120 5 48 6 22 7 7 8 7 9 4 11 4 12 2 16 1 10 1 13 1 14 1 18 1
nondefault_df['overdue_accts'].value_counts()
0 163471 1 14588 2 3061 3 841 4 284 5 118 6 74 7 31 9 21 8 20 11 8 12 6 10 5 13 4 14 4 17 2 23 1 15 1 18 1 19 1 25 1
This behaviour is seen when the statsmodels package is installed in the same environment, as seaborn tries to use statsmodels.
The following is the error stack trace:
ValueError Traceback (most recent call last) ~/Developer/anaconda3/envs/gdd3.6/lib/python3.6/site-packages/statsmodels/nonparametric/kde.py in kdensityfft(X, kernel, bw, weights, gridsize, adjust, clip, cut, retgrid) 450 try: --> 451 bw = float(bw) 452 except:
ValueError: could not convert string to float: 'scott'
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
in 6 for col in default_df.columns: 7 print(col) ----> 8 distribution_compare_pretty(default_df,nondefault_df,col=col,figsize=None) ~/Developer/anaconda3/envs/gdd3.6/lib/python3.6/site-packages/pydqc/data_compare.py in distribution_compare_pretty(_df1, _df2, col, figsize, date_flag) 125 plt.legend(loc=1) 126 else: --> 127 ax1 = sns.distplot(df1_draw_values, color=TABLE1_DARK, hist=False, label='table1') 128 ax2 = sns.distplot(df2_draw_values, color=TABLE2_DARK, hist=False, label='table2') 129 y_low_1, y_up_1 = ax1.get_ylim()
~/Developer/anaconda3/envs/gdd3.6/lib/python3.6/site-packages/seaborn/distributions.py in distplot(a, bins, hist, kde, rug, fit, hist_kws, kde_kws, rug_kws, fit_kws, color, vertical, norm_hist, axlabel, label, ax) 231 if kde: 232 kde_color = kde_kws.pop("color", color) --> 233 kdeplot(a, vertical=vertical, ax=ax, color=kde_color, **kde_kws) 234 if kde_color != color: 235 kde_kws["color"] = kde_color
~/Developer/anaconda3/envs/gdd3.6/lib/python3.6/site-packages/seaborn/distributions.py in kdeplot(data, data2, shade, vertical, kernel, bw, gridsize, cut, clip, legend, cumulative, shade_lowest, cbar, cbar_ax, cbar_kws, ax, **kwargs) 703 ax = _univariate_kdeplot(data, shade, vertical, kernel, bw, 704 gridsize, cut, clip, legend, ax, --> 705 cumulative=cumulative, **kwargs) 706 707 return ax
~/Developer/anaconda3/envs/gdd3.6/lib/python3.6/site-packages/seaborn/distributions.py in _univariate_kdeplot(data, shade, vertical, kernel, bw, gridsize, cut, clip, legend, ax, cumulative, **kwargs) 293 x, y = _statsmodels_univariate_kde(data, kernel, bw, 294 gridsize, cut, clip, --> 295 cumulative=cumulative) 296 else: 297 # Fall back to scipy if missing statsmodels
~/Developer/anaconda3/envs/gdd3.6/lib/python3.6/site-packages/seaborn/distributions.py in _statsmodels_univariate_kde(data, kernel, bw, gridsize, cut, clip, cumulative) 365 fft = kernel == "gau" 366 kde = smnp.KDEUnivariate(data) --> 367 kde.fit(kernel, bw, fft, gridsize=gridsize, cut=cut, clip=clip) 368 if cumulative: 369 grid, y = kde.support, kde.cdf
~/Developer/anaconda3/envs/gdd3.6/lib/python3.6/site-packages/statsmodels/nonparametric/kde.py in fit(self, kernel, bw, fft, weights, gridsize, adjust, cut, clip) 138 density, grid, bw = kdensityfft(endog, kernel=kernel, bw=bw, 139 adjust=adjust, weights=weights, gridsize=gridsize, --> 140 clip=clip, cut=cut) 141 else: 142 density, grid, bw = kdensity(endog, kernel=kernel, bw=bw,
~/Developer/anaconda3/envs/gdd3.6/lib/python3.6/site-packages/statsmodels/nonparametric/kde.py in kdensityfft(X, kernel, bw, weights, gridsize, adjust, clip, cut, retgrid) 451 bw = float(bw) 452 except: --> 453 bw = bandwidths.select_bandwidth(X, bw, kern) # will cross-val fit this pattern? 454 bw *= adjust 455
~/Developer/anaconda3/envs/gdd3.6/lib/python3.6/site-packages/statsmodels/nonparametric/bandwidths.py in select_bandwidth(x, bw, kernel) 172 # eventually this can fall back on another selection criterion. 173 err = "Selected KDE bandwidth is 0. Cannot estimate density." --> 174 raise RuntimeError(err) 175 else: 176 return bandwidth
RuntimeError: Selected KDE bandwidth is 0. Cannot estimate density.