healthcareai-py
healthcareai-py copied to clipboard
Utilities :: Validation Prediction Distribution Plots
Background
Prediction distribution plots like the one produced by the code below are how we should be communicating validation output.
Working code from client work
def plot_distributions(df, actual_col, prediction_col, pos_label='Y', neg_label='N', bins=10, threshold=None):
    """Plot overlaid histograms of predictions for the two actual classes.

    Rows of ``df`` are split by the value in ``actual_col``; the values in
    ``prediction_col`` for each group are drawn as a histogram on the same
    axes (positives in green, negatives in blue), and the figure is shown
    with ``plt.show()``.

    Args:
        df: DataFrame holding both the actual labels and the predictions.
        actual_col: Name of the column containing the actual class labels.
        prediction_col: Name of the column containing the predicted values.
        pos_label: Value in ``actual_col`` that marks the positive class.
        neg_label: Value in ``actual_col`` that marks the negative class.
        bins: Number of histogram bins used for each class.
        threshold: Optional x position at which to draw a red vertical line.
            ``None`` (the default) draws no line; ``0`` is a valid position.

    Returns:
        None. The plot is displayed as a side effect.
    """
    # Plotting libraries are imported locally, so they are only needed when
    # this function is actually called.
    import seaborn as sns
    import matplotlib.pyplot as plt

    # Fix the x-axis to [0, 1] before drawing.
    # NOTE(review): presumably predictions are probabilities -- confirm.
    plt.xlim(0, 1)

    positives = df.loc[df[actual_col] == pos_label]
    negatives = df.loc[df[actual_col] == neg_label]

    # NOTE(review): `distplot` is deprecated in recent seaborn releases;
    # consider `histplot` once the minimum supported seaborn version allows.
    sns.distplot(positives[prediction_col], kde=False, label=pos_label, bins=bins, color='g')
    ax = sns.distplot(negatives[prediction_col], kde=False, label=neg_label, bins=bins, color='b')

    ax.set_title('Distributions of {} for {}'.format(prediction_col, actual_col))
    ax.legend()

    # Compare against None rather than relying on truthiness, so that a
    # threshold of 0 still gets its marker line.
    if threshold is not None:
        plt.axvline(x=threshold, color='r')

    plt.show()