naive-bayes
predict() only returns a single class for any input
When I place my sample data (see below) into the GaussianNB 'Native bayes' / 'Basic test' test, it fails and all results belong to a single class. What may be the reason?
cases:
[
[2,0.8628123468887801,0,0.00016331863465621427,0,0,0,0,0.9998366813653438,0,1,0,0,0,0,1,0.001796504981218357,0.1270618977625347,0,0.82312591866732,0,0,0.048015678588927,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[2,0.8628123468887801,0.00016331863465621427,0,0,0,0,0,0.9998366813653438,1,0,0,0,0,0,1,0.0019598236158745713,0.13326800587947085,0,0.8579127878490935,0,0,0.006859382655560999,1,1,0,1,0,0,1,0.00016331863465621427,0.9998366813653438,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[2,0.9817605075337034,0.0007930214115781126,0,0,0,0,0,0.9992069785884219,1,0,0,0,0,0,1,0,0.08406026962727994,0,0.9095955590800952,0,0,0.006344171292624901,0,1,0,1,0,0,1,0.0007930214115781126,0.9992069785884219,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.9992069785884219,0,0,0,0,0,0,0.0007930214115781126,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0],
[2,0.9817605075337034,0,0.0007930214115781126,0,0,0,0,0.9992069785884219,0,1,0,0,0,0,1,0,0.08326724821570182,0,0.9119746233148295,0,0,0.004758128469468675,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[4,0.03284950343773873,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0.2184873949579832,0,0,0.7815126050420168,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0],
[4,0.18029029793735676,0.03666921313980138,0.0106951871657754,0,0,0,0,0.9526355996944232,1,1,0,0,0,0,1,0,0.013750954927425516,0,0.9763177998472116,0,0.0007639419404125286,0.009167303284950344,0,1,0,1,0,1,1,0.03972498090145149,0.9587471352177235,0,0,0.0015278838808250573,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.9923605805958747,0,0,0,0,0,0,0.007639419404125287,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0],
[4,0.18029029793735676,0.006875477463712758,0.03972498090145149,0,0,0,0,0.9533995416348358,1,1,0,0,0,0,1,0,0.013750954927425516,0,0.9824293353705118,0,0.0007639419404125286,0.0030557677616501145,0,1,0,1,0,1,1,0.009931245225362872,0.988540870893812,0,0,0.0015278838808250573,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.9954163483575248,0,0,0,0,0,0,0.004583651642475172,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0],
[4,0.03284950343773873,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0.7608861726508785,0,0,0.23911382734912145,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0],
[2,0.9175675675675675,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0.15405405405405406,0,0.8391891891891892,0,0,0.006756756756756757,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[2,0.9175675675675675,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0.0013513513513513514,0.1445945945945946,0,0.8040540540540541,0,0,0.05,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[2,0.96996996996997,0,0.17417417417417416,0,0,0.001001001001001001,0,0.8248248248248248,0,1,0,0,1,0,1,0,0.06906906906906907,0,0.8828828828828829,0,0.002002002002002002,0.04604604604604605,0,1,0,1,0,1,1,0,0.950950950950951,0,0,0.04904904904904905,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.950950950950951,0,0,0,0,0,0,0.04904904904904905,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0],
[2,0.96996996996997,0,0.14714714714714713,0,0,0,0,0.8528528528528528,0,1,0,0,0,0,1,0,0.08108108108108109,0,0.8888888888888888,0,0.002002002002002002,0.028028028028028028,0,1,0,1,0,1,1,0,0.8238238238238238,0,0,0.17617617617617617,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.8238238238238238,0,0,0,0,0,0,0.17617617617617617,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0],
[2,0.9762126865671642,0,0,0,0,0.8740671641791045,0,0.1259328358208955,0,0,0,0,1,0,1,0,0.0853544776119403,0,0.8964552238805971,0,0.0023320895522388058,0.01585820895522388,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[2,0.9762126865671642,0,0,0,0,0.8740671641791045,0,0.1259328358208955,0,0,0,0,1,0,1,0,0.08395522388059702,0,0.8997201492537313,0,0.0013992537313432835,0.014925373134328358,0,1,0,1,0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[2,0.9302325581395349,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0.08527131782945736,0,0.9043927648578811,0,0,0.0103359173126615,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[2,0.9302325581395349,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0.07493540051679587,0,0.9121447028423773,0,0,0.012919896640826873,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[3,0.8485804416403786,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0.0473186119873817,0,0.9495268138801262,0,0,0.0031545741324921135,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[3,0.8485804416403786,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0.0473186119873817,0,0.9305993690851735,0,0,0.022082018927444796,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[2,0.9661016949152542,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0.26101694915254237,0,0.7322033898305085,0,0,0.006779661016949152,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[2,0.9661016949152542,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0.2542372881355932,0,0.7322033898305085,0,0,0.013559322033898305,0,1,0,1,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0]
]
predictions (the labels passed to train):
[0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0]
results (what predict returns):
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
By the way, the same happens when I simplify the cases to just
[[2,1],[2,1],[2,1],[2,1],[4,0],[4,0],[4,0],[4,0],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[3,1],[3,1],[2,1],[2,1]]
where the correlation between the first column and the predictions is apparent.
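A standalone reproduction with these simplified cases looks roughly like this (a minimal sketch; the require line is an assumption about how the package is imported and may differ by version):

var GaussianNB = require('ml-naivebayes').GaussianNB; // assumed import; exact path/export may differ
var cases = [[2,1],[2,1],[2,1],[2,1],[4,0],[4,0],[4,0],[4,0],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[2,1],[3,1],[3,1],[2,1],[2,1]];
var predictions = [0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0];
var nb = new GaussianNB();
nb.train(cases, predictions);
console.log(nb.predict(cases)); // reported behaviour: every element comes back as 0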
During training, when std is 0 (i.e. all numbers in a given column are the same), (1 / (C1 * currentStd)) evaluates to Infinity (which on its own is not that bad), and -2 * currentStd * currentStd evaluates to 0 (which is worse). This becomes a problem later during prediction, where currentProbability turns into NaN because calculateLogProbability fails when C2 is 0.
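Roughly, the arithmetic looks like this (an illustration of the computation described above with my own variable names, assuming C1 is the usual sqrt(2 * PI) normalisation constant; not the library's actual code):

var C1 = Math.sqrt(2 * Math.PI); // assumed normalisation constant
var mean = 2;  // all training values in the column are identical...
var std = 0;   // ...so the column's standard deviation is 0

var coefficient = 1 / (C1 * std); // Infinity -- survivable on its own
var c2 = -2 * std * std;          // 0 -- the real problem

function logProbability(value) {
  // the exponent (value - mean)^2 / c2 is 0/0 (NaN) when value equals the mean and divides by zero otherwise;
  // either way the product with the Infinity coefficient is NaN, and so is its log
  return Math.log(coefficient * Math.exp(Math.pow(value - mean, 2) / c2));
}

console.log(logProbability(2)); // NaN
console.log(logProbability(4)); // NaN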
Tests and a possible fix at https://github.com/pavelstudeny/naive-bayes/commit/17c172ce507705f0dc3aa5930d0274deaeee0f8a
Maybe @JeffersonH44 you want to fix the divisions by zero in all places in a different way? For example, if you eventually implemented continuous training (e.g. calling train
for individual rows as they arrive), the classifier might never recover from some of the Infinity values.
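One common way to guard against a zero standard deviation (a sketch of my own, not necessarily what the linked commit or the PR below does) is to clamp it to a small positive epsilon before the constants are derived:

var EPSILON = 1e-9; // arbitrary floor; any small positive value avoids the degenerate case

function gaussianConstants(std) {
  var clamped = Math.max(std, EPSILON);                   // never divide by zero below
  return {
    coefficient: 1 / (Math.sqrt(2 * Math.PI) * clamped),  // finite instead of Infinity
    c2: -2 * clamped * clamped                            // non-zero instead of 0
  };
}

// a column whose values are all identical now gets a very narrow but finite Gaussian,
// so the per-class log probabilities stay comparable instead of collapsing to NaN
console.log(gaussianConstants(0));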
Although this still fails (run inside a Jest/Jasmine-style test, with GaussianNB required as in the sketch above):
var cases = [[2,0.8628123468887801],[2,0.8628123468887801],[2,0.9817605075337034],[2,0.9817605075337034],[2,0.9175675675675675],[2,0.9175675675675675],[2,0.96996996996997],[2,0.96996996996997],[2,0.9762126865671642],[2,0.9762126865671642],[3,0.009009009009009009],[3,0.009009009009009009],[3,0],[2,0.9302325581395349],[2,0.9302325581395349],[4,0.09879518072289156],[4,0.0963855421686747],[4,0],[4,0.0024096385542168677],[3,0.015822784810126583]];
var predictions = [0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1];
var nb = new GaussianNB();
nb.train(cases, predictions);
var result = nb.predict([cases[0], cases[18]]);
expect(result).toEqual([0, 1]);
This can possibly be resolved by https://github.com/mljs/naive-bayes/pull/10