limdu
limdu copied to clipboard
Is the label classification result correct?
Hi, I have a question, described below:
Here is my code:
var limdu = require('limdu');
// First, define our base classifier type (a multi-label classifier based on winnow):
// bind() pre-fills the options object as the first constructor argument, so the resulting
// TextClassifier can be instantiated later without passing options again. The first bind
// argument (0) is just a placeholder `this` value.
var TextClassifier = limdu.classifiers.multilabel.BinaryRelevance.bind(0, {
// The binary classifier type handed to BinaryRelevance: Winnow pre-configured with retrain_count: 10.
// NOTE(review): presumably BinaryRelevance builds one such binary classifier per label - confirm against limdu docs.
binaryClassifierType: limdu.classifiers.Winnow.bind(0, {retrain_count: 10})
});
// Now define our feature extractor - a function that takes a sample and adds features to a given features set.
//
// input:    the sample text (a string).
// features: a plain object used as the feature set; it is mutated in place by setting
//           features[word] = 1 for every word found in the input.
// Returns nothing.
var WordExtractor = function(input, features) {
  // Split on any run of whitespace rather than on a single space, and skip empty tokens,
  // so repeated, leading or trailing whitespace (or an empty input) does not add a
  // spurious "" feature to the set.
  input.split(/\s+/).forEach(function(word) {
    if (word !== "") {
      features[word] = 1;
    }
  });
};
// Initialize a classifier with the base classifier type and the feature extractor:
// NOTE(review): the exact semantics of each option are defined by limdu's EnhancedClassifier - confirm against its docs.
var intentClassifier = new limdu.classifiers.EnhancedClassifier({
classifierType: TextClassifier, // the bound BinaryRelevance/Winnow type defined earlier in this script
featureExtractor: WordExtractor, // adds one feature per word of the input
normalizer: limdu.features.LowerCaseNormalizer, // presumably lower-cases the input before feature extraction - confirm
pastTrainingSamples: [], // to enable retraining
});
// Train and test:
// 1) Batch-train on three single-label samples, then classify a sentence that mentions
//    chips and a doughnut.
intentClassifier.trainBatch([
  {input: "I want an apple", output: "apl"},
  {input: "I want a banana", output: "bnn"},
  {input: "I want chips", output: "cps"},
]);
console.log( intentClassifier.classify("I want chips and a doughnut") );

// 2) Add online samples: seven "<subject> want" phrases labelled "req", in the same
//    order as before, followed by one doughnut sample labelled "dnt". Classify again.
[
  "I want",
  "You want",
  "We want",
  "They want",
  "He want",
  "She want",
  "It want"
].forEach(function(phrase) {
  intentClassifier.trainOnline(phrase, "req");
});
intentClassifier.trainOnline("I want a doughnut", "dnt");
console.log( intentClassifier.classify("I want chips and a doughnut") );

// 3) Call retrain() and classify the same sentence a third time.
// NOTE(review): presumably retrain() re-trains from the samples kept in pastTrainingSamples - confirm against limdu docs.
intentClassifier.retrain();
console.log( intentClassifier.classify("I want chips and a doughnut") );
The actual results are:
[ 'cps' ]
[ 'dnt', 'cps' ]
[ 'dnt', 'cps' ]
Are these results correct, or should they be like this:
[ 'cps' ]
[ 'dnt', 'cps' ]
[ 'req', 'dnt', 'cps' ]
That's a good question. I do not know why "req" is not returned. It may be worthwhile to use the "explain" feature and see the actual numeric feature-vectors.
// Build a fresh classifier (this time without pastTrainingSamples) and assign it to
// intentClassifier.
intentClassifier = new limdu.classifiers.EnhancedClassifier({
  classifierType: TextClassifier,
  featureExtractor: WordExtractor,
  normalizer: limdu.features.LowerCaseNormalizer
});

// Train online with one sample per label: [input, output].
[
  ["switch on the fan", "fanon"],
  ["switch off the fan", "fanoff"]
].forEach(function(pair) {
  intentClassifier.trainOnline(pair[0], pair[1]);
});

// Classify a sentence close to the "fanon" sample and dump the result.
// NOTE(review): the second argument (4) presumably requests an explanation of the
// decision at detail level 4 - confirm against limdu docs.
console.dir(intentClassifier.classify("switch on the fan pls",4));
and the result:
positive: fanoff: (4) ["switch+0.80", "the+0.80", "fan+0.80", "bias+0.80"] fanon: (4) ["on+0.64", "switch+0.14", "the+0.14", "fan+0.14"]
Why does "fanoff" have a higher number than "fanon"?