compromise
compromise copied to clipboard
[Improvement]: Newspaper rule.
Was working on this. Don't know where to submit it for PR. Feel free to mention any concerns or add any tweaks / boolean for your test suites etc (if you wanna put this in).
This should help catch a large number of newspapers.
I was thinking that some rules for `Sun, Sun-Times, Star, Free Press, Journal` could be added, but that might be harder.
Though, we should be allowing years to be passed, and I thought the "date" rule might be necessary, but I'm not really sure. We don't usually express ourselves like "the Toronto times tomorrow" or "the Toronto times yesterday".
P.S. This will tag things like "Canada Post" as a newspaper.
/**
 * Extracts a newspaper-style name ("<place> Post" / "<place> Times") from text.
 *
 * Four patterns are tried in order: "the <place> post", "<place> post",
 * "the <place> times" and "<place> times". The text of the first pattern that
 * matches, and whose exclusion pattern does not match, is returned.
 *
 * Note that any "<place> post" phrase qualifies, so organisations such as
 * "Canada Post" are reported as well.
 *
 * @param {string} str - The input text to analyze.
 * @returns {string|false} - The matched text, or false if no rule applies.
 */
function newspaperRule(str) {
  // Create an NLP document from the input text. `nlp` is expected to be
  // provided by the surrounding environment (presumably the compromise library).
  const doc = nlp(str);

  // Each rule pairs a newspaper pattern with an exclusion pattern that vetoes
  // it (e.g. "post office", "times zone", or "times" followed by a date).
  const rules = [
    // e.g. "the New York Post"
    ["the (#Place+|#Place) post", "the (#Place+|#Place) post office"],
    // e.g. "New York Post". The exclusion deliberately has no leading "the":
    // otherwise "New York Post office" (without an article) slips through.
    ["(#Place+|#Place) post", "(#Place+|#Place) post office"],
    // e.g. "the New York Times"
    [
      "the (#Place+|#Place) times",
      "the (#Place+|#Place) times (#Date|#Date+|zones|zone)",
    ],
    // e.g. "New York Times"
    [
      "(#Place+|#Place) times",
      "(#Place+|#Place) times (#Date|#Date+|zones|zone)",
    ],
  ];

  for (const [pattern, exclusion] of rules) {
    const match = doc.match(pattern);
    if (match.found && !doc.match(exclusion).found) {
      return match.out("text");
    }
  }

  return false; // No rule produced a newspaper name
}
// Ad-hoc self-check table. Each entry is
// [input sentence, whether newspaperRule should return a match].
const testCases = [
  ["what is the New York Post", true],
  ["where is the New York Post", true],
  ["where is the New York postal office", false],
  ["where is New York postal office", false],
  ["what is the New York times zones", false],
  ["what is New York times zones", false],
  ["The New York Post office is closed", false],
  ["New York Post", true],
  ["the toronto post", true],
  ["the washington post", true],
  ["washington postal office", false],
  ["the New York Times", true],
  ["the New York Times zone", false],
  ["New York Times", true],
  ["New York Times zone", false],
  ["read the new york times article", true],
  ["read the new york times tomorrow", false],
  ["the New York Times 2023", false],
  ["the Washington Post", true],
  ["the Washington Times", true],
  ["Los Angeles Times", true],
  ["Los Angeles Times zone", false],
  ["the Los Angeles Times 2023", false],
  ["I went to canada post today", true], // an organisation, not a newspaper
];

// Run every case and log a pass/fail report. A case passes when the
// truthiness of the rule's result agrees with the expected flag.
testCases.forEach(([sentence, shouldMatch]) => {
  const outcome = newspaperRule(sentence);
  const passed = Boolean(outcome) === Boolean(shouldMatch);

  console.log(`Input: "${sentence}"`);
  console.log(`Expected Result: ${shouldMatch}`);
  console.log(`Actual Result: ${outcome}`);
  console.log(passed ? `Test Result: Passed` : `Test Result: Failed`);
  console.log('---');
});