compromise icon indicating copy to clipboard operation
compromise copied to clipboard

[Improvement]: Newspaper rule.

Open MarketingPip opened this issue 10 months ago • 3 comments

Was working on this. Don't know where to submit it for PR. Feel free to mention any concerns or add any tweaks / boolean for your test suites etc (if you wanna put this in).

This should help catch a large number of newspapers.

I was thinking some rules for `Sun`, `Sun-Times`, `Star`, `Free Press`, and `Journal` could be added too, but those might be harder.

Though we should probably allow years to follow the name, and I thought the "date" rule might be necessary, but I'm not really sure. We don't usually express ourselves like "the Toronto times tomorrow" or "the Toronto times yesterday".

P.S. This will tag things like "Canada Post" as a newspaper.


/**
 * Extracts a newspaper name of the form "<place> Post" or "<place> Times"
 * (optionally preceded by "the") from a given text.
 *
 * Rules are tried in order, so the "the"-prefixed form wins over the bare
 * form and "post" wins over "times". A rule is rejected when the document
 * also matches that rule's exclusion pattern ("... post office",
 * "... times zone(s)", or "... times" followed by a date).
 *
 * Relies on a global/in-scope `nlp` function (the compromise library).
 *
 * @param {string} str - The input text to analyze.
 * @returns {string|false} - The extracted newspaper name or false if no match is found.
 */

function newspaperRule(str) {
  // Create an NLP document from the input text
  const doc = nlp(str);

  const place = "(#Place+|#Place)";
  const afterTimes = "(#Date|#Date+|zones|zone)";

  // [pattern to extract, pattern that vetoes it].
  // Each veto uses the same prefix as its pattern: the bare "<place> post"
  // rule must be vetoed by a bare "<place> post office", otherwise
  // "New York Post office" (no leading "the") slips through as a newspaper.
  const rules = [
    // e.g. "the New York Post"
    [`the ${place} post`, `the ${place} post office`],
    // e.g. "New York Post"
    [`${place} post`, `${place} post office`],
    // e.g. "the New York Times"
    [`the ${place} times`, `the ${place} times ${afterTimes}`],
    // e.g. "New York Times"
    [`${place} times`, `${place} times ${afterTimes}`],
  ];

  for (const [pattern, exclusion] of rules) {
    const match = doc.match(pattern);
    if (match.found && !doc.match(exclusion).found) {
      return match.out("text");
    }
  }

  return false; // No matching newspaper name was found
}

// Ad-hoc test table: each entry is [input text, whether a newspaper name is expected].

const testCases = [
  ["what is the New York Post", true],
  ["where is the New York Post", true],
  ["where is the New York postal office", false],
  ["where is New York postal office", false],
  ["what is the New York times zones", false],
  ["what is New York times zones", false],
  ["The New York Post office is closed", false],
  ["New York Post", true],
  ["the toronto post", true],
  ["the washington post", true],
  ["washington postal office", false],
  ["the New York Times", true],
  ["the New York Times zone", false],
  ["New York Times", true],
  ["New York Times zone", false],
  ["read the new york times article", true],
  ["read the new york times tomorrow", false],
  ["the New York Times 2023", false],
  ["the Washington Post", true],
  ["the Washington Times", true],
  ["Los Angeles Times", true],
  ["Los Angeles Times zone", false],
  ["the Los Angeles Times 2023", false],
  ["I went to canada post today", true], // an organization rather than a newspaper, but still tagged
];

// Run every case and print a small report. A case passes when the
// truthiness of the rule's result agrees with the expected flag.
testCases.forEach(([text, expected]) => {
  const result = newspaperRule(text);
  const passed = Boolean(result) === Boolean(expected);

  console.log(`Input: "${text}"`);
  console.log(`Expected Result: ${expected}`);
  console.log(`Actual Result: ${result}`);
  console.log(passed ? `Test Result: Passed` : `Test Result: Failed`);

  console.log('---');
});

MarketingPip avatar Sep 13 '23 02:09 MarketingPip