Wikipedia icon indicating copy to clipboard operation
Wikipedia copied to clipboard

When I attempt to make multiple requests at once I get a lot of PageErrors (parallel or sequential) even on valid items

Open contractorwolf opened this issue 1 year ago • 1 comments

When I do these one at a time, they all resolve with a page; when I do more than ten at a time, they start to throw pageErrors. I made a little code sample that perfectly illustrates the issue:

const wiki = require('wikipedia');

// Subjects to look up. The same ten titles are listed twice (20 entries in total).
const subjects = [ "University of Washington", "USC Gould School of Law", "Watergate", "Supreme Court", "Justice Clarence Thomas", "Harlan Crow", "resignation", "impeachment", "public trust", "code of ethics", "University of Washington", "USC Gould School of Law", "Watergate", "Supreme Court", "Justice Clarence Thomas", "Harlan Crow", "resignation", "impeachment", "public trust", "code of ethics" ];

/**
 * Looks up a single subject through `wiki.page` and reports the outcome as a
 * plain object.
 *
 * The returned promise does not reject for lookup failures: anything thrown
 * while fetching or reading the page is stored on the result instead.
 *
 * @param {string} subject - Title handed to `wiki.page`.
 * @returns {Promise<object>} `{ subject, canonicalurl }` when the lookup
 *   succeeds, `{ subject, error }` when it throws.
 */
async function GetWikiSummary(subject) {
    // Record the subject first so it is present on both outcomes.
    const summary = { subject };

    try {
        const { canonicalurl } = await wiki.page(subject);
        summary.canonicalurl = canonicalurl;
    } catch (lookupError) {
        // Keep the thrown value as-is; callers decide how to report it.
        summary.error = lookupError;
    }

    return summary;
}

/**
 * Resolves every subject one after another and collects the outcomes in
 * input order.
 *
 * Each lookup is awaited before the next one starts. If a lookup rejects,
 * a bare `{ subject }` entry is recorded in its place.
 *
 * @param {Iterable<string>} subjects - Titles to look up.
 * @returns {Promise<object[]>} One entry per subject, in the same order.
 */
async function getWikiSummaries(subjects) {
    const summaries = [];

    for (const topic of subjects) {
        let entry;
        try {
            entry = await GetWikiSummary(topic);
        } catch {
            // Fall back to an entry carrying only the subject.
            entry = { subject: topic };
        }
        summaries.push(entry);
    }

    return summaries;
}

// Mark the start of the run.
console.log("Starting");

// Run the lookups inside an async IIFE so that `await` can be used here.
// The promise returned by the IIFE is not awaited, so execution falls
// through to the "Done" log below while the lookups are still pending.
(async () => {
    const converted = await getWikiSummaries(subjects);
    // Alternative kept for comparison: look up a single subject instead.
    //const converted = await GetWikiSummary('impeachment');
    console.log(JSON.stringify(converted, null, 2));
})();

// Printed before the JSON results above, not after the lookups complete.
console.log("Done");

The list is actually duplicated items, to show that the first 10 resolve and the last 10 (even though they are the same) will throw page errors. If I have a list of 20 items, what is the recommended way to get all 20?

The result of the above code looks like this:

Done
[
  {
    "subject": "University of Washington",
    "canonicalurl": "https://en.wikipedia.org/wiki/University_of_Washington"
  },
  {
    "subject": "USC Gould School of Law",
    "canonicalurl": "https://en.wikipedia.org/wiki/USC_Gould_School_of_Law"
  },
  {
    "subject": "Watergate",
    "canonicalurl": "https://en.wikipedia.org/wiki/Watergate_scandal"
  },
  {
    "subject": "Supreme Court",
    "canonicalurl": "https://en.wikipedia.org/wiki/Supreme_court"
  },
  {
    "subject": "Justice Clarence Thomas",
    "canonicalurl": "https://en.wikipedia.org/wiki/Clarence_Thomas"
  },
  {
    "subject": "Harlan Crow",
    "canonicalurl": "https://en.wikipedia.org/wiki/Harlan_Crow"
  },
  {
    "subject": "resignation",
    "canonicalurl": "https://en.wikipedia.org/wiki/Resignation"
  },
  {
    "subject": "impeachment",
    "canonicalurl": "https://en.wikipedia.org/wiki/Impeachment"
  },
  {
    "subject": "public trust",
    "canonicalurl": "https://en.wikipedia.org/wiki/Public_trust"
  },
  {
    "subject": "code of ethics",
    "canonicalurl": "https://en.wikipedia.org/wiki/Ethical_code"
  },
  {
    "subject": "University of Washington",
    "error": {
      "name": "pageError"
    }
  },
  {
    "subject": "USC Gould School of Law",
    "error": {
      "name": "pageError"
    }
  },
  {
    "subject": "Watergate",
    "error": {
      "name": "pageError"
    }
  },
  {
    "subject": "Supreme Court",
    "error": {
      "name": "pageError"
    }
  },
  {
    "subject": "Justice Clarence Thomas",
    "error": {
      "name": "pageError"
    }
  },
  {
    "subject": "Harlan Crow",
    "error": {
      "name": "pageError"
    }
  },
  {
    "subject": "resignation",
    "error": {
      "name": "pageError"
    }
  },
  {
    "subject": "impeachment",
    "error": {
      "name": "pageError"
    }
  },
  {
    "subject": "public trust",
    "canonicalurl": "https://en.wikipedia.org/wiki/Public_trust"
  },
  {
    "subject": "code of ethics",
    "error": {
      "name": "pageError"
    }
  }
]

Thanks for the help!

contractorwolf avatar Apr 16 '23 16:04 contractorwolf