playwright
playwright copied to clipboard
[Bug]: page.route does not disable http cache in Firefox
Version
1.41.0
Steps to reproduce
https://try.playwright.tech/?l=javascript&s=lxhfadd
// @ts-check
// Reproduction script: compares the HTTP status Playwright reports for an API
// response with the status the page's own fetch() call sees, in Firefox, while
// a catch-all route is installed.
const playwright = require('playwright');

(async () => {
  const browser = await playwright.firefox.launch();
  const context = await browser.newContext();
  const page = await context.newPage();

  // Install a pass-through route for every request. Per
  // https://playwright.dev/docs/api/class-page#page-route, "Enabling routing
  // disables http cache." -- this script demonstrates that it does not.
  await page.route('**/*', route => route.continue());

  // Load the homepage so the relative fetch below resolves against it.
  await page.goto('https://www.digitalocean.com');

  // Timeouts are the expected symptom here, so keep them short.
  page.setDefaultTimeout(1500);

  for (let attempt = 0; attempt < 5; attempt++) {
    // Playwright's view of the response: resolves only for a successful (ok)
    // response to the API URL; a timeout is logged rather than rethrown.
    const seenByPlaywright = page
      .waitForResponse(response => {
        const isApiCall = response.url().includes('/api/static-content/v1/tags/ubuntu');
        if (!isApiCall) return;
        // Record the status Playwright observed for this response.
        console.log(attempt, 'playwright', response.status());
        return response.ok();
      })
      .catch(err => {
        console.log(attempt, err.message);
      });

    // The page's view of the response: run fetch() in the page and log its status.
    const seenByBrowser = page
      .evaluate('fetch("/api/static-content/v1/tags/ubuntu").then(resp => resp.status)')
      .then(status => console.log(attempt, 'browser', status));

    await Promise.all([seenByPlaywright, seenByBrowser]);
  }

  // Give the last log lines a moment to flush before shutting down.
  await new Promise(done => {
    setTimeout(done, 500);
  });
  await browser.close();
})();
Expected behavior
https://playwright.dev/docs/api/class-page#page-route states that when page.route is used, the HTTP cache should be disabled. As such, I'd expect resp.status() to return 200 every time, or the documentation to be updated to reflect that caching is not disabled in Firefox.
Actual behavior
On the first request (or the first few, depending on how Firefox feels), resp.status() is observed to be 200; however, the response then gets cached by Firefox and resp.status() switches to 304 (while the status that the browser fetch call sees is still 200).
Additional context
I believe this is somewhat related to https://github.com/microsoft/playwright/issues/27573
Environment
Unsure how to get this info from https://try.playwright.tech
I cannot reproduce it with the following snippet. Most likely the server sometimes returns 304. Note that the page will always see 200 in that case.
// @ts-check
// Reproduction script: serves a tiny local site, then repeatedly fetches an API
// path from a Firefox page with a catch-all route installed, and fails as soon
// as Playwright reports a status other than 200 for that API response.
const playwright = require('playwright');
const http = require('http');

const hostname = '127.0.0.1';
const port = 3000;
const host = `http://${hostname}:${port}/`;
const apiPath = '/api/static-content/v1/tags/ubuntu';

// Minimal server: a root page, the API path, and a 404 for anything else.
const server = http.createServer((req, res) => {
  console.log(req.url);
  res.setHeader('Content-Type', 'text/plain');
  if (req.url === '/') {
    res.statusCode = 200;
    res.end('Hello, World!\n');
  } else if (req.url === apiPath) {
    res.statusCode = 200;
    res.end('Api response\n');
  } else {
    // Always answer: an unanswered request would otherwise hang until the
    // client gives up.
    res.statusCode = 404;
    res.end('Not found\n');
  }
});

(async () => {
  // Wait until the server is actually accepting connections before navigating.
  await new Promise(resolve => server.listen(port, hostname, () => resolve(undefined)));
  console.log(`Server running at ${host}`);

  try {
    const browser = await playwright.firefox.launch();
    try {
      const context = await browser.newContext();
      const page = await context.newPage();

      // "Enabling routing disables http cache." per
      // https://playwright.dev/docs/api/class-page#page-route
      await page.route('**/*', route => route.continue());

      // Go to the homepage so the relative fetch below resolves against it.
      await page.goto(host);

      // Keep timeouts short so a missing response is reported quickly.
      page.setDefaultTimeout(1500);

      for (let i = 0; i < 500; i++) {
        const [resp] = await Promise.all([
          page.waitForResponse(candidate => {
            // Match on URL only. Filtering on ok() here would hide a 304 from
            // the status check below and turn it into a timeout instead.
            if (!candidate.url().includes(apiPath)) return false;
            // Log the status that Playwright sees.
            console.log(i, 'playwright', candidate.status());
            return true;
          }).catch(e => {
            // Log the timeout; the missing response is reported explicitly below.
            console.log(i, e.message);
            return undefined;
          }),
          // Make the API request and log the status that the browser sees.
          page.evaluate(`fetch(${JSON.stringify(apiPath)}).then(resp => resp.status)`)
            .then(status => console.log(i, 'browser', status)),
        ]);

        if (!resp) {
          throw new Error(`No response observed for ${apiPath} on iteration ${i}`);
        }
        if (resp.status() !== 200) {
          throw new Error(`Unexpected status: ${resp.status()}`);
        }
      }
      console.log('Done');

      // Allow some time for final logs to flush before closing.
      await new Promise(res => setTimeout(res, 500));
    } finally {
      // Close the browser even when an iteration fails.
      await browser.close();
    }
  } finally {
    // Stop listening so the process can exit on both success and failure.
    server.close();
  }
})();
Yes, the server is returning a 304, because the browser has a cached response and has asked the server if it can still use the cached version. The docs say that there should be no HTTP cache...?
> Yes, the server is returning a 304, because the browser has a cached response and has asked the server if it can still use the cached version.
Why do you think so? It looks like a misbehaving server to me as the 304 response should only be sent if the request has a conditional header. In the example above the headers received by the server are the following:
"host": "127.0.0.1:3000",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0",
"accept": "*/*",
"accept-language": "en-US,en;q=0.5",
"accept-encoding": "gzip, deflate, br",
"referer": "http://127.0.0.1:3000/",
"connection": "keep-alive",
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin"
The server here is Cloudflare's cache proxy, so I somewhat doubt it is misbehaving... have you tried setting a header on your response from the server telling the browser it is allowed to cache the response? The API I'm using to reproduce this allows for the response to be cached, hence subsequent requests are just getting a 304 back from Cloudflare as the cached version the browser has is still valid:
curl -is https://www.digitalocean.com/api/static-content/v1/tags/ubuntu | grep cache-control
cache-control: public, max-age=0, s-maxage=300, must-revalidate
Here you go:
https://try.playwright.tech/?l=javascript&s=3goo8py
// @ts-check
// Reproduction script: runs a local server whose API response is cacheable but
// must be revalidated (If-Modified-Since -> 304), then fetches it repeatedly
// from a Firefox page with a catch-all route installed, logging the status
// seen by Playwright and the status seen by the page's fetch() call.
const playwright = require('playwright');
const http = require('http');

(async () => {
  const apiPath = '/api/static-content/v1/tags/ubuntu';

  // Fixed "last modified" instant for the API resource, set at startup.
  const modified = new Date();

  const server = http.createServer((req, res) => {
    console.log('server', req.method, req.url);

    // HTML root page
    if (req.method === 'GET' && req.url === '/') {
      res.statusCode = 200;
      res.setHeader('Content-Type', 'text/html');
      res.end('<html><body><p>Test</p></body></html>\n');
      return;
    }

    // Demo API route
    if (req.method === 'GET' && req.url === apiPath) {
      const now = new Date();
      res.setHeader('Cache-Control', 'public, max-age=0, must-revalidate');
      res.setHeader('Expires', '0');
      res.setHeader('Vary', 'origin, access-control-request-method, access-control-request-headers');
      res.setHeader('Age', Math.floor((now.getTime() - modified.getTime()) / 1000));
      res.setHeader('Last-Modified', modified.toUTCString());
      res.setHeader('Date', now.toUTCString());

      // Let the browser revalidate its cached copy. Timestamps are compared
      // at whole-second precision because the value sent via toUTCString()
      // carries no milliseconds.
      const ifModified = req.headers['if-modified-since'];
      if (ifModified) {
        if (Math.floor(modified.getTime() / 1000) <= Math.floor(new Date(ifModified).getTime() / 1000)) {
          res.statusCode = 304;
          res.end();
          return;
        }
      }

      res.statusCode = 200;
      res.setHeader('Content-Type', 'application/json');
      res.end('{"test":1}\n');
      return;
    }

    // Anything else gets an explicit 404 so no request is left without a
    // response.
    res.statusCode = 404;
    res.setHeader('Content-Type', 'text/plain');
    res.end('Not found\n');
  });

  const hostname = '127.0.0.1';
  const port = 3000;
  const host = `http://${hostname}:${port}/`;
  await new Promise(resolve => server.listen(port, hostname, () => resolve(undefined)));

  try {
    // Create the Playwright instance
    const browser = await playwright.firefox.launch();
    try {
      const context = await browser.newContext();
      const page = await context.newPage();

      // Route everything to disable the cache (this doesn't work)
      // "Enabling routing disables http cache." per
      // https://playwright.dev/docs/api/class-page#page-route
      await page.route('**/*', route => route.continue());

      // Go to the homepage
      await page.goto(host);

      // Set a fast timeout, as we're expecting timeouts for this issue reproduction
      page.setDefaultTimeout(1500);

      for (let i = 0; i < 5; i++) {
        await Promise.all([
          page.waitForResponse(resp => {
            // Look for the API request we're about to make
            if (!resp.url().includes(apiPath)) return;
            // Log the response we got, including the status that Playwright sees
            console.log(i, 'playwright', resp.status());
            // We want to wait for a successful response; a 304 therefore
            // shows up as a logged timeout.
            return resp.ok();
          }).catch(e => {
            // Log a timeout but don't crash
            console.log(i, e.message);
          }),
          // Make the API request and log the status that the browser sees
          page.evaluate(`fetch(${JSON.stringify(apiPath)}).then(resp => resp.status)`)
            .then(status => console.log(i, 'browser', status)),
        ]);
      }

      // Allow some time for final logs to flush before closing
      await new Promise(resolve => setTimeout(resolve, 500));
    } finally {
      // Close the browser even if a step above threw.
      await browser.close();
    }
  } finally {
    // Always stop the server so the process can exit.
    await new Promise(resolve => server.close(() => resolve(undefined)));
  }
})();
If the docs are right that the HTTP cache is disabled when using page.route, then I'd expect Firefox to not be including the If-Modified-Since header (as is the case if you open the dev tools and disable the cache there).
Investigation notes: juggler exposes Page.setCacheDisabled, but we don't call it; wiring it up likely needs some work to make it reliable in popups and iframes.