playwright icon indicating copy to clipboard operation
playwright copied to clipboard

[Bug]: page.route does not disable http cache in Firefox

Open MattIPv4 opened this issue 1 year ago • 7 comments
trafficstars

Version

1.41.0

Steps to reproduce

https://try.playwright.tech/?l=javascript&s=lxhfadd

// @ts-check
const playwright = require('playwright');

// Reproduction driver: launches Firefox, installs a catch-all route, then fetches the same
// API endpoint five times, logging the status Playwright reports next to the status the page sees.
(async () => {
    const firefox = await playwright.firefox.launch();
    const browserContext = await firefox.newContext();
    const tab = await browserContext.newPage();

    // Pass-through route on every URL. Per https://playwright.dev/docs/api/class-page#page-route
    // "Enabling routing disables http cache." -- that is the claim this script puts to the test.
    await tab.route('**/*', (route) => route.continue());

    // Load the site first so the relative fetch below resolves against this origin
    await tab.goto('https://www.digitalocean.com');

    // Keep waits short: a timeout is the expected symptom of the issue
    tab.setDefaultTimeout(1500);

    for (let attempt = 0; attempt < 5; attempt += 1) {
        // Settles once Playwright reports an OK response for the API call;
        // a timeout is logged rather than thrown so the loop keeps going
        const playwrightView = tab
            .waitForResponse((response) => {
                const isApiCall = response.url().includes('/api/static-content/v1/tags/ubuntu');
                if (!isApiCall) return undefined;

                // Record the status as Playwright observes it
                console.log(attempt, 'playwright', response.status());

                // Only a successful response ends the wait
                return response.ok();
            })
            .catch((err) => {
                console.log(attempt, err.message);
            });

        // Issue the request from inside the page and record the status its fetch call reports
        const pageView = tab
            .evaluate('fetch("/api/static-content/v1/tags/ubuntu").then(resp => resp.status)')
            .then((status) => console.log(attempt, 'browser', status));

        await Promise.all([playwrightView, pageView]);
    }

    // Give pending log output a moment to flush, then shut the browser down
    await new Promise((resolve) => setTimeout(resolve, 500));
    await firefox.close();
})();

Expected behavior

https://playwright.dev/docs/api/class-page#page-route states that when page.route is used, the HTTP cache should be disabled. As such, I'd expect resp.status() to return 200 every time, or the documentation to be updated to reflect that caching is not disabled in Firefox.

Actual behavior

On the first request (or the first few, depending on how Firefox feels), resp.status() is observed to be 200. However, the response then gets cached by Firefox and resp.status() switches to 304, while the status that the browser's fetch call sees is still 200.

Additional context

I believe this is somewhat related to https://github.com/microsoft/playwright/issues/27573

Environment

Unsure how to get this info from https://try.playwright.tech

MattIPv4 avatar Mar 19 '24 14:03 MattIPv4

I cannot reproduce it with the following snippet. Most likely the server sometimes returns 304. Note that the page will always see 200 in that case.

// @ts-check
const playwright = require('playwright');

const http = require('http');

// Address the local test server binds to, and the base URL the browser navigates to.
const hostname = '127.0.0.1';
const port = 3000;

// Serialising through URL yields the origin with its trailing slash
const host = new URL(`http://${hostname}:${port}`).href;

// Minimal local server for the reproduction: a plain-text root page and a plain-text API route.
// Each request URL is logged so it is visible which requests actually reach the server.
const server = http.createServer((req, res) => {
  console.log(req.url);
  if (req.url === '/') {
    res.statusCode = 200;
    res.setHeader('Content-Type', 'text/plain');
    res.end('Hello, World!\n');
  } else if (req.url === '/api/static-content/v1/tags/ubuntu') {
    res.statusCode = 200;
    res.setHeader('Content-Type', 'text/plain');
    res.end('Api response\n');
  } else {
    // Answer every other path (e.g. /favicon.ico) so the request completes
    // instead of hanging without a response
    res.statusCode = 404;
    res.setHeader('Content-Type', 'text/plain');
    res.end('Not found\n');
  }
});

// Start accepting connections; the callback logs the base URL once listen() invokes it.
const announceReady = () => {
  console.log(`Server running at ${host}`);
};
server.listen(port, hostname, announceReady);


// Reproduction driver for the local server: routes every request through Playwright, then
// fetches the API endpoint 500 times and fails on the first iteration that does not yield a 200.
// The browser and the server are shut down even when an iteration throws.
(async () => {
    const browser = await playwright.firefox.launch();
    try {
        const context = await browser.newContext();
        const page = await context.newPage();

        // Route everything to disable the cache (this doesn't work)
        // "Enabling routing disables http cache." per https://playwright.dev/docs/api/class-page#page-route
        await page.route('**/*', route => route.continue());

        // Go to the homepage
        await page.goto(host);

        // Set a fast timeout, as we're expecting timeouts for this issue reproduction
        page.setDefaultTimeout(1500);

        for (let i = 0; i < 500; i++) {
            const [resp] = await Promise.all([
                page.waitForResponse(resp => {
                    // Look for the API request we're about to make
                    if (!resp.url().includes('/api/static-content/v1/tags/ubuntu')) return;

                    // Log the response we got, including the status that Playwright sees
                    console.log(i, 'playwright', resp.status());

                    // We want to wait for a successful response
                    return resp.ok();
                }).catch(e => {
                    // Log a timeout but don't crash
                    console.log(i, e.message);
                }),
                // Make the API request and log the status that the browser sees
                page.evaluate('fetch("/api/static-content/v1/tags/ubuntu").then(resp => resp.status)').then(status => console.log(i, 'browser', status)),
            ]);

            // The catch handler above returns nothing, so a timed-out wait leaves resp undefined.
            // Report that explicitly instead of failing with a TypeError on resp.status().
            if (!resp) {
                throw new Error(`No successful response observed on iteration ${i}`);
            }
            if (resp.status() !== 200) {
                throw new Error(`Unexpected status ${resp.status()}`);
            }
        }

        console.log('Done');

        // Allow some time for final logs to flush before closing
        await new Promise(res => setTimeout(res, 500));
    } finally {
        // Always release the browser and stop the server, including on failure
        await browser.close();
        server.close();
    }
})();

yury-s avatar Mar 19 '24 16:03 yury-s

Yes, the server is returning a 304, because the browser has a cached response and has asked the server if it can still use the cached version. The docs say that there should be no HTTP cache...?

MattIPv4 avatar Mar 19 '24 17:03 MattIPv4

> Yes, the server is returning a 304, because the browser has a cached response and has asked the server if it can still use the cached version.

Why do you think so? It looks like a misbehaving server to me as the 304 response should only be sent if the request has a conditional header. In the example above the headers received by the server are the following:

  "host": "127.0.0.1:3000",
  "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0",
  "accept": "*/*",
  "accept-language": "en-US,en;q=0.5",
  "accept-encoding": "gzip, deflate, br",
  "referer": "http://127.0.0.1:3000/",
  "connection": "keep-alive",
  "sec-fetch-dest": "empty",
  "sec-fetch-mode": "cors",
  "sec-fetch-site": "same-origin"

yury-s avatar Mar 19 '24 22:03 yury-s

The server here is Cloudflare's cache proxy, so I somewhat doubt it is misbehaving... have you tried setting a header on your response from the server telling the browser it is allowed to cache the response? The API I'm using to reproduce this allows for the response to be cached, hence subsequent requests are just getting a 304 back from Cloudflare as the cached version the browser has is still valid:

curl -is https://www.digitalocean.com/api/static-content/v1/tags/ubuntu | grep cache-control
cache-control: public, max-age=0, s-maxage=300, must-revalidate

MattIPv4 avatar Mar 19 '24 22:03 MattIPv4

Here you go:

https://try.playwright.tech/?l=javascript&s=3goo8py

// @ts-check
const playwright = require('playwright');
const http = require('http');

// Self-contained reproduction: starts a local server whose API route allows caching with
// mandatory revalidation (answering 304 to a matching If-Modified-Since), then drives Firefox
// through a catch-all route and logs the status Playwright sees next to the status the page sees.
// The browser and the server are shut down even when a step throws.
(async () => {
    // Create a 'lil server for testing
    const modified = new Date();
    const server = http.createServer((req, res) => {
        console.log('server', req.method, req.url);

        // HTML root page
        if (req.method === 'GET' && req.url === '/') {
            res.statusCode = 200;
            res.setHeader('Content-Type', 'text/html');
            res.end('<html><body><p>Test</p></body></html>\n');
            return;
        }

        // Demo API route
        if (req.method === 'GET' && req.url === '/api/static-content/v1/tags/ubuntu') {
            const now = new Date();
            res.setHeader('Cache-Control', 'public, max-age=0, must-revalidate');
            res.setHeader('Expires', '0');
            res.setHeader('Vary', 'origin, access-control-request-method, access-control-request-headers');
            res.setHeader('Age', Math.floor((now.getTime() - modified.getTime()) / 1000));
            res.setHeader('Last-Modified', modified.toUTCString());
            res.setHeader('Date', now.toUTCString());

            // Allow for the browser to revalidate their cached version
            // (compared at whole-second precision, matching the Last-Modified header format)
            const ifModified = req.headers['if-modified-since'];
            if (ifModified) {
                if (Math.floor(modified.getTime() / 1000) <= Math.floor(new Date(ifModified).getTime() / 1000)) {
                    res.statusCode = 304;
                    res.end();
                    return;
                }
            }

            res.statusCode = 200;
            res.setHeader('Content-Type', 'application/json');
            res.end('{"test":1}\n');
            return;
        }

        // Answer anything else (e.g. /favicon.ico) so the request completes
        // instead of hanging without a response
        res.statusCode = 404;
        res.end();
    });
    const hostname = '127.0.0.1';
    const port = 3000;
    const host = `http://${hostname}:${port}/`;
    await new Promise(resolve => server.listen(port, hostname, resolve));

    let browser;
    try {
        // Create the Playwright instance
        browser = await playwright.firefox.launch();
        const context = await browser.newContext();
        const page = await context.newPage();

        // Route everything to disable the cache (this doesn't work)
        // "Enabling routing disables http cache." per https://playwright.dev/docs/api/class-page#page-route
        await page.route('**/*', route => route.continue());

        // Go to the homepage
        await page.goto(host);

        // Set a fast timeout, as we're expecting timeouts for this issue reproduction
        page.setDefaultTimeout(1500);

        for (let i = 0; i < 5; i++) {
            await Promise.all([
                page.waitForResponse(resp => {
                    // Look for the API request we're about to make
                    if (!resp.url().includes('/api/static-content/v1/tags/ubuntu')) return;

                    // Log the response we got, including the status that Playwright sees
                    console.log(i, 'playwright', resp.status());

                    // We want to wait for a successful response
                    return resp.ok();
                }).catch(e => {
                    // Log a timeout but don't crash
                    console.log(i, e.message);
                }),
                // Make the API request and log the status that the browser sees
                page.evaluate('fetch("/api/static-content/v1/tags/ubuntu").then(resp => resp.status)').then(status => console.log(i, 'browser', status)),
            ]);
        }

        // Allow some time for final logs to flush before closing
        await new Promise(resolve => setTimeout(resolve, 500));
    } finally {
        // Always release the browser (if it launched) and stop the server, including on failure
        if (browser) {
            await browser.close();
        }
        await new Promise(resolve => server.close(resolve));
    }
})();

MattIPv4 avatar Mar 19 '24 22:03 MattIPv4

If the docs are right that the HTTP cache is disabled when using page.route, then I'd expect Firefox to not be including the If-Modified-Since header (as is the case if you open the dev tools and disable the cache there).

MattIPv4 avatar Mar 19 '24 22:03 MattIPv4

Investigation notes: there is a Page.setCacheDisabled method in juggler, but we don't call it. It likely needs some work to make it reliable in popups and iframes.

yury-s avatar Mar 19 '24 23:03 yury-s