Backend test intermittently fails

tw4l opened this issue on Dec 18, 2024

Every so often, the `test_delete_form_upload_and_crawls_from_all_crawls` test in `backend/test/test_uploads.py` fails because the workflow size does not update as expected within the time the test polls for it. We should fix this so that we don't get spurious, unhelpful test failures in CI.

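For context, the failing checks poll in fixed count/sleep loops (`MAX_ATTEMPTS` attempts, 5 seconds apart, per the output below) and `assert False` when the budget runs out. A minimal sketch of a more forgiving, centralized wait, assuming a `requests`-based test like ours; the helper name `wait_for_json_value`, its default budget, and the `API_PREFIX` value here are hypothetical:

```python
import time

import requests

API_PREFIX = "http://127.0.0.1:30870/api"  # assumed; use whatever the test suite configures


def wait_for_json_value(url, headers, key, expected, max_attempts=24, delay=5):
    """Poll a GET endpoint until the JSON response's `key` equals `expected`.

    Hypothetical helper: the same count/sleep polling the test already
    does, but with the attempt budget and delay in one tunable place and
    a failure message that reports the last value seen.
    """
    last = None
    for _ in range(max_attempts):
        r = requests.get(url, headers=headers)
        r.raise_for_status()
        last = r.json().get(key)
        if last == expected:
            return last
        time.sleep(delay)
    raise AssertionError(
        f"{key} was {last!r}, expected {expected!r} after {max_attempts} attempts: {url}"
    )
```
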
Sample output of the failed test:

```
______________ test_delete_form_upload_and_crawls_from_all_crawls ______________

admin_auth_headers = {'Authorization': '***'}
crawler_auth_headers = {'Authorization': '***'}
default_org_id = 'b7712532-2e46-4d1b-957e-92b501ecd03a'
all_crawls_delete_crawl_ids = ['manual-20241218160329-8716ab13-4ec', 'manual-20241218160404-8716ab13-4ec', 'a3325cf8-6059-49ad-b771-75358d0d0299']
all_crawls_delete_config_id = '8716ab13-4ecb-43ba-b8c6-d78443c0fa76'
upload_id_2 = 'a3325cf8-6059-49ad-b771-75358d0d0299'

    def test_delete_form_upload_and_crawls_from_all_crawls(
        admin_auth_headers,
        crawler_auth_headers,
        default_org_id,
        all_crawls_delete_crawl_ids,
        all_crawls_delete_config_id,
        upload_id_2,
    ):
        crawls_to_delete = all_crawls_delete_crawl_ids
        crawls_to_delete.append(upload_id_2)
    
        # Get org metrics
        r = requests.get(
            f"{API_PREFIX}/orgs/{default_org_id}/metrics",
            headers=admin_auth_headers,
        )
        data = r.json()
    
        org_bytes = data["storageUsedBytes"]
        org_crawl_bytes = data["storageUsedCrawls"]
        org_upload_bytes = data["storageUsedUploads"]
    
        # Get workflow and crawl sizes
        r = requests.get(
            f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{all_crawls_delete_config_id}",
            headers=admin_auth_headers,
        )
        workflow_size = r.json()["totalSize"]
    
        crawl_id_1 = all_crawls_delete_crawl_ids[0]
        r = requests.get(
            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id_1}/replay.json",
            headers=admin_auth_headers,
        )
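        # [... middle of the test source missing from the captured output ...]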
    
            if data["storageUsedCrawls"] != org_crawl_bytes - combined_crawl_size:
                all_good = False
    
            if data["storageUsedUploads"] != org_upload_bytes - upload_size:
                all_good = False
    
            if all_good:
                break
    
            if count + 1 == MAX_ATTEMPTS:
                assert False
    
            time.sleep(5)
            count += 1
    
        count = 0
        while count < MAX_ATTEMPTS:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{all_crawls_delete_config_id}",
                headers=admin_auth_headers,
            )
            if r.json()["totalSize"] == workflow_size - combined_crawl_size:
                break
    
            if count + 1 == MAX_ATTEMPTS:
>               assert False
E               assert False

backend/test/test_uploads.py:1104: AssertionError
```

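If the intermittent failure is just slow size recalculation, one low-risk fix is to widen the wait and centralize it. A sketch of what the loop failing at `test_uploads.py:1104` could collapse to, reusing the hypothetical `wait_for_json_value` helper above; the fixture and variable names are the ones visible in the failure output, and `max_attempts=36` (three minutes at 5 seconds apart) is an assumed, larger budget than the current `MAX_ATTEMPTS`:

```python
# Same check as the failing totalSize loop, expressed via the
# hypothetical helper; a bigger attempt budget gives slow CI runs
# more headroom before the test fails.
wait_for_json_value(
    f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{all_crawls_delete_config_id}",
    admin_auth_headers,
    key="totalSize",
    expected=workflow_size - combined_crawl_size,
    max_attempts=36,
)
```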