node icon indicating copy to clipboard operation
node copied to clipboard

`parallel/test-runner-output` is flaky

Open avivkeller opened this issue 1 year ago • 2 comments

Test

parallel/test-runner-output

Platform

Windows

Console output

not ok 2761 parallel/test-runner-output
  ---
  duration_ms: 5831.75900
  severity: fail
  exitcode: 1
  stack: |-
    ▶ test runner output
      ✔ test-runner/output/abort.js (5044.243602ms)
      ✔ test-runner/output/abort-runs-after-hook.js (5024.467414ms)
      ✔ test-runner/output/abort_suite.js (5000.590488ms)
      ✔ test-runner/output/abort_hooks.js (4979.5575ms)
      ✔ test-runner/output/describe_it.js (4977.140656ms)
      ✔ test-runner/output/describe_nested.js (4925.394205ms)
      ✔ test-runner/output/eval_dot.js (4901.098336ms)
      ✔ test-runner/output/eval_spec.js (4837.353ms)
      ✔ test-runner/output/eval_tap.js (4710.94876ms)
      ✔ test-runner/output/filtered-suite-delayed-build.js (4681.152445ms)
      ✔ test-runner/output/filtered-suite-order.mjs (4642.805511ms)
      ✔ test-runner/output/filtered-suite-throws.js (4612.421303ms)
      ✔ test-runner/output/hooks.js (4622.774072ms)
      ✔ test-runner/output/hooks_spec_reporter.js (4589.467662ms)
      ✔ test-runner/output/skip-each-hooks.js (4526.797983ms)
      ✔ test-runner/output/suite-skip-hooks.js (4502.179664ms)
      ✔ test-runner/output/timeout_in_before_each_should_not_affect_further_tests.js (4422.895785ms)
      ✔ test-runner/output/hooks-with-no-global-test.js (4306.540227ms)
      ✔ test-runner/output/global-hooks-with-no-tests.js (4279.560221ms)
      ✔ test-runner/output/before-and-after-each-too-many-listeners.js (4201.921211ms)
      ✔ test-runner/output/before-and-after-each-with-timeout-too-many-listeners.js (4158.995317ms)
      ✔ test-runner/output/force_exit.js (4130.514523ms)
      ✔ test-runner/output/global_after_should_fail_the_test.js (4101.715736ms)
      ✔ test-runner/output/no_refs.js (4050.275822ms)
      ✔ test-runner/output/no_tests.js (3976.924663ms)
      ✔ test-runner/output/only_tests.js (3872.183748ms)
      ✔ test-runner/output/dot_reporter.js (3810.210886ms)
      ✔ test-runner/output/junit_reporter.js (3816.323072ms)
      ✔ test-runner/output/spec_reporter_successful.js (3710.15109ms)
      ✔ test-runner/output/spec_reporter.js (3722.466172ms)
      ✔ test-runner/output/spec_reporter_cli.js (3607.488084ms)
      ✔ test-runner/output/source_mapped_locations.mjs (3511.73783ms)
      ✔ test-runner/output/lcov_reporter.js (3515.362787ms)
      ✔ test-runner/output/output.js (3432.210248ms)
      ✔ test-runner/output/output_cli.js (3385.594322ms)
      ✔ test-runner/output/name_and_skip_patterns.js (3116.385549ms)
      ✔ test-runner/output/name_pattern.js (3012.778989ms)
      ✔ test-runner/output/name_pattern_with_only.js (2944.807975ms)
      ✔ test-runner/output/skip_pattern.js (2479.866823ms)
      ✔ test-runner/output/unfinished-suite-async-error.js (2393.255322ms)
      ✔ test-runner/output/unresolved_promise.js (2214.530445ms)
      ✖ test-runner/output/default_output.js (2081.418465ms)
        AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
        + actual - expected ... Lines skipped
        
          '[32m✔ should pass [90m(*ms)[39m[39m\n' +
            '[31m✖ should fail [90m(*ms)[39m[39m\n' +
        ...
            '  *[39m\n' +
            '  *[39m\n' +
        +   '  [90m    at async startSubtestAfterBootstrap (node:internal/test_runn'
        -   '  *[39m\n' +
        -   '\n' +
        -   '*\n' +
        -   '[31m✖ should fail [90m(*ms)[39m[39m\n' +
        -   '  Error: fail\n' +
        -   '      *[39m\n' +
        -   '  *[39m\n' +
        -   '  *[39m\n' +
        -   '  *[39m\n' +
        -   '  *[39m\n' +
        -   '      *[39m\n' +
        -   '  *[39m\n' +
        -   '  *[39m\n' +
        -   '  *[39m\n' +
        -   '  *[39m\n' +
        -   '\n' +
        -   '*\n' +
        -   '[31m✖ should pass but parent fail [90m(*ms)[39m[39m\n' +
        -   "  [32m'test did not finish before its parent and was cancelled'[39m\n"
            at assertSnapshot (/home/iojs/build/workspace/node-test-commit-aix/nodes/aix72-ppc64/test/common/assertSnapshot.js:56:12)
            at async Module.spawnAndAssert (/home/iojs/build/workspace/node-test-commit-aix/nodes/aix72-ppc64/test/common/assertSnapshot.js:91:3)
            at async TestContext.<anonymous> (file:///home/iojs/build/workspace/node-test-commit-aix/nodes/aix72-ppc64/test/parallel/test-runner-output.mjs:286:5)
            at async Test.run (node:internal/test_runner/test:935:9)
            at async Promise.all (index 41)
            at async Suite.run (node:internal/test_runner/test:1320:7)
            at async startSubtestAfterBootstrap (node:internal/test_runner/harness:297:3) {
          generatedMessage: true,
          code: 'ERR_ASSERTION',
          actual: '[32m✔ should pass [90m(*ms)[39m[39m\n[31m✖ should fail [90m(*ms)[39m[39m\n  Error: fail\n      *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n...',
          expected: '[32m✔ should pass [90m(*ms)[39m[39m\n[31m✖ should fail [90m(*ms)[39m[39m\n  Error: fail\n      *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n...',
          operator: 'strictEqual'
        }
    
      ✔ test-runner/output/arbitrary-output.js (1878.087741ms)
      ✔ test-runner/output/async-test-scheduling.mjs (1841.915158ms)
      ✔ test-runner/output/arbitrary-output-colored.js (2090.36852ms)
      ✔ test-runner/output/dot_output_custom_columns.js (1552.294798ms)
      ✔ test-runner/output/tap_escape.js (1461.577271ms)
      ✔ test-runner/output/test-runner-plan.js (1421.101897ms)
      ✔ test-runner/output/coverage_failure.js (1200.580922ms)
      ✔ test-runner/output/test-diagnostic-warning-without-test-only-flag.js (1083.959424ms)
      ✔ test-runner/output/coverage-width-40.mjs (1122.226143ms)
      ✔ test-runner/output/coverage-width-80.mjs (888.268393ms)
      ✔ test-runner/output/coverage-width-100.mjs (849.100504ms)
      ✔ test-runner/output/coverage-width-150.mjs (881.08359ms)
      ✔ test-runner/output/coverage-width-infinity.mjs (722.609533ms)
      ✔ test-runner/output/coverage-width-80-uncovered-lines.mjs (758.820816ms)
      ✔ test-runner/output/coverage-width-100-uncovered-lines.mjs (611.19314ms)
      ✔ test-runner/output/coverage-width-150-uncovered-lines.mjs (692.291311ms)
      ✔ test-runner/output/coverage-width-infinity-uncovered-lines.mjs (617.418721ms)
    ✖ test runner output (5372.954414ms)
    ℹ tests 59
    ℹ suites 1
    ℹ pass 58
    ℹ fail 1
    ℹ cancelled 0
    ℹ skipped 0
    ℹ todo 0
    ℹ duration_ms 5392.306818
    
    ✖ failing tests:
    
    test at test/parallel/test-runner-output.mjs:295:5
    ✖ test-runner/output/default_output.js (2081.418465ms)
      AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
      + actual - expected ... Lines skipped
      
        '[32m✔ should pass [90m(*ms)[39m[39m\n' +
          '[31m✖ should fail [90m(*ms)[39m[39m\n' +
      ...
          '  *[39m\n' +
          '  *[39m\n' +
      +   '  [90m    at async startSubtestAfterBootstrap (node:internal/test_runn'
      -   '  *[39m\n' +
      -   '\n' +
      -   '*\n' +
      -   '[31m✖ should fail [90m(*ms)[39m[39m\n' +
      -   '  Error: fail\n' +
      -   '      *[39m\n' +
      -   '  *[39m\n' +
      -   '  *[39m\n' +
      -   '  *[39m\n' +
      -   '  *[39m\n' +
      -   '      *[39m\n' +
      -   '  *[39m\n' +
      -   '  *[39m\n' +
      -   '  *[39m\n' +
      -   '  *[39m\n' +
      -   '\n' +
      -   '*\n' +
      -   '[31m✖ should pass but parent fail [90m(*ms)[39m[39m\n' +
      -   "  [32m'test did not finish before its parent and was cancelled'[39m\n"
          at assertSnapshot (/home/iojs/build/workspace/node-test-commit-aix/nodes/aix72-ppc64/test/common/assertSnapshot.js:56:12)
          at async Module.spawnAndAssert (/home/iojs/build/workspace/node-test-commit-aix/nodes/aix72-ppc64/test/common/assertSnapshot.js:91:3)
          at async TestContext.<anonymous> (file:///home/iojs/build/workspace/node-test-commit-aix/nodes/aix72-ppc64/test/parallel/test-runner-output.mjs:286:5)
          at async Test.run (node:internal/test_runner/test:935:9)
          at async Promise.all (index 41)
          at async Suite.run (node:internal/test_runner/test:1320:7)
          at async startSubtestAfterBootstrap (node:internal/test_runner/harness:297:3) {
        generatedMessage: true,
        code: 'ERR_ASSERTION',
        actual: '[32m✔ should pass [90m(*ms)[39m[39m\n[31m✖ should fail [90m(*ms)[39m[39m\n  Error: fail\n      *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n...',
        expected: '[32m✔ should pass [90m(*ms)[39m[39m\n[31m✖ should fail [90m(*ms)[39m[39m\n  Error: fail\n      *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n  *[39m\n...',
        operator: 'strictEqual'
      }
  ...

Build links

  • https://ci.nodejs.org/job/node-test-binary-windows-js-suites/30580/RUN_SUBSET=0,nodes=win11-arm64-COMPILED_BY-vs2022-arm64/testReport/junit/(root)/parallel/test_runner_coverage/

Additional information

I feel this is likely an issue with replaceStackTrace in assertSnapshot, since startSubtestAfterBootstrap is logged...

avivkeller avatar Oct 15 '24 00:10 avivkeller

It was previously reported in https://github.com/nodejs/node/issues/52139

aduh95 avatar Oct 16 '24 07:10 aduh95

It was previously reported in #52139

FWIW That flake was caused by the different issue

avivkeller avatar Oct 16 '24 12:10 avivkeller

I've been working on updating the old node-core-test library, and during that process, I believe I found the source of this flake, or at least information that will help: I don't think it's probably stripping some cases

avivkeller avatar Oct 27 '24 01:10 avivkeller

@RedYetiDev could you be more specific? I'm not sure I understand your previous comment.

cjihrig avatar Oct 28 '24 13:10 cjihrig

Yes, sorry for the confusion. During my porting of the test runner for node-core-test, I noticed that the common/assertSnapshot.js file wasn't trimming all of the stack traces.

In this case, the failing test is test-runner/output/default_output.js, which uses the specTransform function.

Essentially, the specTransform function looks like:

Everything
import { fileURLToPath } from 'node:url';
import path from 'node:path';

const stackFramesRegexp = /(?<=\n)(\s+)((.+?)\s+\()?(?:\(?(.+?):(\d+)(?::(\d+))?)\)?(\s+\{)?(\[\d+m)?(\n|$)/g;
const windowNewlineRegexp = /\r/g;

function replaceNodeVersion(str) {
  return str.replaceAll(process.version, '*');
}

function replaceStackTrace(str, replacement = '$1*$7$8\n') {
  return str.replace(stackFramesRegexp, replacement);
}

function replaceWindowsLineEndings(str) {
  return str.replace(windowNewlineRegexp, '');
}

function replaceWindowsPaths(str) {
  return (process.platform === 'win32') ? str.replaceAll(path.win32.sep, path.posix.sep) : str;
}

function replaceFullPaths(str) {
  return str.replaceAll('\\\'', "'").replaceAll(path.resolve(__dirname, '../..'), '');
}

function transform(...args) {
  return (str) => args.reduce((acc, fn) => fn(acc), str);
}

const root = fileURLToPath(new URL('../..', import.meta.url)).slice(0, -1);
const color = '(\\[\\d+m)';
const stackTraceBasePath = new RegExp(`${color}\\(${root.replaceAll(/[\\^$*+?.()|[\]{}]/g, '\\$&')}/?${color}(.*)${color}\\)`, 'g');

function replaceSpecDuration(str) {
  return str
    .replaceAll(/[0-9.]+ms/g, '*ms')
    .replaceAll(/duration_ms [0-9.]+/g, 'duration_ms *')
    .replace(stackTraceBasePath, '$3');
}

const specTransform = transform(
  replaceSpecDuration,
  replaceWindowsLineEndings,
  replaceStackTrace,
  replaceWindowsPaths,
);

For typical stack traces, this is fine:

const output = `
Error: ...
  [90m    at Test.runInAsyncScope (node:async_hooks:211:14)[39m
  [90m    at Test.run (node:internal/test_runner/test:934:25)[39m
  [90m    at Test.processPendingSubtests (node:internal/test_runner/test:633:18)[39m
  [90m    at Test.postRun (node:internal/test_runner/test:1045:19)[39m
`.trim();

console.log(specTransform(output));
Error: ...
  *[39m
  *[39m
  *[39m
  *[39m

However, if there's a slight difference in the output, such as it being cut off for some reason:

const output = `
Error: ...
  [90m    at Test.runInAsyncScope (node:async_hooks:211:14)[39m
  [90m    at Test.run (node:internal/test_runner/test:934:25)[39m
  [90m    at Test.processPendingSubtests (node:internal/test_runner/test:633:18)[39m
  [90m    at Test.postRun (node:internal/test
`.trim();

console.log(specTransform(output));
Error: ...
  *[39m
  *[39m
  *[39m
  [90m    at Test.postRun (node:internal/test

The line is logged.

avivkeller avatar Oct 28 '24 21:10 avivkeller

Something is causing it to be cutoff (maybe?) which is an issue, however, IMO the stack remover shouldn't be so strict, I.E. it should know that "[90m at" is a stack trace, and handle the whole line.

avivkeller avatar Oct 28 '24 21:10 avivkeller

This has caused 9 PR to fail out of the last 100 CI runs: https://github.com/nodejs/reliability/blob/main/reports/2025-10-17.md

We should consider marking it as flaky unless someone is invested in fixing it.

cc @nodejs/test_runner

See failure
18:58:06 not ok 2761 parallel/test-runner-output
18:58:06   ---
18:58:06   duration_ms: 16679.80300
18:58:06   severity: fail
18:58:06   exitcode: 1
18:58:06   stack: |-
18:58:06     Test failure: 'test-runner/output/test-timeout-flag.js'
18:58:06     Location: test/parallel/test-runner-output.mjs:356:5
18:58:06     AssertionError [ERR_ASSERTION]: Expected values to be strictly equal:
18:58:06     + actual - expected
18:58:06     ... Skipped lines
18:58:06     
18:58:06       'TAP version 13\n' +
18:58:06         '# Subtest: --test-timeout is set to 100ms\n' +
18:58:06         '    # Subtest: should timeout after 100ms\n' +
18:58:06         '    not ok 1 - should timeout after 100ms\n' +
18:58:06         '      ---\n' +
18:58:06     ...
18:58:06         "      location: '/test/fixtures/test-runner/output/test-timeout-flag.js:(LINE):3'\n" +
18:58:06     +   "      failureType: 'testTimeoutFailure'\n" +
18:58:06     +   "      error: 'test timed out after 100ms'\n" +
18:58:06     -   "      failureType: 'cancelledByParent'\n" +
18:58:06     -   "      error: 'test did not finish before its parent and was cancelled'\n" +
18:58:06         "      code: 'ERR_TEST_FAILURE'\n" +
18:58:06     +   '      stack: |-\n' +
18:58:06     +   '        async Promise.all (index 0)\n' +
18:58:06         '      ...\n' +
18:58:06         '    1..1\n' +
18:58:06         'not ok 2 - should inherit timeout options to children\n' +
18:58:06         '  ---\n' +
18:58:06         '  duration_ms: *\n' +
18:58:06         "  type: 'suite'\n" +
18:58:06         "  location: '/test/fixtures/test-runner/output/test-timeout-flag.js:(LINE):1'\n" +
18:58:06     +   "  failureType: 'subtestsFailed'\n" +
18:58:06     +   "  error: '1 subtest failed'\n" +
18:58:06     -   "  failureType: 'testTimeoutFailure'\n" +
18:58:06     -   "  error: 'test timed out after 1ms'\n" +
18:58:06         "  code: 'ERR_TEST_FAILURE'\n" +
18:58:06         '  ...\n' +
18:58:06         '1..2\n' +
18:58:06         '# tests 5\n' +
18:58:06         '# suites 2\n' +
18:58:06     
18:58:06         at assertSnapshot (/Users/admin/build/workspace/node-test-commit-osx/nodes/macos15-x64/test/common/assertSnapshot.js:65:12)
18:58:06         at async Module.spawnAndAssert (/Users/admin/build/workspace/node-test-commit-osx/nodes/macos15-x64/test/common/assertSnapshot.js:100:3)
18:58:06         at async TestContext.<anonymous> (file:///Users/admin/build/workspace/node-test-commit-osx/nodes/macos15-x64/test/parallel/test-runner-output.mjs:347:5)
18:58:06         at async Test.run (node:internal/test_runner/test:1113:7)
18:58:06         at async Promise.all (index 17)
18:58:06         at async Suite.run (node:internal/test_runner/test:1518:7)
18:58:06         at async startSubtestAfterBootstrap (node:internal/test_runner/harness:358:3) {
18:58:06       generatedMessage: true,
18:58:06       code: 'ERR_ASSERTION',
18:58:06       actual: 'TAP version 13\n' +
18:58:06         '# Subtest: --test-timeout is set to 100ms\n' +
18:58:06         '    # Subtest: should timeout after 100ms\n' +
18:58:06         '    not ok 1 - should timeout after 100ms\n' +
18:58:06         '      ---\n' +
18:58:06         '      duration_ms: *\n' +
18:58:06         "      type: 'test'\n" +
18:58:06         "      location: '/test/fixtures/test-runner/output/test-timeout-flag.js:(LINE):3'\n" +
18:58:06         "      failureType: 'testTimeoutFailure'\n" +
18:58:06         "      error: 'test timed out after 100ms'\n" +
18:58:06         '...',
18:58:06       expected: 'TAP version 13\n' +
18:58:06         '# Subtest: --test-timeout is set to 100ms\n' +
18:58:06         '    # Subtest: should timeout after 100ms\n' +
18:58:06         '    not ok 1 - should timeout after 100ms\n' +
18:58:06         '      ---\n' +
18:58:06         '      duration_ms: *\n' +
18:58:06         "      type: 'test'\n" +
18:58:07         "      location: '/test/fixtures/test-runner/output/test-timeout-flag.js:(LINE):3'\n" +
18:58:07         "      failureType: 'testTimeoutFailure'\n" +
18:58:07         "      error: 'test timed out after 100ms'\n" +
18:58:07         '...',
18:58:08       operator: 'strictEqual',
18:58:08       diff: 'simple'
18:58:08     }

joyeecheung avatar Oct 17 '25 11:10 joyeecheung

Actually this test should be split up before being marked as flaky. It's one of those tests that squeezed tons of test cases in one file (sigh) and marking it as flaky shadows other potentially non-flaky cases.

joyeecheung avatar Oct 19 '25 20:10 joyeecheung