cucumber-js Using -parallel and redirecting stdout / logging

Using -parallel and redirecting stdout / logging

Open AndrejMitrovic opened this issue 7 months ago • 1 comments

Hi,

Is there a way to set console / stdout redirects for each cucumber worker when running with the -parallel switch?

I have a project where parallelism is implemented via a shell script that spawns multiple cucumber processes, each redirecting stdout / stderr to its own file.

This works nicely. But this limits parallelism on the feature file level.

I'd like to utilize scenario-level parallelism. -parallelism is perfect for that and it works great. I can route any console logging of the process itself to a specific file. But I can't seem to hook into cucumber to do the same. So Gherkin steps are still shown interleaved.

Essentially I set up a logger such as winston, and simply redirect the console:

const logger = winston.createLogger(...)

console.log = (message) => {
    logger.info(message);
};

console.info = (message) => {
    logger.info(message);
};

...

Works great for the process, but Cucumber itself likely writes directly to stdout, which I'm not touching here.

Apr 29 '25 05:04 AndrejMitrovic

For anyone else that might be running into the same problem, I've come up with a solution that seems to work for the most part.

First, I've defined a logger module logger.js:

const winston = require("winston");
const { transports } = winston;
const util = require("util");
const Transport = require("winston-transport");
const path = require("path");
const fs = require("fs");

function customTimestamp() {
    return new Date().toISOString().replace("T", " ").split(".")[0];
}

class SyncFileTransport extends Transport {
    constructor(opts) {
        super(opts);
        this.filename = opts.filename || "default.log";
        this.filepath = path.resolve(this.filename);

        this.groupId = opts.groupId;
        this.scenarioID = opts.scenarioID;
    }

    log(info, callback) {
        setImmediate(() => this.emit("logged", info));

        const timestamp = customTimestamp();
        const level = info.level.toUpperCase();
        const safeMessage =  info.message == null
                                ? ""
                                : typeof info.message === "object"
                                ? JSON.stringify(info.message)
                                : String(info.message);

        // TODO: this is duplicated with prettyFormat() below because we can't use
        // both a formatter and a transport
        const formatted =
            safeMessage
                .split("\n")
                .map((line) => {
                    if (line.trim() === "<BLANK>") return ``;

                    if (this.scenarioID != null)
                        return `${timestamp} ${level} [${this.groupId}] [${this.scenarioID}] ${line}`;
                    else
                        return `${timestamp} ${level} [${this.groupId}] ${line}`; // spec
                })
                .join("\n") + "\n";

        try {
            fs.appendFileSync(this.filepath, formatted, "utf8");
        } catch (err) {
            console.error(`Failed to write to ${this.filepath}:`, err);
        }

        if (callback) callback();
    }
}

export function setupLogger(
    group,
    scenarioID,
    testCaseId,
    redirectConsole
) {
    const customTimestamp = () => {
        const now = new Date();
        return now.toISOString().replace("T", " ").replace("Z", " UTC");
    };

    const prettyFormat = winston.format.combine(
        winston.format.printf(({ level, message }) => {
            const timestamp = customTimestamp();
            const groupId = group || "unknown-group";

            const safeMessage = message == null
                ? ""
                : typeof message === "object"
                ? JSON.stringify(message)
                : String(message);

            const prefixedMessage = safeMessage
                .split("\n")
                .map((line) => {
                    if (line.trim() === "<BLANK>") return ``;

                    if (scenarioID != null)
                        return `${timestamp} ${level} [${groupId}] [${scenarioID}] ${line}`;
                    else return `${timestamp} ${level} [${groupId}] ${line}`; // spec
                })
                .join("\n");

            return prefixedMessage;
        })
    );

    const logger = winston.createLogger({
        level: "info",
        format: prettyFormat,
    });

    const consoleTransport = new transports.Console({
        format: winston.format.combine(winston.format.colorize(), prettyFormat),
    });

    logger.add(consoleTransport); // Always log to console

    if (group != null) {
        // no color in the file log
        const syncFileTransport = new SyncFileTransport({
            filename: `./tmp/log-${testCaseId}.log`,
            options: { flags: "a" },
            groupId: group,
            scenarioID: scenarioID,
        });
        logger.add(syncFileTransport);
    }

    // Redirect all the test code's usage of `console.*`,
    // but for the parent we don't need this.
    if (redirectConsole) {
        console.log = (message) => {
            logger.info(message);
        };

        console.info = (message) => {
            logger.info(message);
        };

        console.warn = (message) => {
            logger.warn(message);
        };

        console.error = (message) => {
            logger.error(message);
        };

        console.debug = (message) => {
            logger.debug(message);
        };
    }

    return logger;
}

A few things to note here:

This logger is used both by the parent process and the individual worker processes. Each process spawns their own logger instance, however the loggers will write to the same file if the test case IDs match. This way the parent process which emits Gherkin spec steps will write to the same file as the child process that actually runs test code.
It uses a sync file transport which essentially makes it flush to disk on every write rather than do internal buffering. This is required since both the parent and child process will write to the same file for a given scenario ID. This ensures there's at least some ordering to the messages. It's not 100% fool-proof, I've seen some log lines show up out of order.
The group is domain-specific, but essentially for my use-case each scenario can run in parallel but the tests need to run against a certain backend group. This isn't important here so feel free to ignore / remove.
There's some idiosyncrasies here specific to my use-case such as custom formatting, the odd <BLANK> workaround to force newlines, you can safely ignore all this stuff.

And then here's a custom formatter.js:

// formatter.js
const { Formatter } = require("@cucumber/cucumber");
const logger = require("./logger.js");
const { ProgressFormatter } = require("@cucumber/cucumber");

let envelopes = {
    testCaseStarted: new Map(),
    testCase: new Map(),
    pickle: new Map(),
    gherkinDocument: new Map(),
    testStep: new Map(),
};

let loggersMap = new Map();

function getLogger(testCaseId) {
    let log = loggersMap.get(testCaseId);
    if (!log) {
        log = logger.setupLogger("Spec", null, testCaseId, false);
        loggersMap.set(testCaseId, log);
    }

    return log;
}

class CustomFormatter extends Formatter {
    constructor(options) {
        super(options);

        options.eventBroadcaster.on("envelope", (envelope) => {
            if (envelope.testCaseStarted) {
                const log = getLogger(envelope.testCaseStarted.id);
                log.info(
                    `Test-case started: ${envelope.testCaseStarted.id}`
                );

                envelopes.testCaseStarted.set(
                    envelope.testCaseStarted.id,
                    envelope.testCaseStarted
                );
            }

            if (envelope.testCase) {
                envelopes.testCase.set(envelope.testCase.id, envelope.testCase);
            }

            if (envelope.pickle) {
                envelopes.pickle.set(envelope.pickle.id, envelope.pickle);
            }

            if (envelope.gherkinDocument) {
                envelopes.gherkinDocument.set(
                    envelope.gherkinDocument.uri,
                    envelope.gherkinDocument
                );
            }

            if (envelope.testStepFinished) {
                const testCaseId = envelope.testStepFinished.testCaseStartedId;
                const log = getLogger(testCaseId);

                if (
                    envelope.testStepFinished?.testStepResult?.status ===
                    "FAILED"
                ) {
                    const message =
                        envelope.testStepFinished.testStepResult.message ?? "";

                    log.error(`Step failed: ${message}`);
                    log.info("<BLANK>");
                }
            }

            if (envelope.testStepStarted) {
                const { testStepId, testCaseStartedId } =
                    envelope.testStepStarted;
                const log = getLogger(testCaseStartedId);

                const testCaseStarted =
                    envelopes.testCaseStarted.get(testCaseStartedId);
                if (!testCaseStarted) {
                    return;
                }

                const testCase = envelopes.testCase.get(
                    testCaseStarted.testCaseId
                );
                if (!testCase) {
                    return;
                }

                const testStep = testCase.testSteps.find(
                    (ts) => ts.id === testStepId
                );
                if (!testStep || !testStep.pickleStepId) {
                    // console.log(`Not a Gherkin step: ${testStepId}`);
                    return;
                }

                const pickle = envelopes.pickle.get(testCase.pickleId);

                const pickleStep = pickle.steps.find(
                    (ps) => ps.id === testStep.pickleStepId
                );

                const astNodeId = pickleStep.astNodeIds?.[0];

                const gherkinDoc = envelopes.gherkinDocument.get(pickle.uri);

                // This handles all feature children: Background, Rule, Scenario
                const allSteps = gherkinDoc.feature.children.flatMap(
                    (child) => {
                        if (child.rule) {
                            return child.rule.children.flatMap(
                                (ruleChild) => ruleChild.scenario?.steps ?? []
                            );
                        }
                        if (child.scenario) {
                            return child.scenario.steps || [];
                        }
                        if (child.background) {
                            return child.background.steps || [];
                        }
                        return [];
                    }
                );

                const gherkinStep = allSteps.find(
                    (step) => step.id === astNodeId
                );
                if (!gherkinStep) {
                    // log.info(`Step with astNodeId not found: ${astNodeId}`);
                    return;
                }

                log.info("<BLANK>");
                log.info(`${gherkinStep.keyword}${gherkinStep.text}`);

                // Log data table or doc string if present
                if (gherkinStep.dataTable) {
                    const tableStr = formatDataTable(gherkinStep.dataTable);
                    // log.info(`Step data table:\n${tableStr}`);
                    log.info(`${tableStr}`);
                }
                log.info("<BLANK>");
            }
        });
    }
}

/// Data tables become unformatted, re-format them for readability
function formatDataTable(dataTable) {
    const rows = dataTable.rows.map((row) =>
        row.cells.map((cell) => cell.value)
    );

    const colWidths = rows[0].map((_, colIndex) =>
        Math.max(...rows.map((row) => row[colIndex]?.length || 0))
    );

    const formattedRows = rows.map(
        (row) =>
            "| " +
            row.map((cell, i) => cell.padEnd(colWidths[i])).join(" | ") +
            " |"
    );

    return formattedRows.join("\n");
}

// for now assume that any time --parallel is used we want parallelism, even if --parallelism 1,
// it's too hard to parse this properly
function isParallelEnabled() {
    return process.argv.find((arg) => arg.startsWith("--parallel"));
}

if (isParallelEnabled()) {
    module.exports = CustomFormatter;
} else {
    module.exports = ProgressFormatter;
}

You'll notice the parent doesn't know anything about the scenario ID, this is because I use a custom hash for a scenario ID. This could easily be changed for someone else's purpose.

And then in cucumber.js we just add it to the list of arguments, for example:

let common = [
    "--require ./setup.js",
    "--format ./formatter.js",
].join(" ");

module.exports = {
    default: `${common} -f json:reports/cucumber-report.json`,
};

And finally in a Before hook I also make sure the child process is using the same logger:

Before({ timeout: 180 * 1000 }, async function (scenario) {
    this.testData.scenarioId = ...;  // some UUID

    let logger;
    if (isParallelEnabled()) {
        logger = setupLogger(
            getCptyGroup(),
            this.testData.scenarioId,
            scenario.testCaseStartedId,
            true
        );
    }

    // use logger directly if needed, or just console.log() that will redirect there

Then with --parallel 2 everything works out fine.

There's probably better ways to do all of this. But this is the best I could come up with.

I've also attempted IPC instead of using a shared log file, but Cucumber intercepts any messages from the child process and will throw if it's not well-formed.

May 04 '25 06:05 AndrejMitrovic

cucumber-js cucumber-js copied to clipboard

Using -parallel and redirecting stdout / logging

cucumber-js
cucumber-js copied to clipboard