jenkins.io changelogs converted

Specifically @timja as release master

Note: weekly.yml is indented by 2 spaces. I'm not sure why, but I stuck with the convention.

Added via - found the right line in vim, then :r! the command below.

node convertChangelogStable2.mjs ./content/_partials/changelog-old.html | sed -e 's/^/  /'

node convertChangelogStable2.mjs ./content/_partials/changelog-stable.html

Script (not in commit message this time):

/*
 * Converts https://github.com/jenkins-infra/jenkins.io/blob/master/content/_partials/changelog-stable.html which is a bunch of html4
 * to structured yaml like https://github.com/jenkins-infra/jenkins.io/blob/master/content/_data/changelogs/lts.yml
 */
import YAML from 'yaml';
import fs from 'fs';
import cheerio from 'cheerio';

// Release records collected by the parsing loop further down in this script.
const releases = [];

// The changelog HTML file to convert is the first command-line argument.
const inputPath = process.argv[2];
const rawHtml = fs.readFileSync(inputPath, 'utf8');
// Rewrite <h4> tags as <h3> so the rest of the script only has to deal with <h3> headings.
const normalisedHtml = rawHtml.replace(/<h4/g, '<h3').replace(/<\/h4/g, '</h3');
const $ = cheerio.load(normalisedHtml);

/**
 * Builds a fixed-width key from a dotted version string so that plain string
 * comparison orders versions numerically.
 *
 * Every component is left-padded with zeros to 5 characters, and versions with
 * fewer than 3 components are extended with zero components, e.g.
 *   2.29  => 00002_00029_00000
 *   2.290 => 00002_00290_00000
 * Versions with more than 3 components keep all of their components.
 *
 * @param {string} version - dotted version string such as "2.29" or "1.2.3"
 * @returns {string} the zero-padded components joined with "_"
 */
const machineVersion = (version) => {
    const minParts = 3;
    const partWidth = 5;
    const parts = version.split('.');
    // Clamp at zero: Array() throws a RangeError for a negative length, which
    // would happen for any version with more than `minParts` components.
    const missing = Math.max(0, minParts - parts.length);
    const padded = parts.concat(Array(missing).fill('0'));
    return padded.map((part) => part.padStart(partWidth, '0')).join('_');
};


// Walk the first <h3> and then each following sibling element in turn:
//   <h3>          starts a new release record
//   <div> / <p>   carries LTS metadata ("Changes from X", ...) or a banner
//   <ul>          lists the changes of the most recently started release
// Anything else is logged so it can be inspected by hand.
let $node = $('h3:first');
// Stop when the selection runs out of siblings. The loop used to end only when
// .prop("tagName") threw on an empty selection, which swallowed every other
// error too and never terminates if .prop() returns undefined instead.
while ($node.length > 0) {
    const tagName = $node.prop('tagName');

    if ($node.is('h3')) {
        const headingText = $node.text();
        const release = {
            version: $node.attr('id')?.replace('v', ''),
            changes: [],
        };
        if (!release.version) {
            // NOTE(review): a bare string literal "content/_data/changelogs/weekly.yml"
            // sat here as a no-op statement; presumably a reminder that id-less
            // headings show up when converting data for that file - confirm.
            release.version = headingText.replace("What's new in ", '');
        }
        // A version number found in the heading text takes precedence over the id.
        const versionMatches = headingText.match(/\b([0-9.]+)\b/);
        if (versionMatches) {
            release.version = versionMatches[1];
        }
        // Dates appear in parentheses with "-" or "/" separators; they are stored "-"-joined.
        const dateMatches = headingText.match(/\((\d+)[-\/](\d+)[-\/](\d+)\)/);
        if (dateMatches) {
            release.date = `${dateMatches[1]}-${dateMatches[2]}-${dateMatches[3]}`;
        }
        releases.push(release);
    } else if ($node.is('div') || $node.is('p')) {
        const release = releases[releases.length - 1];
        const text = $node.find('strong').text().trim();
        const changesFrom = text.match(/Changes from ([0-9\.]+)/);
        const noChanges = text.match(/No changes compared to ([0-9\.]+)/);
        const notableSince = text.match(/Notable changes since ([0-9\.]+)/);
        if (changesFrom) {
            release.lts_predecessor = changesFrom[1];
        } else if (noChanges) {
            release.lts_predecessor = noChanges[1];
        } else if (notableSince) {
            release.lts_baseline = notableSince[1];
            // From here on, <ul> items of this release are collected as lts_changes.
            release.lts_changes = [];
        } else {
            release.banner = $node.prop('innerHTML').trim();
        }
    } else if ($node.is('ul')) {
        $node.find('li').each(function (_, elm) {
            const $elm = $(elm);
            const release = releases[releases.length - 1];
            const change = {
                type: $elm.attr('class'),
                message: '',
            };
            release[release.lts_changes ? 'lts_changes' : 'changes'].push(change);

            // Create the references array lazily so changes without links get no key at all.
            const addReference = (reference) => {
                if (!change.references) {
                    change.references = [];
                }
                change.references.push(reference);
            };

            // Issue and pull-request links become numeric references and are
            // removed from the message. Issues are handled before pull requests.
            const numberedLinks = [
                ['a[href^=https://issues.jenkins.io/browse/]', 'issue'],
                ['a[href^=https://github.com/jenkinsci/jenkins/pull/]', 'pull'],
            ];
            for (const [selector, referenceKey] of numberedLinks) {
                $elm.find(selector).each(function (_, a) {
                    const $a = $(a);
                    const idMatch = $a.text().match(/(\d+)/);
                    if (!idMatch) {
                        // No number in the link text: keep the link in the message
                        // rather than crashing on a null match.
                        return;
                    }
                    addReference({[referenceKey]: parseInt(idMatch[1], 10)});
                    $a.remove();
                });
            }
            $elm.find('a[href^=https://jenkins.io]').each(function (_, a) {
                const $a = $(a);
                addReference({url: $a.prop('href'), title: $a.text().trim()});
                $a.remove();
            });
            change.message = (change.message + $elm.html())
                .replace(/\((\s|and|,)*/gm, '(') // clean up any ( junk
                .trim();
        });
    } else {
        console.log(tagName, $node.attr());
    }
    $node = $node.next();
}
/**
 * Turns one parsed change into a YAML node whose keys appear in a fixed order:
 * `type` (only when truthy), `message`, then `references` (only when truthy).
 *
 * The message has empty "()" pairs removed, every line trimmed, and blank
 * lines dropped.
 *
 * @param {{type?: string, message: string, references?: object[]}} change
 * @returns the node created by YAML.createNode with the keys added
 */
const changeToYamlNode = (change) => {
    const {type, message, references} = change;

    const keptLines = [];
    for (const line of message.replace(/\(\s*\)/g, '').split("\n")) {
        const trimmed = line.trim();
        if (trimmed) {
            keptLines.push(trimmed);
        }
    }

    // Collect the pairs first so the key order is easy to see in one place.
    const entries = [];
    if (type) {
        entries.push(['type', type]);
    }
    entries.push(['message', keptLines.join("\n")]);
    if (references) {
        entries.push(['references', references]);
    }

    const changeNode = YAML.createNode({});
    for (const [key, value] of entries) {
        changeNode.add({key, value});
    }
    return changeNode;
};
// Sort the releases in place into ascending version order by comparing their padded keys.
const byMachineVersion = (left, right) => machineVersion(left.version).localeCompare(machineVersion(right.version));

// Build one YAML node per release. Keys are added one at a time so their order
// is fixed and keys without a value are left out instead of being written as null.
const releaseToYamlNode = (release) => {
    const node = YAML.createNode({version: release.version});
    for (const key of ['date', 'lts_predecessor', 'lts_baseline']) {
        if (release[key]) {
            node.add({key, value: release[key]});
        }
    }
    for (const key of ['lts_changes', 'changes']) {
        if (release[key]) {
            node.add({key, value: release[key].map(changeToYamlNode)});
        }
    }
    return node;
};

const doc = releases.sort(byMachineVersion).map(releaseToYamlNode);

console.log(YAML.stringify(doc).trim());

Nov 08 '21 03:11 halkeye

oh cool we style kbd (also i learned its a tag)

Nov 08 '21 03:11 halkeye

stable looks good, no bad data before it started

old has missing dates and stuff, changes don't have a type.

drat just noticed that it doesn't handle a couple reference links properly. I'll go back and fix them

Nov 08 '21 03:11 halkeye

Weekly changelog is updated via a script which will reformat the file every time it’s run

Nov 08 '21 07:11 timja

Yea i took a peek at that. I don't know why it's one level in. Top level should have 0 spaces but apparently it's fine as long as it's consistent

Nov 08 '21 07:11 halkeye

I'll go back and fix them

Btw I did fix the script and the output. I should be able to regenerate the pr if it ever conflicts

Nov 08 '21 08:11 halkeye

Hmm this makes the file even longer than before =/.

It's already a bit of a pain to edit and browsers can struggle with it, along with insane scrolling. I just tried with the file, and the browser was OK, but I couldn't find a way to quickly get to the bottom; I scrolled for a while and gave up.

Could we archive it in some way while still having it available?

Nov 08 '21 08:11 timja

My goal is to try and unify as much legacy and one-off data as possible into consistent machine-parsable files.

Could easily move the 1.x stuff to another file, especially if we don't even bother to render it. It's neat from a historical point of view, but 20-year-old versions aren't super useful.

Nov 08 '21 08:11 halkeye

I think it would be good to archive 1.x at least, we could probably do more as well but can we see what that looks like?

Nov 08 '21 09:11 timja

Please take a moment and address the merge conflicts of your pull request. Thanks!

Mar 07 '23 19:03 github-actions[bot]

jenkins.io jenkins.io copied to clipboard

changelogs converted

jenkins.io
jenkins.io copied to clipboard