svalbard
svalbard copied to clipboard
CKAN metadata exporter
We need a CLI that regularly monitors the metadata of a CKAN instance and produces complete exports of that metadata in NDJSON format.
For Data.gov I used the script below. Note that its query does not return every item: Data.gov has sub-organizations whose datasets this query ignores. I'm not sure whether sub-organizations are a general CKAN concept or specific to Data.gov.
var request = require('request')
var fs = require('fs')
var ndjson = require('ndjson')
// Paging state and configuration for the package_search requests.
const rows = 1000 // records requested per page; also the step added to `current`
const delay = 1000 // timeout handed to setTimeout between requests
const limit = 200000 // paging stops once `current` exceeds this value
let current = 0 // `start` offset of the next page to request

// Every record written to `serialize` is piped into ./meta.json.
const serialize = ndjson.serialize()
const write = fs.createWriteStream('./meta.json')
serialize.pipe(write)
/**
 * Build the package_search URL for one page of results.
 *
 * The page size comes from the file-level `rows` variable.
 *
 * @param {number} start - value for the `start` query parameter
 * @returns {string} the request URL for that page
 */
function page (start) {
  var endpoint = 'http://catalog.data.gov/api/3/action/package_search'
  var query = [`rows=${rows}`, `start=${start}`].join('&')
  return `${endpoint}?${query}`
}
/**
 * Fetch the page of search results at the file-level `current` offset, write
 * every returned record to `serialize`, then either schedule the next page
 * after `delay` or end the stream.
 *
 * The export finishes when a page comes back empty (there are no more records
 * to fetch) or when the advanced offset exceeds `limit`.
 *
 * Errors are thrown from inside the request callback:
 * @throws {Error} the transport error passed to the callback, a non-200
 *   status, or a response body without a `result.results` array
 */
function go () {
  var url = page(current)
  console.log('GET', url)
  request({url: url, json: true}, function (err, resp, body) {
    if (err) throw err
    if (resp.statusCode !== 200) {
      // Serialize the body: passing an object straight to Error() would
      // produce the useless message "[object Object]".
      throw new Error('Unexpected status ' + resp.statusCode + ' for ' + url + ': ' + JSON.stringify(body))
    }
    var results = body && body.result && body.result.results
    if (!Array.isArray(results)) {
      throw new Error('Unexpected response body for ' + url + ': ' + JSON.stringify(body))
    }
    results.forEach(function (r) {
      serialize.write(r)
    })
    current += rows
    // An empty page is the normal end of the catalog, not a failure: finish
    // the stream here instead of throwing and leaving it un-ended.
    if (results.length === 0 || current > limit) {
      console.log('done')
      serialize.end()
      return
    }
    setTimeout(go, delay)
  })
}
// Kick off the export; each completed request schedules the next page itself.
go()