html2json icon indicating copy to clipboard operation
html2json copied to clipboard

Example not working

Open cmcknight opened this issue 5 years ago • 2 comments

I'm using the following HTML page:

<!doctype html>
<html lang="en">
<head>
    <title>Test Page</title>
</head>
<body>
    <div id="1" class="foo">
    <h2>sample text with <code>inline tag</code></h2>
    <pre id="demo" class="foo bar">foo</pre>
    <pre id="output" class="goo">goo</pre>
    <input id="execute" type="button" value="execute"/>
    </div>
</body>
</html>

with the following code:

var html2json = require('html2json').html2json;
var json2html = require('html2json').json2html;
var fs = require ('fs');

// load the HTML from a file
var htmldoc = fs.readFileSync('./index2.html', "utf8");
console.log(htmldoc);

// convert the innerHTML of the body to JSON
var jsondoc = html2json(htmldoc.body.innerHTML);
console.log(jsondoc);

with the following output:

<!doctype html>
<html lang="en">
<head>
    <title>Test Page</title>
    <meta charset="utf-8" >
</head>
<body>
    <div id="1" class="foo">
    <h2>sample text with <code>inline tag</code></h2>
    <pre id="demo" class="foo bar">foo</pre>
    <pre id="output" class="goo">goo</pre>
    <input id="execute" type="button" value="execute"/>
    </div>
</body>
</html>

[stdin]:12
var jsondoc = html2json(htmldoc.body.innerHTML);
                                     ^

TypeError: Cannot read property 'innerHTML' of undefined
    at [stdin]:12:38
    at Script.runInThisContext (vm.js:132:18)
    at Object.runInThisContext (vm.js:315:38)
    at Object.<anonymous> ([stdin]-wrapper:10:26)
    at Module._compile (internal/modules/cjs/loader.js:1236:30)
    at evalScript (internal/process/execution.js:98:25)
    at internal/main/eval_stdin.js:29:5
    at Socket.<anonymous> (internal/process/execution.js:211:5)
    at Socket.emit (events.js:326:22)
    at endReadableNT (_stream_readable.js:1244:12)
[Finished in 0.249s]

What am I missing? ☹️

cmcknight avatar Sep 07 '20 20:09 cmcknight

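@cmcknight When fs.readFileSync is called with an encoding ("utf8" here), it returns a string, which means the htmldoc variable is a plain string and doesn't have a body property. That is why htmldoc.body is undefined and reading innerHTML from it throws the TypeError. Passing the string directly, i.e. var jsondoc = html2json(htmldoc);, will probably work for you.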

fremail avatar Oct 26 '20 09:10 fremail

@cmcknight: As @fremail pointed out, the HTML is not a DOM object when it is read from a file; it's just a string. You can use the "jsdom" package to parse the file into a DOM and then pass the body's innerHTML to html2json:

var html2json = require('html2json').html2json;
var json2html = require('html2json').json2html;
const { JSDOM } = require("jsdom");
var fs = require('fs');

JSDOM.fromFile("Sample.html").then(dom => {
    console.log(dom.window.document.body.innerHTML.toString().trim());
    var jsondoc = html2json(dom.window.document.body.innerHTML.toString().trim());
   console.log(jsondoc );
    fs.writeFile('sample.json', JSON.stringify(jsondoc), function (err) {
        if (err) throw err;
        console.log('Created File!');
    });
 });

Sesh2020 avatar Oct 28 '20 11:10 Sesh2020