graaljs
graaljs copied to clipboard
Poor performance of simple character-set regexes relative to Node 20.1
While comparing GraalVM with Node for server-side rendering of React, I noticed that the Graal version is losing significant time escaping HTML in ReactDOMServer. The code used to escape the HTML comes from https://github.com/component/escape-html/blob/master/index.js.
In profiling, the time appears to be spent executing the regex code below, which determines whether the HTML string has any characters that need to be escaped. This code performs significantly faster in Node. This regex needs to run as fast as a single-pass loop over the input characters.
/**
 * Matches the first character that has to be escaped in HTML:
 * double quote, single quote, ampersand, less-than or greater-than.
 */
var matchHtmlRegExp = /["'&<>]/;

/**
 * Escape the HTML-special characters `"`, `'`, `&`, `<` and `>` in a value.
 *
 * @param {*} string - The value to escape; it is coerced to a string first.
 * @returns {string} The coerced string with each special character replaced
 *   by its HTML entity. When nothing needs escaping, the coerced string is
 *   returned unchanged.
 */
function escapeHtml (string) {
  var str = '' + string;
  var match = matchHtmlRegExp.exec(str);

  // Fast path: no special character anywhere in the input.
  if (!match) {
    return str;
  }

  var escape;
  var html = '';
  var index = 0;
  var lastIndex = 0;

  // Start scanning at the first special character found by the regex.
  for (index = match.index; index < str.length; index++) {
    switch (str.charCodeAt(index)) {
      case 34: // "
        escape = '&quot;';
        break;
      case 38: // &
        escape = '&amp;';
        break;
      case 39: // '
        escape = '&#39;';
        break;
      case 60: // <
        escape = '&lt;';
        break;
      case 62: // >
        escape = '&gt;';
        break;
      default:
        continue;
    }

    // Copy the run of ordinary characters that precedes this special one.
    if (lastIndex !== index) {
      html += str.substring(lastIndex, index);
    }

    lastIndex = index + 1;
    html += escape;
  }

  // Append whatever ordinary characters remain after the last escape.
  return lastIndex !== index
    ? html + str.substring(lastIndex, index)
    : html;
}
Another example where a simple character-set regex kills performance is in the serialize-javascript module:
// Characters that are unsafe inside an HTML <script> context, plus the
// Unicode line/paragraph separators U+2028 and U+2029.
var UNSAFE_CHARS_REGEXP = /[<>\/\u2028\u2029]/g;
// ...
// Maps each unsafe character to the literal text of its \uXXXX escape
// sequence (note the doubled backslash: the value is six characters long,
// not the character itself).
var ESCAPED_CHARS = {
  '<': '\\u003C',
  '>': '\\u003E',
  '/': '\\u002F',
  '\u2028': '\\u2028',
  '\u2029': '\\u2029'
};

/**
 * Replacement callback for `String.prototype.replace`: returns the escaped
 * form of a single unsafe character.
 *
 * @param {string} unsafeChar - One character matched by UNSAFE_CHARS_REGEXP.
 * @returns {string} The corresponding entry of ESCAPED_CHARS.
 */
function escapeUnsafeChars(unsafeChar) {
  return ESCAPED_CHARS[unsafeChar];
}
// ....
// Creates a JSON string representation of the value.
// NOTE: Node 0.12 goes into slow mode with extra JSON.stringify() args.
if (options.isJSON && !options.space) {
str = JSON.stringify(obj);
} else {
str = JSON.stringify(obj, options.isJSON ? null : replacer, options.space);
}
// Protects against `JSON.stringify()` returning `undefined`, by serializing
// to the literal string: "undefined".
if (typeof str !== 'string') {
return String(str);
}
// Replace unsafe HTML and invalid JavaScript line terminator chars with
// their safe Unicode char counterpart. This _must_ happen before the
// regexps and functions are serialized and added back to the string.
if (options.unsafe !== true) {
str = str.replace(UNSAFE_CHARS_REGEXP, escapeUnsafeChars);
}
....
Hi @a701440
thanks for the report, we are looking into this already.
General remark: In your first example, the poor performance might also come from the substring (we are slower than V8 on that). @djoooooe is looking into it.
Best, Christian
Tracked internally as https://ol-jira.us.oracle.com/browse/GR-23798
Fixed by https://github.com/oracle/graal/commit/a96499b969fafe723091f17361e7f7b62cf2b3eb . Graaljs is now ~20x faster than V8 when searching for /["'&<>]/
in 4KB of lorem ipsum.