UglifyJS
UglifyJS copied to clipboard
convert void 0 to a new var
I have sprinkled the code with lots of TODOs and questions. The basic implementation works, using only within-same-function transforms as proposed by @kzc.
- According to the benchmark:
- Average uglified size improved by ~0.16%
- Average gzip size worsened by ~0.17%
Do these results mean that we should abandon this idea? If we want this transform, it shouldn't be enabled by default, and information needs to be added to the README.
Results from node test/benchmark.js
|
BEFORE THIS CHANGE
https://code.jquery.com/jquery-3.2.1.js
Original: 268039 bytes Uglified: 86074 bytes GZipped: 30058 bytes SHA1 sum: 3e16e5ec39f60f779cf1cb407dcc0fb93c1216ab https://cdnjs.cloudflare.com/ajax/libs/angular.js/1.6.4/angular.js
Original: 1249863 bytes Uglified: 173616 bytes GZipped: 59988 bytes SHA1 sum: 87f388ccba07cb8505ac03ff75cadb235c6a437a https://cdnjs.cloudflare.com/ajax/libs/mathjs/3.9.0/math.js
Original: 1590107 bytes Uglified: 466740 bytes GZipped: 118842 bytes SHA1 sum: f8502b2990e245757a06d53ad670aaa710806e66 https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.js
Original: 69707 bytes Uglified: 36836 bytes GZipped: 9582 bytes SHA1 sum: 31b3a3cb53ed8dd5e92519891b92a60eb07babfc https://unpkg.com/[email protected]/dist/react.js
Original: 701412 bytes Uglified: 205174 bytes GZipped: 62273 bytes SHA1 sum: 7a8ad6431a54c84e476a1a87aeba0a80a6f1dd06 http://builds.emberjs.com/tags/v2.11.0/ember.prod.js
Original: 1852178 bytes Uglified: 526064 bytes GZipped: 129050 bytes SHA1 sum: 22037ab14a33379b2ee9ba9270ce22e4ee970a2c https://cdn.jsdelivr.net/lodash/4.17.4/lodash.js
Original: 539590 bytes Uglified: 69578 bytes GZipped: 24877 bytes SHA1 sum: 025d83a7526aa44dd4a175a5fabc3ef7c6992967 https://cdnjs.cloudflare.com/ajax/libs/d3/4.5.0/d3.js
Original: 451131 bytes Uglified: 211601 bytes GZipped: 71051 bytes SHA1 sum: 1636ad62c5338200bb99eb57fa3fd39f882502d2 https://raw.githubusercontent.com/kangax/html-minifier/v3.5.7/dist/htmlminifier.js
Original: 1063075 bytes Uglified: 508164 bytes GZipped: 158013 bytes SHA1 sum: ad6e5c94ba84dc6cb0227b57504f4c2e3abac535 |
WITH THIS CHANGE
https://code.jquery.com/jquery-3.2.1.js
Original: 268039 bytes Uglified: 85768 bytes GZipped: 30113 bytes SHA1 sum: 5a7980c8fc4be9cda00bdf23457b336d728021ff https://cdnjs.cloudflare.com/ajax/libs/angular.js/1.6.4/angular.js
Original: 1249863 bytes Uglified: 173393 bytes GZipped: 60045 bytes SHA1 sum: 6cc222e3f8bc88192bba299f3504ccc3959fa940 https://cdnjs.cloudflare.com/ajax/libs/mathjs/3.9.0/math.js
Original: 1590107 bytes Uglified: 466184 bytes GZipped: 119122 bytes SHA1 sum: 660b9e954b1f85ba742e964bbb35ebad167664e3 https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.js
Original: 69707 bytes Uglified: 36830 bytes GZipped: 9580 bytes SHA1 sum: d6c3dab88ac5f13c1f5dd20c232b60e3efbe237b https://unpkg.com/[email protected]/dist/react.js
Original: 701412 bytes Uglified: 204937 bytes GZipped: 62343 bytes SHA1 sum: a76418a746a313e3eab3db474811dbf280758922 http://builds.emberjs.com/tags/v2.11.0/ember.prod.js
Original: 1852178 bytes Uglified: 524078 bytes GZipped: 129669 bytes SHA1 sum: 1f19d7f41160f151fca54afe903b0a34d3098f1c https://cdn.jsdelivr.net/lodash/4.17.4/lodash.js
Original: 539590 bytes Uglified: 69578 bytes GZipped: 24877 bytes SHA1 sum: 025d83a7526aa44dd4a175a5fabc3ef7c6992967 https://cdnjs.cloudflare.com/ajax/libs/d3/4.5.0/d3.js
Original: 451131 bytes Uglified: 211569 bytes GZipped: 71062 bytes SHA1 sum: 5c47a60b136f38376fcadff4129f0254ebca0d5e https://raw.githubusercontent.com/kangax/html-minifier/v3.5.7/dist/htmlminifier.js
Original: 1063075 bytes Uglified: 507867 bytes GZipped: 158078 bytes SHA1 sum: 87d89bfcd4d713ed3e91f9bfecd7dc86c59cd00b |
fixes #2585
The uglified numbers look positive for starters.
I suspect the worse gzipped results are (partly) due to mangle – one way to assess is to run without it:
node test/benchnark.js -c
I haven't looked at the code yet as I'm about to doze off – will get back to you on that.
Thanks for working on this :+1:
I think disabling mangling doesn't change the gzip results (~0.17% worse, like before). I'm also tired, so I might be mistaken.
Here's an idea that probably doesn't change anything, but could.
// with this transform
var existing=1,variables=2,undefined; ...
// idea
var undefined,existing=1,variables=2;...
Re-reading https://github.com/mishoo/UglifyJS2/issues/2585#issuecomment-351479601 it claims to produce post-gzip improvements, which is not observed anywhere (except bootstrap.js) - so I'm not sure how that was done.
Here's a more restricted version which never introduces new variables, i.e. only group void 0s into an existing variable which happens to be undefined:
--- a/lib/compress.js
+++ b/lib/compress.js
@@ -58,6 +58,7 @@ function Compressor(options, false_by_default) {
evaluate : !false_by_default,
expression : false,
global_defs : {},
+ group_voids : !false_by_default,
hoist_funs : false,
hoist_props : !false_by_default,
hoist_vars : false,
@@ -196,6 +197,11 @@ merge(Compressor.prototype, {
}
}
}
+ if (this.option("group_voids")) {
+ node.figure_out_scope(mangle);
+ node.reset_opt_flags(this);
+ node.group_voids(this);
+ }
if (this.option("expression")) {
node.process_expression(false);
}
@@ -270,6 +276,28 @@ merge(Compressor.prototype, {
return this.TYPE == node.TYPE && this.print_to_string() == node.print_to_string();
});
+ AST_Toplevel.DEFMETHOD("group_voids", function(compressor) {
+ var void_sym;
+ this.transform(new TreeTransformer(function(node, descend) {
+ if (node instanceof AST_Scope) {
+ var old_sym = void_sym;
+ node.variables.each(function(def) {
+ if (old_sym !== void_sym) return;
+ var fixed = def.orig[0].fixed_value();
+ if (fixed && is_undefined(fixed, compressor)) {
+ void_sym = def.orig[0];
+ }
+ });
+ descend(node, this);
+ void_sym = old_sym;
+ return node;
+ }
+ if (void_sym && is_undefined(node, compressor)) {
+ return make_node(AST_SymbolRef, void_sym, void_sym);
+ }
+ }));
+ });
+
AST_Scope.DEFMETHOD("process_expression", function(insert, compressor) {
var self = this;
var tt = new TreeTransformer(function(node) {
This only makes a difference to math.js, but even so gzip still isn't a win:
node test/benchmark -mc group_voids=false
https://cdnjs.cloudflare.com/ajax/libs/mathjs/3.9.0/math.js
- parse: 0.875s
- rename: 0.562s
- compress: 5.350s
- scope: 0.203s
- mangle: 0.328s
- properties: 0.000s
- output: 0.234s
- total: 7.552s
Original: 1590107 bytes
Uglified: 466736 bytes
GZipped: 118836 bytes
SHA1 sum: 39105dfb8a9231469b83993c388735b9655cab2d
node test/benchmark -mc group_voids=true
https://cdnjs.cloudflare.com/ajax/libs/mathjs/3.9.0/math.js
- parse: 0.921s
- rename: 0.579s
- compress: 5.531s
- scope: 0.156s
- mangle: 0.328s
- properties: 0.000s
- output: 0.235s
- total: 7.750s
Original: 1590107 bytes
Uglified: 466571 bytes
GZipped: 118888 bytes
SHA1 sum: 6a6435c980dea291bd5b07bd1a34c131b9d19a38
So I reckon the initial analysis is flawed, which is sadly quite often the case when it comes to mangle and gzip.
At least it gives you an idea how scan and replace the AST with minimal number of rounds :wink:
My idea to place the undefined variable first in the var declaration produces slightly worse results. I've tried a few variants to see if the placement of the new undefined declaration can make the gzip gods happier.
// Last in the first var (my initial implementation)
// Gzip worse by 0.174%
var a, b, undefined; var c, d; var e, f;
// First in the first var
// Gzip worse by 0.189%
var undefined, a, b; var c, d; var e, f;
// Last in the last var
// Gzip worse by 0.145%
var a, b; var c, d; var e, f, undefined;
// First in the last var
// Gzip worse by 0.158%
var a, b; var c, d; var undefined, e, f;
@alexlamsl's idea of never introducing new variables has a tiny effect compared to introducing new variables, so I don't think it would be worth it. If we're implementing any of these, the "last in the last var" should be used, since it produces the least bad gzip.
I also think we should benchmark introducing a var before deciding what to do.
Not creating a new var (like my initial impementation) improves uglification by 0.159% (as reported in my initial comment).
// Last in the last var (best from my comment above)
// Uglify better by 0.159%
// Gzip worse by 0.145%
// Last in the last var + create new `var` last in function if there is no existing `var`
// Uglify better by 0.165%
// Gzip worse by 0.162%
// Last in the last var + create new `var` last in function if there is no existing `var` only if there are more than two `void 0` uses
// Uglify better by 0.171%
// Gzip worse by 0.152%
// Last in the last var + create new `var` first in function if there is no existing `var`
// Uglify better by 0.164%
// Gzip worse by 0.152%
// Last in the last var + create new `var` first in function if there is no existing `var` only if there are more than two `void 0` uses
// Uglify better by 0.171%
// Gzip worse by 0.148%
Idea: introduce new var if there are more than three void 0 uses. 😄
Introducing new variables tend to cause cascading effects on variable name assignments within mangle, which is why such a large gzip delta can be observed with a seemingly small change.
I haven't come up with any better ideas to try yet, but then we are in no great hurry either :wink:
And yes, putting that new variable first in the first var would shift the names of all subsequent declarations in the scope, so has a bigger effect than a trailing declaration (still not zero since it would cause intra-scope disruptions).
FWIW, inline>1 also had to fight with gzip regressions back then via various tricks and hacks to make it profitable.
I've done a bunch of experiments varying how many voids are required before they are replaced. In all these experiments a new variable to an existing var is placed last, and a new var is placed first (which the above experiments show that Gzip likes best). I've compared the sum(uglified) with sum(gzip) for the benchmark test, not using a geometric mean (which might be more mathematically rigorous). In no case is the Gzip improved. 😞
I can't think of another way to improve the Gzip without the new variable crossing scopes. Where does that leave us? Abandon this project or include it anyway defaulted off?
If we include it, it should have as large an uglify improvement compared to the gzip worsening, while still making a significant difference. Choosing from the options below, I'd go for either {1, 2} or {2, 2}. What do you think?
| Add variable to `var` when it replaces | Add new `var` when it replaces | Uglify improvement | Gzip worsening | Uglify-Gzip factor |
|---|---|---|---|---|
| 1 void | never | 0.159 % | 0.145 % | 1.10 |
| 1 void | 1 void | 0.164 % | 0.152 % | 1.08 |
| 1 void | 2 voids | 0.171 % | 0.148 % | 1.16 |
| 1 void | 3 voids | 0.167 % | 0.144 % | 1.16 |
| 2 voids | 2 voids | 0.117 % | 0.072 % | 1.63 |
| 2 voids | 3 voids | 0.113 % | 0.069 % | 1.64 |
| 2 voids | never | 0.106 % | 0.068 % | 1.56 |
| 3 voids | 3 voids | 0.075 % | 0.041 % | 1.83 |
| 3 voids | never | 0.067 % | 0.04 % | 1.68 |
| 5 voids | 5 voids | 0.03 % | 0.01 % | 3 |
| 5 voids | never | 0.026 % | 0.01 % | 2.6 |
(Assuming I haven't messed up the implementation or the numbers.)
IMHO the numbers look rather unappealing as it stands - I'd be happier if the uglified gain is much larger than the gzip loss.
I think part of the issue here is with mangle, c.f. #2219
Introducing cross-scope variables for this feature probably won't do a lot of good, since it'd mess with variable name assignments pretty much the same way.
@Skalman now that #2948 is in, please have another go at this and see if your previous strategies would be more compressible with the new mangle.
Sure, I'll have a look in a few days.
@Skalman now that #2948 is in, please have another go at this and see if your previous strategies would be more compressible with the new mangle.
I tried rebasing against the current master, but the numbers are no better.
A solution that'd likely improve also the Gzip numbers would be to do what @alexlamsl did in #2945. It should be possible to implement that during the compress stage, before mangle.
This would introduce captured variables. @kzc believes that this would cause runtime performance issue, and intuitively I'd agree. @alexlamsl, is this incorrect?
@Skalman as I mentioned in https://github.com/mishoo/UglifyJS2/pull/2945#issuecomment-367797185, no measurable performance difference is observed through JetStream.
In general we need something like 10x slowdown across a majority number of platforms to justify worrying about it, as JavaScript runtimes will receive continuous speed-ups and tuning against common code snippets with time.
I'm actually surprised the numbers don't improve with the latest mangle - I'll take another look at the changes in this PR after some sleep to see what I have missed.
Hmm - looks like the upper-level variables still don't like to be kicked around by those newly introduced void 0 substitutes:
--- a/lib/compress.js
+++ b/lib/compress.js
@@ -59,6 +59,7 @@ function Compressor(options, false_by_default) {
evaluate : !false_by_default,
expression : false,
global_defs : {},
+ group_voids : false,
hoist_funs : false,
hoist_props : !false_by_default,
hoist_vars : false,
@@ -200,6 +201,9 @@ merge(Compressor.prototype, {
if (this.option("expression")) {
node.process_expression(false);
}
+ if (this.option("group_voids")) {
+ node.group_voids(this);
+ }
return node;
},
info: function() {
@@ -316,6 +320,79 @@ merge(Compressor.prototype, {
self.transform(tt);
});
+ AST_Scope.DEFMETHOD("get_void_var", function(compressor) {
+ var void_var;
+ this.variables.each(function(def) {
+ if (void_var) return;
+ var value = def.orig[0].fixed_value();
+ if (value && is_undefined(value, compressor)) {
+ void_var = def.orig[0];
+ }
+ });
+ if (!void_var) {
+ void_var = make_node(AST_SymbolVar, this, {
+ name: this.make_var_name("undefined"),
+ scope: this
+ });
+ this.enclosed.push(this.def_variable(void_var));
+ var var_stat;
+ this.walk(new TreeWalker(function(node) {
+ if (node instanceof AST_Scope && this.parent()) return true;
+ if (node instanceof AST_Var) {
+ var_stat = node;
+ return true;
+ }
+ }));
+ if (!var_stat) {
+ var_stat = make_node(AST_Var, this, {
+ definitions: []
+ });
+ this.body.push(var_stat);
+ }
+ var_stat.definitions.push(make_node(AST_VarDef, this, {
+ name: void_var,
+ value: null
+ }));
+ }
+ return void_var;
+ });
+
+ AST_Toplevel.DEFMETHOD("group_voids", function(compressor) {
+ var scope, void_var;
+ this.transform(new TreeTransformer(function(node, descend) {
+ if (node instanceof AST_Scope) {
+ var save_scope = scope;
+ var save_var = void_var;
+ scope = node;
+ void_var = null;
+ descend(node, this);
+ void_var = save_var;
+ scope = save_scope;
+ return node;
+ }
+ if (is_undefined(node, compressor)) {
+ if (!void_var) void_var = scope.get_void_var(compressor);
+ return make_node(AST_SymbolRef, node, void_var);
+ }
+ }));
+ });
+
+ AST_Toplevel.DEFMETHOD("group_voids_cross_scope", function(compressor) {
+ var self = this, scope, void_var;
+ this.transform(new TreeTransformer(function(node, descend) {
+ if (node instanceof AST_Scope && node.parent_scope === self) {
+ scope = node;
+ descend(node, this);
+ scope = null;
+ void_var = null;
+ return node;
+ }
+ if (scope && is_undefined(node, compressor)) {
+ if (!void_var) void_var = scope.get_void_var(compressor);
+ return make_node(AST_SymbolRef, node, void_var);
+ }
+ }));
+ });
+
(function(def){
def(AST_Node, noop);
And yes, if you swap node.group_voids(this); with node.group_voids_cross_scope(this); you get better uglified and gzipped results.