Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: parallelize terser runs #1123

Merged
merged 13 commits into from
Sep 19, 2019
102 changes: 89 additions & 13 deletions packages/terser/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
const fs = require('fs');
const path = require('path');
const child_process = require('child_process');
const os = require('os')

// Run Bazel with --define=VERBOSE_LOGS=1 to enable this logging
const VERBOSE_LOGS = !!process.env['VERBOSE_LOGS'];
Expand All @@ -17,6 +18,10 @@ function log_verbose(...m) {
if (VERBOSE_LOGS) console.error('[terser/index.js]', ...m);
}

function log_error(...m) {
console.error('[terser/index.js]', ...m);
}

// Peek at the arguments to find any directories declared as inputs
let argv = process.argv.slice(2);
// terser_minified.bzl always passes the inputs first,
Expand All @@ -25,10 +30,19 @@ let argv = process.argv.slice(2);
// Avoid a dependency on a library like minimist; keep it simple.
const outputArgIndex = argv.findIndex((arg) => arg.startsWith('--'));

// We don't want to implement a command-line parser for terser
// so we invoke its CLI as child processes when a directory is provided, just altering the
// input/output arguments. See discussion: https://github.com/bazelbuild/rules_nodejs/issues/822

const inputs = argv.slice(0, outputArgIndex);
const output = argv[outputArgIndex + 1];
const residual = argv.slice(outputArgIndex + 2);

// user override for the terser binary location. used in testing.
const TERSER_BINARY = process.env.TERSER_BINARY || require.resolve('terser/bin/uglifyjs')
// choose a default concurrency of the number of cores -1 but at least 1.
const TERSER_CONCURENCY = (process.env.TERSER_CONCURRENCY || os.cpus().length - 1) || 1

log_verbose(`Running terser/index.js
inputs: ${inputs}
output: ${output}
Expand All @@ -43,31 +57,93 @@ function terserDirectory(input) {
fs.mkdirSync(output);
}

let work = [];
let active = 0;
let errors = [];

function exec([inputFile, outputFile]) {
active++;
soldair marked this conversation as resolved.
Show resolved Hide resolved
let args = [TERSER_BINARY, inputFile, '--output', outputFile, ...residual];

spawn(process.execPath, args)
.then(
(data) => {
if (data.code) {
errors.push(inputFile)
// NOTE: Even though a terser process has errored we continue here to collect all of
// the errors. this behavior is another candidate for user configuration because
// there is value in stopping at the first error in some use cases.

log_error(`errored: ${inputFile}\nOUT: ${data.out}\nERR: ${data.err}\ncode: ${
data.code}`);
} else {
log_verbose('finished: ', inputFile);
}
--active;
next();
},
(err) => {
--active;
log_error('errored: [spawn exception]', inputFile, '\n' + err)
errors.push(inputFile)
next();
})
}

function next() {
if (work.length) {
exec(work.shift());
} else if (!active) {
if (errors.length) {
log_error('terser errored processing javascript in directory.')
process.exitCode = 2;
}
// NOTE: work is done at this point and node should exit here.
}
}

fs.readdirSync(input).forEach(f => {
if (f.endsWith('.js')) {
const inputFile = path.join(input, path.basename(f));
const outputFile = path.join(output, path.basename(f));
// We don't want to implement a command-line parser for terser
// so we invoke its CLI as child processes, just altering the input/output
// arguments. See discussion: https://github.com/bazelbuild/rules_nodejs/issues/822
// FIXME: this causes unlimited concurrency, which will definitely eat all the RAM on your
// machine;
// we need to limit the concurrency with something like the p-limit package.
// TODO: under Node 12 it should use the worker threads API to saturate all local cores
child_process.fork(
// Note that the fork API doesn't do any module resolution.
require.resolve('terser/bin/uglifyjs'), [inputFile, '--output', outputFile, ...residual]);

if (active < TERSER_CONCURENCY) {
exec([inputFile, outputFile]);
} else {
work.push([inputFile, outputFile])
}
}
});
}

if (!inputs.find(isDirectory)) {
if (!inputs.find(isDirectory) && inputs.length) {
// Inputs were only files
// Just use terser CLI exactly as it works outside bazel
require('terser/bin/uglifyjs');
require(TERSER_BINARY || 'terser/bin/uglifyjs');

} else if (inputs.length > 1) {
// We don't know how to merge multiple input dirs to one output dir
throw new Error('terser_minified only allows a single input when minifying a directory');
} else {

} else if (inputs[0]) {
terserDirectory(inputs[0]);
}

function spawn(cmd, args) {
return new Promise((resolve, reject) => {
const err = [];
const out = [];
// this may throw syncronously if the process cannot be created.
let proc = child_process.spawn(cmd, args);

proc.stdout.on('data', (buf) => {
out.push(buf);
});
proc.stderr.on('data', (buf) => {err.push(buf)})
proc.on('exit', (code) => {
// we never reject here based on exit code because an error is a valid result of running a
// process.
resolve({out: Buffer.concat(out), err: err.length ? Buffer.concat(err) : false, code});
});
})
}
11 changes: 11 additions & 0 deletions packages/terser/test/exec/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
load("@npm_bazel_jasmine//:index.from_src.bzl", "jasmine_node_test")

jasmine_node_test(
name = "test",
srcs = ["spec.js"],
coverage = True,
data = [
"@npm_bazel_terser//:index.js",
],
tags = ["exclusive"],
)
144 changes: 144 additions & 0 deletions packages/terser/test/exec/spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
const fs = require('fs');
const cp = require('child_process');
const path = require('path');
const util = require('util');
const assert = require('assert');

const terserWrap = require.resolve('npm_bazel_terser/index.js');

if (!fs.existsSync(terserWrap)) {
throw new Error(
'expected to find terserwrap javascript file at \n' + terserWrap + '\nbut it does not exist!')
}

if (process.platform === 'win32' && process.versions.node.split('.')[0] < 12) {
// skip this test if we're on node10.
// TODO: remove this when node 12 is the default
console.error(
'this test is only run on windows with nodejs >= 12 because there is an issue with spawning processes and ENOMEM errors.')
process.exit(0)
}

function terser(inputFile, outputFile, opts, attempts = 0) {
return cp.execFileSync(
process.execPath, [terserWrap, inputFile, '--output', outputFile], opts || {env: []})
}

describe('run terser', () => {
it('should fail', () => {
let thrown = false
try {
terser('loop.js', 'boop.js')
} catch (e) {
console.error(e, e + '')
assert.strictEqual(e.status, 1, 'exit code should be 1');
thrown = true;
}
assert.ok(thrown, 'should have thrown on missing input file.')

fs.writeFileSync('soup.js', 'omg soup!');

let stderr = '';
try {
terser('soup.js', 'boop.js')
} catch (e) {
assert.ok(e.status, 'exit code');
stderr = e.stderr + ''
}

assert.ok(
stderr.indexOf('Parse error at') > -1, 'should have parse error in output from terser.')
})

it('errors when cannot locate user defined terser binary', () => {
fs.writeFileSync('soup.js', '"valid";');

try {
terser('soup.js', 'boop.js', {env: {TERSER_BINARY: 'DOES_NOT_EXIST'}})
} catch (e) {
console.error(e, e + '')
assert.ok(e.status, 'exit code');
stderr = e.stderr + ''
}
})

// terser's default behavior waits for javascript from stdin if no arguments are provided.
// this doesnt match with how we can use it in bazel so we disable this feature.
it('exits when no arguments are provided.', async () => {
await util.promisify(cp.execFile)(process.execPath, [terserWrap], {env: []});
})

it('errors when any file in a directory fails', () => {
// retries are not run in clean directories
try {
fs.mkdirSync('fruit')
fs.mkdirSync('frout')
} catch (e) {
}

fs.writeFileSync('fruit/apples.js', 'yay apple!');
fs.writeFileSync('fruit/orange.js', '"orange.js"');
stderr = '';
try {
terser('fruit', 'frout')
} catch (e) {
console.log(e + '', e.status, e.stderr + '', e.stdout + '')

assert.strictEqual(e.status, 2, 'exit code 2');
stderr = e.stderr + ''
}

assert.ok(
stderr.indexOf('Parse error at') > -1, 'should have parse error in output from terser.');

assert.ok(
fs.existsSync('frout/orange.js'),
'even if one script has a parse error all other scripts should be processed.')
});

// always uses concurrency >= to the number of items to process
// this excercies a code path where the pending work queue is never used.
it('processes all files in a directory', () => {
// retries are not run in clean directories.
try {
fs.mkdirSync('fruit2');
fs.mkdirSync('frout2');
} catch (e) {
}

fs.writeFileSync('fruit2/apples.js', '"apple";');
fs.writeFileSync('fruit2/orange.js', '"orange!";');

try {
terser('fruit2', 'frout2', {env: {TERSER_CONCURRENCY: 2}})
} catch (e) {
assert.fail('should not have failed to process any javascript.')
}

assert.ok(fs.existsSync('frout2/orange.js'), 'minified js should have been created')
assert.ok(fs.existsSync('frout2/apples.js'), 'minified js should have been created')
});

// this excercies the code path where work has to be queued an dequeued based on exhausting the
// avaiable concurrency.
it('processes all files in a directory with single concurrency', () => {
// retries are not run in clean directories
try {
fs.mkdirSync('fruit3');
fs.mkdirSync('frout3');
} catch (e) {
}

fs.writeFileSync('fruit3/apples.js', '"apple";');
fs.writeFileSync('fruit3/orange.js', '"orange!";');

try {
terser('fruit3', 'frout3', {env: {TERSER_CONCURRENCY: 1}})
} catch (e) {
assert.fail('should not have failed to process any javascript.')
}

assert.ok(fs.existsSync('frout3/orange.js'), 'minified js should have been created')
assert.ok(fs.existsSync('frout3/apples.js'), 'minified js should have been created')
});
});