Skip to content

Commit

Permalink
chore: parallelize terser runs (#1123)
Browse files Browse the repository at this point in the history
  • Loading branch information
soldair authored Sep 19, 2019
1 parent 646d75b commit e4427b3
Show file tree
Hide file tree
Showing 3 changed files with 244 additions and 13 deletions.
102 changes: 89 additions & 13 deletions packages/terser/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
const fs = require('fs');
const path = require('path');
const child_process = require('child_process');
const os = require('os')

// Run Bazel with --define=VERBOSE_LOGS=1 to enable this logging
const VERBOSE_LOGS = !!process.env['VERBOSE_LOGS'];
Expand All @@ -17,6 +18,10 @@ function log_verbose(...m) {
if (VERBOSE_LOGS) console.error('[terser/index.js]', ...m);
}

function log_error(...m) {
console.error('[terser/index.js]', ...m);
}

// Peek at the arguments to find any directories declared as inputs
let argv = process.argv.slice(2);
// terser_minified.bzl always passes the inputs first,
Expand All @@ -25,10 +30,19 @@ let argv = process.argv.slice(2);
// Avoid a dependency on a library like minimist; keep it simple.
const outputArgIndex = argv.findIndex((arg) => arg.startsWith('--'));

// We don't want to implement a command-line parser for terser
// so we invoke its CLI as child processes when a directory is provided, just altering the
// input/output arguments. See discussion: https://github.com/bazelbuild/rules_nodejs/issues/822

const inputs = argv.slice(0, outputArgIndex);
const output = argv[outputArgIndex + 1];
const residual = argv.slice(outputArgIndex + 2);

// user override for the terser binary location. used in testing.
const TERSER_BINARY = process.env.TERSER_BINARY || require.resolve('terser/bin/uglifyjs')
// choose a default concurrency of the number of cores -1 but at least 1.
const TERSER_CONCURENCY = (process.env.TERSER_CONCURRENCY || os.cpus().length - 1) || 1

log_verbose(`Running terser/index.js
inputs: ${inputs}
output: ${output}
Expand All @@ -43,31 +57,93 @@ function terserDirectory(input) {
fs.mkdirSync(output);
}

let work = [];
let active = 0;
let errors = [];

function exec([inputFile, outputFile]) {
active++;
let args = [TERSER_BINARY, inputFile, '--output', outputFile, ...residual];

spawn(process.execPath, args)
.then(
(data) => {
if (data.code) {
errors.push(inputFile)
// NOTE: Even though a terser process has errored we continue here to collect all of
// the errors. this behavior is another candidate for user configuration because
// there is value in stopping at the first error in some use cases.

log_error(`errored: ${inputFile}\nOUT: ${data.out}\nERR: ${data.err}\ncode: ${
data.code}`);
} else {
log_verbose('finished: ', inputFile);
}
--active;
next();
},
(err) => {
--active;
log_error('errored: [spawn exception]', inputFile, '\n' + err)
errors.push(inputFile)
next();
})
}

function next() {
if (work.length) {
exec(work.shift());
} else if (!active) {
if (errors.length) {
log_error('terser errored processing javascript in directory.')
process.exitCode = 2;
}
// NOTE: work is done at this point and node should exit here.
}
}

fs.readdirSync(input).forEach(f => {
if (f.endsWith('.js')) {
const inputFile = path.join(input, path.basename(f));
const outputFile = path.join(output, path.basename(f));
// We don't want to implement a command-line parser for terser
// so we invoke its CLI as child processes, just altering the input/output
// arguments. See discussion: https://github.com/bazelbuild/rules_nodejs/issues/822
// FIXME: this causes unlimited concurrency, which will definitely eat all the RAM on your
// machine;
// we need to limit the concurrency with something like the p-limit package.
// TODO: under Node 12 it should use the worker threads API to saturate all local cores
child_process.fork(
// Note that the fork API doesn't do any module resolution.
require.resolve('terser/bin/uglifyjs'), [inputFile, '--output', outputFile, ...residual]);

if (active < TERSER_CONCURENCY) {
exec([inputFile, outputFile]);
} else {
work.push([inputFile, outputFile])
}
}
});
}

if (!inputs.find(isDirectory)) {
if (!inputs.find(isDirectory) && inputs.length) {
// Inputs were only files
// Just use terser CLI exactly as it works outside bazel
require('terser/bin/uglifyjs');
require(TERSER_BINARY || 'terser/bin/uglifyjs');

} else if (inputs.length > 1) {
// We don't know how to merge multiple input dirs to one output dir
throw new Error('terser_minified only allows a single input when minifying a directory');
} else {

} else if (inputs[0]) {
terserDirectory(inputs[0]);
}

function spawn(cmd, args) {
return new Promise((resolve, reject) => {
const err = [];
const out = [];
// this may throw syncronously if the process cannot be created.
let proc = child_process.spawn(cmd, args);

proc.stdout.on('data', (buf) => {
out.push(buf);
});
proc.stderr.on('data', (buf) => {err.push(buf)})
proc.on('exit', (code) => {
// we never reject here based on exit code because an error is a valid result of running a
// process.
resolve({out: Buffer.concat(out), err: err.length ? Buffer.concat(err) : false, code});
});
})
}
11 changes: 11 additions & 0 deletions packages/terser/test/exec/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
load("@npm_bazel_jasmine//:index.from_src.bzl", "jasmine_node_test")

jasmine_node_test(
name = "test",
srcs = ["spec.js"],
coverage = True,
data = [
"@npm_bazel_terser//:index.js",
],
tags = ["exclusive"],
)
144 changes: 144 additions & 0 deletions packages/terser/test/exec/spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
const fs = require('fs');
const cp = require('child_process');
const path = require('path');
const util = require('util');
const assert = require('assert');

const terserWrap = require.resolve('npm_bazel_terser/index.js');

if (!fs.existsSync(terserWrap)) {
throw new Error(
'expected to find terserwrap javascript file at \n' + terserWrap + '\nbut it does not exist!')
}

if (process.platform === 'win32' && process.versions.node.split('.')[0] < 12) {
// skip this test if we're on node10.
// TODO: remove this when node 12 is the default
console.error(
'this test is only run on windows with nodejs >= 12 because there is an issue with spawning processes and ENOMEM errors.')
process.exit(0)
}

function terser(inputFile, outputFile, opts, attempts = 0) {
return cp.execFileSync(
process.execPath, [terserWrap, inputFile, '--output', outputFile], opts || {env: []})
}

describe('run terser', () => {
it('should fail', () => {
let thrown = false
try {
terser('loop.js', 'boop.js')
} catch (e) {
console.error(e, e + '')
assert.strictEqual(e.status, 1, 'exit code should be 1');
thrown = true;
}
assert.ok(thrown, 'should have thrown on missing input file.')

fs.writeFileSync('soup.js', 'omg soup!');

let stderr = '';
try {
terser('soup.js', 'boop.js')
} catch (e) {
assert.ok(e.status, 'exit code');
stderr = e.stderr + ''
}

assert.ok(
stderr.indexOf('Parse error at') > -1, 'should have parse error in output from terser.')
})

it('errors when cannot locate user defined terser binary', () => {
fs.writeFileSync('soup.js', '"valid";');

try {
terser('soup.js', 'boop.js', {env: {TERSER_BINARY: 'DOES_NOT_EXIST'}})
} catch (e) {
console.error(e, e + '')
assert.ok(e.status, 'exit code');
stderr = e.stderr + ''
}
})

// terser's default behavior waits for javascript from stdin if no arguments are provided.
// this doesnt match with how we can use it in bazel so we disable this feature.
it('exits when no arguments are provided.', async () => {
await util.promisify(cp.execFile)(process.execPath, [terserWrap], {env: []});
})

it('errors when any file in a directory fails', () => {
// retries are not run in clean directories
try {
fs.mkdirSync('fruit')
fs.mkdirSync('frout')
} catch (e) {
}

fs.writeFileSync('fruit/apples.js', 'yay apple!');
fs.writeFileSync('fruit/orange.js', '"orange.js"');
stderr = '';
try {
terser('fruit', 'frout')
} catch (e) {
console.log(e + '', e.status, e.stderr + '', e.stdout + '')

assert.strictEqual(e.status, 2, 'exit code 2');
stderr = e.stderr + ''
}

assert.ok(
stderr.indexOf('Parse error at') > -1, 'should have parse error in output from terser.');

assert.ok(
fs.existsSync('frout/orange.js'),
'even if one script has a parse error all other scripts should be processed.')
});

// always uses concurrency >= to the number of items to process
// this excercies a code path where the pending work queue is never used.
it('processes all files in a directory', () => {
// retries are not run in clean directories.
try {
fs.mkdirSync('fruit2');
fs.mkdirSync('frout2');
} catch (e) {
}

fs.writeFileSync('fruit2/apples.js', '"apple";');
fs.writeFileSync('fruit2/orange.js', '"orange!";');

try {
terser('fruit2', 'frout2', {env: {TERSER_CONCURRENCY: 2}})
} catch (e) {
assert.fail('should not have failed to process any javascript.')
}

assert.ok(fs.existsSync('frout2/orange.js'), 'minified js should have been created')
assert.ok(fs.existsSync('frout2/apples.js'), 'minified js should have been created')
});

// this excercies the code path where work has to be queued an dequeued based on exhausting the
// avaiable concurrency.
it('processes all files in a directory with single concurrency', () => {
// retries are not run in clean directories
try {
fs.mkdirSync('fruit3');
fs.mkdirSync('frout3');
} catch (e) {
}

fs.writeFileSync('fruit3/apples.js', '"apple";');
fs.writeFileSync('fruit3/orange.js', '"orange!";');

try {
terser('fruit3', 'frout3', {env: {TERSER_CONCURRENCY: 1}})
} catch (e) {
assert.fail('should not have failed to process any javascript.')
}

assert.ok(fs.existsSync('frout3/orange.js'), 'minified js should have been created')
assert.ok(fs.existsSync('frout3/apples.js'), 'minified js should have been created')
});
});

0 comments on commit e4427b3

Please sign in to comment.