Skip to content

Commit

Permalink
module: detect ESM syntax by trying to recompile as SourceTextModule
Browse files Browse the repository at this point in the history
Instead of using an async function wrapper, just try compiling code with
unknown module format as SourceTextModule when it cannot be compiled
as CJS and the error message indicates that it's worth a retry. If
it can be parsed as SourceTextModule then it's considered ESM.

Also, move shouldRetryAsESM() to C++ completely so that
we can reuse it in the CJS module loader for require(esm).

Drive-by: move methods that don't belong to ContextifyContext
out as static methods and move GetHostDefinedOptions to
ModuleWrap.

PR-URL: #52413
Reviewed-By: Geoffrey Booth <[email protected]>
Reviewed-By: Jacob Smith <[email protected]>
  • Loading branch information
joyeecheung authored Apr 19, 2024
1 parent a6b80c7 commit 651fa04
Show file tree
Hide file tree
Showing 7 changed files with 306 additions and 357 deletions.
4 changes: 2 additions & 2 deletions lib/internal/modules/esm/get_format.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
// but this gets called again from `defaultLoad`/`defaultLoadSync`.
if (getOptionValue('--experimental-detect-module')) {
const format = source ?
(containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs') :
(containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs') :
null;
if (format === 'module') {
// This module has a .js extension, a package.json with no `type` field, and ESM syntax.
Expand Down Expand Up @@ -158,7 +158,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
if (!source) { return null; }
const format = getFormatOfExtensionlessFile(url);
if (format === 'module') {
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
return containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs';
}
return format;
}
Expand Down
34 changes: 0 additions & 34 deletions lib/internal/modules/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,6 @@ const {
} = require('internal/errors').codes;
const { BuiltinModule } = require('internal/bootstrap/realm');

const {
shouldRetryAsESM: contextifyShouldRetryAsESM,
constants: {
syntaxDetectionErrors: {
esmSyntaxErrorMessages,
throwsOnlyInCommonJSErrorMessages,
},
},
} = internalBinding('contextify');
const { validateString } = require('internal/validators');
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
const internalFS = require('internal/fs/utils');
Expand Down Expand Up @@ -329,30 +320,6 @@ function normalizeReferrerURL(referrerName) {
}


let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
/**
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
* @param {string} source Module contents
*/
function shouldRetryAsESM(errorMessage, source) {
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
return true;
}

throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
}

return false;
}

/**
* @param {string|undefined} url URL to convert to filename
*/
Expand Down Expand Up @@ -382,7 +349,6 @@ module.exports = {
loadBuiltinModule,
makeRequireFunction,
normalizeReferrerURL,
shouldRetryAsESM,
stripBOM,
toRealPath,
hasStartedUserCJSExecution() {
Expand Down
26 changes: 16 additions & 10 deletions lib/internal/modules/run_main.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
'use strict';

const {
ObjectGetPrototypeOf,
StringPrototypeEndsWith,
SyntaxErrorPrototype,
globalThis,
} = primordials;

Expand Down Expand Up @@ -159,27 +161,29 @@ function runEntryPointWithESMLoader(callback) {
function executeUserEntryPoint(main = process.argv[1]) {
const resolvedMain = resolveMainPath(main);
const useESMLoader = shouldUseESMLoader(resolvedMain);

let mainURL;
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
let retryAsESM = false;
if (!useESMLoader) {
const cjsLoader = require('internal/modules/cjs/loader');
const { Module } = cjsLoader;
if (getOptionValue('--experimental-detect-module')) {
// TODO(joyeecheung): handle this in the CJS loader. Don't try-catch here.
try {
// Module._load is the monkey-patchable CJS module loader.
Module._load(main, null, true);
} catch (error) {
const source = cjsLoader.entryPointSource;
const { shouldRetryAsESM } = require('internal/modules/helpers');
retryAsESM = shouldRetryAsESM(error.message, source);
// In case the entry point is a large file, such as a bundle,
// ensure no further references can prevent it being garbage-collected.
cjsLoader.entryPointSource = undefined;
if (error != null && ObjectGetPrototypeOf(error) === SyntaxErrorPrototype) {
const { shouldRetryAsESM } = internalBinding('contextify');
const mainPath = resolvedMain || main;
mainURL = pathToFileURL(mainPath).href;
retryAsESM = shouldRetryAsESM(error.message, cjsLoader.entryPointSource, mainURL);
// In case the entry point is a large file, such as a bundle,
// ensure no further references can prevent it being garbage-collected.
cjsLoader.entryPointSource = undefined;
}
if (!retryAsESM) {
const { enrichCJSError } = require('internal/modules/esm/translators');
enrichCJSError(error, source, resolvedMain);
throw error;
}
}
Expand All @@ -190,7 +194,9 @@ function executeUserEntryPoint(main = process.argv[1]) {

if (useESMLoader || retryAsESM) {
const mainPath = resolvedMain || main;
const mainURL = pathToFileURL(mainPath).href;
if (mainURL === undefined) {
mainURL = pathToFileURL(mainPath).href;
}

runEntryPointWithESMLoader((cascadedLoader) => {
// Note that if the graph contains unsettled TLA, this may never resolve
Expand Down
152 changes: 100 additions & 52 deletions src/module_wrap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,17 @@ v8::Maybe<bool> ModuleWrap::CheckUnsettledTopLevelAwait() {
return v8::Just(false);
}

// new ModuleWrap(url, context, source, lineOffset, columnOffset, cachedData)
Local<PrimitiveArray> ModuleWrap::GetHostDefinedOptions(
Isolate* isolate, Local<Symbol> id_symbol) {
Local<PrimitiveArray> host_defined_options =
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
return host_defined_options;
}

// new ModuleWrap(url, context, source, lineOffset, columnOffset[, cachedData]);
// new ModuleWrap(url, context, source, lineOffset, columOffset,
// hostDefinedOption)
// idSymbol);
// new ModuleWrap(url, context, exportNames, evaluationCallback[, cjsModule])
void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
CHECK(args.IsConstructCall());
Expand Down Expand Up @@ -183,9 +191,10 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
// cjsModule])
CHECK(args[3]->IsFunction());
} else {
// new ModuleWrap(url, context, source, lineOffset, columOffset, cachedData)
// new ModuleWrap(url, context, source, lineOffset, columOffset[,
// cachedData]);
// new ModuleWrap(url, context, source, lineOffset, columOffset,
// hostDefinedOption)
// idSymbol);
CHECK(args[2]->IsString());
CHECK(args[3]->IsNumber());
line_offset = args[3].As<Int32>()->Value();
Expand All @@ -199,7 +208,7 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
} else {
id_symbol = Symbol::New(isolate, url);
}
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
host_defined_options = GetHostDefinedOptions(isolate, id_symbol);

if (that->SetPrivate(context,
realm->isolate_data()->host_defined_option_symbol(),
Expand Down Expand Up @@ -234,50 +243,34 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
module = Module::CreateSyntheticModule(
isolate, url, span, SyntheticModuleEvaluationStepsCallback);
} else {
ScriptCompiler::CachedData* cached_data = nullptr;
bool used_cache_from_user = false;
// When we are compiling for the default loader, this will be
// std::nullopt, and CompileSourceTextModule() should use
// on-disk cache.
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data;
if (id_symbol !=
realm->isolate_data()->source_text_module_default_hdo()) {
user_cached_data = nullptr;
}
if (args[5]->IsArrayBufferView()) {
DCHECK(!can_use_builtin_cache); // We don't use this option internally.
used_cache_from_user = true;
CHECK(!can_use_builtin_cache); // We don't use this option internally.
Local<ArrayBufferView> cached_data_buf = args[5].As<ArrayBufferView>();
uint8_t* data =
static_cast<uint8_t*>(cached_data_buf->Buffer()->Data());
cached_data =
user_cached_data =
new ScriptCompiler::CachedData(data + cached_data_buf->ByteOffset(),
cached_data_buf->ByteLength());
}

Local<String> source_text = args[2].As<String>();
ScriptOrigin origin(isolate,
url,
line_offset,
column_offset,
true, // is cross origin
-1, // script id
Local<Value>(), // source map URL
false, // is opaque (?)
false, // is WASM
true, // is ES Module
host_defined_options);

CompileCacheEntry* cache_entry = nullptr;
if (can_use_builtin_cache && realm->env()->use_compile_cache()) {
cache_entry = realm->env()->compile_cache_handler()->GetOrInsert(
source_text, url, CachedCodeType::kESM);
}
if (cache_entry != nullptr && cache_entry->cache != nullptr) {
// source will take ownership of cached_data.
cached_data = cache_entry->CopyCache();
}

ScriptCompiler::Source source(source_text, origin, cached_data);
ScriptCompiler::CompileOptions options;
if (source.GetCachedData() == nullptr) {
options = ScriptCompiler::kNoCompileOptions;
} else {
options = ScriptCompiler::kConsumeCodeCache;
}
if (!ScriptCompiler::CompileModule(isolate, &source, options)
bool cache_rejected = false;
if (!CompileSourceTextModule(realm,
source_text,
url,
line_offset,
column_offset,
host_defined_options,
user_cached_data,
&cache_rejected)
.ToLocal(&module)) {
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
CHECK(!try_catch.Message().IsEmpty());
Expand All @@ -291,18 +284,8 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
return;
}

bool cache_rejected = false;
if (options == ScriptCompiler::kConsumeCodeCache) {
cache_rejected = source.GetCachedData()->rejected;
}

if (cache_entry != nullptr) {
realm->env()->compile_cache_handler()->MaybeSave(
cache_entry, module, cache_rejected);
}

// If the cache comes from builtin compile cache, fail silently.
if (cache_rejected && used_cache_from_user) {
if (user_cached_data.has_value() && user_cached_data.value() != nullptr &&
cache_rejected) {
THROW_ERR_VM_MODULE_CACHED_DATA_REJECTED(
realm, "cachedData buffer was rejected");
try_catch.ReThrow();
Expand Down Expand Up @@ -345,6 +328,71 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
args.GetReturnValue().Set(that);
}

MaybeLocal<Module> ModuleWrap::CompileSourceTextModule(
Realm* realm,
Local<String> source_text,
Local<String> url,
int line_offset,
int column_offset,
Local<PrimitiveArray> host_defined_options,
std::optional<ScriptCompiler::CachedData*> user_cached_data,
bool* cache_rejected) {
Isolate* isolate = realm->isolate();
EscapableHandleScope scope(isolate);
ScriptOrigin origin(isolate,
url,
line_offset,
column_offset,
true, // is cross origin
-1, // script id
Local<Value>(), // source map URL
false, // is opaque (?)
false, // is WASM
true, // is ES Module
host_defined_options);
ScriptCompiler::CachedData* cached_data = nullptr;
CompileCacheEntry* cache_entry = nullptr;
// When compiling for the default loader, user_cached_data is std::nullptr.
// When compiling for vm.Module, it's either nullptr or a pointer to the
// cached data.
if (user_cached_data.has_value()) {
cached_data = user_cached_data.value();
} else if (realm->env()->use_compile_cache()) {
cache_entry = realm->env()->compile_cache_handler()->GetOrInsert(
source_text, url, CachedCodeType::kESM);
}

if (cache_entry != nullptr && cache_entry->cache != nullptr) {
// source will take ownership of cached_data.
cached_data = cache_entry->CopyCache();
}

ScriptCompiler::Source source(source_text, origin, cached_data);
ScriptCompiler::CompileOptions options;
if (cached_data == nullptr) {
options = ScriptCompiler::kNoCompileOptions;
} else {
options = ScriptCompiler::kConsumeCodeCache;
}

Local<Module> module;
if (!ScriptCompiler::CompileModule(isolate, &source, options)
.ToLocal(&module)) {
return scope.EscapeMaybe(MaybeLocal<Module>());
}

if (options == ScriptCompiler::kConsumeCodeCache) {
*cache_rejected = source.GetCachedData()->rejected;
}

if (cache_entry != nullptr) {
realm->env()->compile_cache_handler()->MaybeSave(
cache_entry, module, *cache_rejected);
}

return scope.Escape(module);
}

static Local<Object> createImportAttributesContainer(
Realm* realm,
Isolate* isolate,
Expand Down
21 changes: 20 additions & 1 deletion src/module_wrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#include <unordered_map>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>
#include "base_object.h"
#include "v8-script.h"

namespace node {

Expand Down Expand Up @@ -69,6 +71,23 @@ class ModuleWrap : public BaseObject {
return true;
}

static v8::Local<v8::PrimitiveArray> GetHostDefinedOptions(
v8::Isolate* isolate, v8::Local<v8::Symbol> symbol);

// When user_cached_data is not std::nullopt, use the code cache if it's not
// nullptr, otherwise don't use code cache.
// TODO(joyeecheung): when it is std::nullopt, use on-disk cache
// See: https://github.com/nodejs/node/issues/47472
static v8::MaybeLocal<v8::Module> CompileSourceTextModule(
Realm* realm,
v8::Local<v8::String> source_text,
v8::Local<v8::String> url,
int line_offset,
int column_offset,
v8::Local<v8::PrimitiveArray> host_defined_options,
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data,
bool* cache_rejected);

private:
ModuleWrap(Realm* realm,
v8::Local<v8::Object> object,
Expand Down
Loading

0 comments on commit 651fa04

Please sign in to comment.