From 91a302356b04572a191d16158739dac7f096e719 Mon Sep 17 00:00:00 2001 From: Chengzhong Wu Date: Wed, 29 Jan 2025 14:58:25 +0000 Subject: [PATCH] inspector: fix StringUtil::CharacterCount for unicodes `StringUtil::CharacterCount` should return the length of underlying representation storage of a protocol string. `StringUtil::CharacterCount` is only used in DictionaryValue serialization. Only `Network.Headers` is an object type, represented with DictionaryValue. PR-URL: https://github.com/nodejs/node/pull/56788 Reviewed-By: James M Snell Reviewed-By: Yagiz Nizipli Reviewed-By: Daniel Lemire Reviewed-By: Kohei Ueno --- Makefile | 2 + node.gyp | 9 ++++ src/inspector/node_string.cc | 11 ++--- src/inspector/node_string.h | 3 ++ test/cctest/inspector/test_node_protocol.cc | 33 +++++++++++++++ test/common/inspector-helper.js | 29 ++++++++++++- .../test-inspector-network-arbitrary-data.js | 41 +++++++++++++++++++ 7 files changed, 122 insertions(+), 6 deletions(-) create mode 100644 test/cctest/inspector/test_node_protocol.cc create mode 100644 test/parallel/test-inspector-network-arbitrary-data.js diff --git a/Makefile b/Makefile index 419aa6c64f00f6..ecee2fa5d9d2a3 100644 --- a/Makefile +++ b/Makefile @@ -1474,6 +1474,8 @@ LINT_CPP_FILES = $(filter-out $(LINT_CPP_EXCLUDE), $(wildcard \ src/*/*.h \ test/addons/*/*.cc \ test/addons/*/*.h \ + test/cctest/*/*.cc \ + test/cctest/*/*.h \ test/cctest/*.cc \ test/cctest/*.h \ test/embedding/*.cc \ diff --git a/node.gyp b/node.gyp index 7dcde96cacc473..6166f9d577ea41 100644 --- a/node.gyp +++ b/node.gyp @@ -432,6 +432,7 @@ 'test/cctest/test_quic_tokens.cc', ], 'node_cctest_inspector_sources': [ + 'test/cctest/inspector/test_node_protocol.cc', 'test/cctest/test_inspector_socket.cc', 'test/cctest/test_inspector_socket_server.cc', ], @@ -1210,6 +1211,14 @@ 'HAVE_INSPECTOR=1', ], 'sources': [ '<@(node_cctest_inspector_sources)' ], + 'include_dirs': [ + # TODO(legendecas): make node_inspector.gypi a dependable target. + '<(SHARED_INTERMEDIATE_DIR)', # for inspector + '<(SHARED_INTERMEDIATE_DIR)/src', # for inspector + ], + 'dependencies': [ + 'deps/inspector_protocol/inspector_protocol.gyp:crdtp', + ], }, { 'defines': [ 'HAVE_INSPECTOR=0', diff --git a/src/inspector/node_string.cc b/src/inspector/node_string.cc index d83c53c81ca774..6db4bee1072bfe 100644 --- a/src/inspector/node_string.cc +++ b/src/inspector/node_string.cc @@ -85,11 +85,12 @@ const uint8_t* StringUtil::CharactersUTF8(const std::string_view s) { } size_t StringUtil::CharacterCount(const std::string_view s) { - // The utf32_length_from_utf8 function calls count_utf8. - // The count_utf8 function counts the number of code points - // (characters) in the string, assuming that the string is valid Unicode. - // TODO(@anonrig): Test to make sure CharacterCount returns correctly. - return simdutf::utf32_length_from_utf8(s.data(), s.length()); + // Return the length of underlying representation storage. + // E.g. for std::basic_string_view, return its byte length. + // If we adopt a variant underlying store string type, like + // `v8_inspector::StringView`, for UTF16, return the length of the + // underlying uint16_t store. + return s.length(); } } // namespace protocol diff --git a/src/inspector/node_string.h b/src/inspector/node_string.h index d529d1337be0e2..38cf96e874dcc4 100644 --- a/src/inspector/node_string.h +++ b/src/inspector/node_string.h @@ -32,6 +32,9 @@ using StringBuilder = std::ostringstream; using ProtocolMessage = std::string; // Implements StringUtil methods used in `inspector_protocol/lib/`. +// Refer to +// https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/core/inspector/v8_inspector_string.h;l=40;drc=2b15d6974a49d3a14d3d67ae099a649d523a828d +// for more details about the interface. struct StringUtil { // Convert Utf16 in local endianness to Utf8 if needed. static String StringViewToUtf8(v8_inspector::StringView view); diff --git a/test/cctest/inspector/test_node_protocol.cc b/test/cctest/inspector/test_node_protocol.cc new file mode 100644 index 00000000000000..57e42e51adb859 --- /dev/null +++ b/test/cctest/inspector/test_node_protocol.cc @@ -0,0 +1,33 @@ +#include "crdtp/json.h" +#include "gtest/gtest.h" +#include "inspector/node_json.h" +#include "node/inspector/protocol/Protocol.h" + +namespace node { +namespace inspector { +namespace protocol { +namespace { + +TEST(InspectorProtocol, Utf8StringSerDes) { + constexpr const char* kKey = "unicode_key"; + constexpr const char* kValue = "CJK 汉字 🍱 🧑‍🧑‍🧒‍🧒"; + std::unique_ptr val = DictionaryValue::create(); + val->setString(kKey, kValue); + + std::vector cbor = val->Serialize(); + std::string json; + crdtp::Status status = + crdtp::json::ConvertCBORToJSON(crdtp::SpanFrom(cbor), &json); + CHECK(status.ok()); + + std::unique_ptr parsed = + DictionaryValue::cast(JsonUtil::parseJSON(json)); + std::string parsed_value; + CHECK(parsed->getString(kKey, &parsed_value)); + CHECK_EQ(parsed_value, std::string(kValue)); +} + +} // namespace +} // namespace protocol +} // namespace inspector +} // namespace node diff --git a/test/common/inspector-helper.js b/test/common/inspector-helper.js index 864538f245e2ea..3c7277c24a5522 100644 --- a/test/common/inspector-helper.js +++ b/test/common/inspector-helper.js @@ -6,7 +6,7 @@ const http = require('http'); const fixtures = require('../common/fixtures'); const { spawn } = require('child_process'); const { URL, pathToFileURL } = require('url'); -const { EventEmitter } = require('events'); +const { EventEmitter, once } = require('events'); const _MAINSCRIPT = fixtures.path('loop.js'); const DEBUG = false; @@ -544,6 +544,33 @@ function fires(promise, error, timeoutMs) { ]); } +/** + * When waiting for inspector events, there might be no handles on the event + * loop, and leads to process exits. + * + * This function provides a utility to wait until a inspector event for a certain + * time. + */ +function waitUntil(session, eventName, timeout = 1000) { + const resolvers = Promise.withResolvers(); + const timer = setTimeout(() => { + resolvers.reject(new Error(`Wait for inspector event ${eventName} timed out`)); + }, timeout); + + once(session, eventName) + .then((res) => { + resolvers.resolve(res); + clearTimeout(timer); + }, (error) => { + // This should never happen. + resolvers.reject(error); + clearTimeout(timer); + }); + + return resolvers.promise; +} + module.exports = { NodeInstance, + waitUntil, }; diff --git a/test/parallel/test-inspector-network-arbitrary-data.js b/test/parallel/test-inspector-network-arbitrary-data.js new file mode 100644 index 00000000000000..2df76010f53082 --- /dev/null +++ b/test/parallel/test-inspector-network-arbitrary-data.js @@ -0,0 +1,41 @@ +// Flags: --inspect=0 --experimental-network-inspection +'use strict'; +const common = require('../common'); + +common.skipIfInspectorDisabled(); + +const inspector = require('node:inspector/promises'); +const { Network } = require('node:inspector'); +const test = require('node:test'); +const assert = require('node:assert'); +const { waitUntil } = require('../common/inspector-helper'); + +const session = new inspector.Session(); +session.connect(); + +test('should emit Network.requestWillBeSent with unicode', async () => { + await session.post('Network.enable'); + const expectedValue = 'CJK 汉字 🍱 🧑‍🧑‍🧒‍🧒'; + + const requestWillBeSentFuture = waitUntil(session, 'Network.requestWillBeSent') + .then(([event]) => { + assert.strictEqual(event.params.request.url, expectedValue); + assert.strictEqual(event.params.request.method, expectedValue); + assert.strictEqual(event.params.request.headers.mKey, expectedValue); + }); + + Network.requestWillBeSent({ + requestId: '1', + timestamp: 1, + wallTime: 1, + request: { + url: expectedValue, + method: expectedValue, + headers: { + mKey: expectedValue, + }, + }, + }); + + await requestWillBeSentFuture; +});