Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: fix StringBytes::Write() #1042

Merged
merged 8 commits into from
Mar 5, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions src/node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,6 @@ static uv_async_t dispatch_debug_messages_async;

static Isolate* node_isolate = nullptr;

int WRITE_UTF8_FLAGS = v8::String::HINT_MANY_WRITES_EXPECTED |
v8::String::NO_NULL_TERMINATION;

class ArrayBufferAllocator : public ArrayBuffer::Allocator {
public:
// Impose an upper limit to avoid out of memory errors that bring down
Expand Down Expand Up @@ -3819,11 +3816,6 @@ static void StartNodeInstance(void* arg) {
int Start(int argc, char** argv) {
PlatformInit();

const char* replace_invalid = secure_getenv("NODE_INVALID_UTF8");

if (replace_invalid == nullptr)
WRITE_UTF8_FLAGS |= String::REPLACE_INVALID_UTF8;

CHECK_GT(argc, 0);

// Hack around with the argv pointer. Used for process.title = "blah".
Expand Down
3 changes: 0 additions & 3 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -415,9 +415,6 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
if (max_length == 0)
return args.GetReturnValue().Set(0);

if (encoding == UCS2)
max_length = max_length / 2;

if (offset >= obj_length)
return env->ThrowRangeError("Offset is out of bounds");

Expand Down
98 changes: 49 additions & 49 deletions src/string_bytes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ class ExternString: public ResourceType {
public:
~ExternString() override {
delete[] data_;
int64_t change_in_bytes = -static_cast<int64_t>(length_);
isolate()->AdjustAmountOfExternalAllocatedMemory(change_in_bytes);
isolate()->AdjustAmountOfExternalAllocatedMemory(-byte_length());
}

const TypeName* data() const override {
Expand All @@ -40,6 +39,10 @@ class ExternString: public ResourceType {
return length_;
}

int64_t byte_length() const {
return length() * sizeof(*data());
}

static Local<String> NewFromCopy(Isolate* isolate,
const TypeName* data,
size_t length) {
Expand Down Expand Up @@ -69,7 +72,7 @@ class ExternString: public ResourceType {
data,
length);
Local<String> str = String::NewExternal(isolate, h_str);
isolate->AdjustAmountOfExternalAllocatedMemory(length);
isolate->AdjustAmountOfExternalAllocatedMemory(h_str->byte_length());

return scope.Escape(str);
}
Expand Down Expand Up @@ -260,7 +263,7 @@ bool StringBytes::GetExternalParts(Isolate* isolate,
const String::ExternalStringResource* ext;
ext = str->GetExternalStringResource();
*data = reinterpret_cast<const char*>(ext->data());
*len = ext->length();
*len = ext->length() * sizeof(*ext->data());
return true;
}

Expand All @@ -276,82 +279,83 @@ size_t StringBytes::Write(Isolate* isolate,
int* chars_written) {
HandleScope scope(isolate);
const char* data = nullptr;
size_t len = 0;
bool is_extern = GetExternalParts(isolate, val, &data, &len);
size_t extlen = len;
size_t nbytes = 0;
const bool is_extern = GetExternalParts(isolate, val, &data, &nbytes);
const size_t external_nbytes = nbytes;

CHECK(val->IsString() == true);
Local<String> str = val.As<String>();
len = len < buflen ? len : buflen;

int flags = String::NO_NULL_TERMINATION |
String::HINT_MANY_WRITES_EXPECTED;
if (nbytes > buflen)
nbytes = buflen;

int flags = String::HINT_MANY_WRITES_EXPECTED |
String::NO_NULL_TERMINATION |
String::REPLACE_INVALID_UTF8;

switch (encoding) {
case ASCII:
case BINARY:
case BUFFER:
if (is_extern)
memcpy(buf, data, len);
else
len = str->WriteOneByte(reinterpret_cast<uint8_t*>(buf),
0,
buflen,
flags);
if (is_extern && str->IsOneByte()) {
memcpy(buf, data, nbytes);
} else {
uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
nbytes = str->WriteOneByte(dst, 0, buflen, flags);
}
if (chars_written != nullptr)
*chars_written = len;
*chars_written = nbytes;
break;

case UTF8:
if (is_extern)
// TODO(tjfontaine) should this validate invalid surrogate pairs as
// well?
memcpy(buf, data, len);
else
len = str->WriteUtf8(buf, buflen, chars_written, WRITE_UTF8_FLAGS);
nbytes = str->WriteUtf8(buf, buflen, chars_written, flags);
break;

case UCS2:
if (is_extern)
memcpy(buf, data, len * 2);
else
len = str->Write(reinterpret_cast<uint16_t*>(buf), 0, buflen, flags);
case UCS2: {
uint16_t* const dst = reinterpret_cast<uint16_t*>(buf);
size_t nchars;
if (is_extern && !str->IsOneByte()) {
memcpy(buf, data, nbytes);
nchars = nbytes / sizeof(*dst);
} else {
nchars = buflen / sizeof(*dst);
nchars = str->Write(dst, 0, nchars, flags);
nbytes = nchars * sizeof(*dst);
}
if (IsBigEndian()) {
// Node's "ucs2" encoding wants LE character data stored in
// the Buffer, so we need to reorder on BE platforms. See
// http://nodejs.org/api/buffer.html regarding Node's "ucs2"
// encoding specification
uint16_t* buf16 = reinterpret_cast<uint16_t*>(buf);
for (size_t i = 0; i < len; i++) {
buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8);
}
for (size_t i = 0; i < nchars; i++)
dst[i] = dst[i] << 8 | dst[i] >> 8;
}
if (chars_written != nullptr)
*chars_written = len;
len = len * sizeof(uint16_t);
*chars_written = nchars;
break;
}

case BASE64:
if (is_extern) {
len = base64_decode(buf, buflen, data, extlen);
nbytes = base64_decode(buf, buflen, data, external_nbytes);
} else {
String::Value value(str);
len = base64_decode(buf, buflen, *value, value.length());
nbytes = base64_decode(buf, buflen, *value, value.length());
}
if (chars_written != nullptr) {
*chars_written = len;
*chars_written = nbytes;
}
break;

case HEX:
if (is_extern) {
len = hex_decode(buf, buflen, data, extlen);
nbytes = hex_decode(buf, buflen, data, external_nbytes);
} else {
String::Value value(str);
len = hex_decode(buf, buflen, *value, value.length());
nbytes = hex_decode(buf, buflen, *value, value.length());
}
if (chars_written != nullptr) {
*chars_written = len * 2;
*chars_written = nbytes;
}
break;

Expand All @@ -360,7 +364,7 @@ size_t StringBytes::Write(Isolate* isolate,
break;
}

return len;
return nbytes;
}


Expand Down Expand Up @@ -754,21 +758,17 @@ Local<Value> StringBytes::Encode(Isolate* isolate,
Local<Value> StringBytes::Encode(Isolate* isolate,
const uint16_t* buf,
size_t buflen) {
const uint16_t* src = buf;

Local<String> val;

if (buflen < EXTERN_APEX) {
val = String::NewFromTwoByte(isolate,
src,
buf,
String::kNormalString,
buflen);
} else {
val = ExternTwoByteString::NewFromCopy(isolate, src, buflen);
val = ExternTwoByteString::NewFromCopy(isolate, buf, buflen);
}

if (src != buf)
delete[] src;

return val;
}

Expand Down
2 changes: 0 additions & 2 deletions src/string_bytes.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

namespace node {

extern int WRITE_UTF8_FLAGS;

class StringBytes {
public:
class InlineDecoder {
Expand Down
36 changes: 14 additions & 22 deletions src/util.cc
Original file line number Diff line number Diff line change
@@ -1,36 +1,28 @@
#include "util.h"

#include "string_bytes.h"

namespace node {

Utf8Value::Utf8Value(v8::Isolate* isolate, v8::Handle<v8::Value> value)
: length_(0), str_(nullptr) {
: length_(0), str_(str_st_) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lint didn't catch this before?

if (value.IsEmpty())
return;

v8::Local<v8::String> val_ = value->ToString(isolate);
if (val_.IsEmpty())
v8::Local<v8::String> string = value->ToString(isolate);
if (string.IsEmpty())
return;

// Allocate enough space to include the null terminator
size_t len = StringBytes::StorageSize(val_, UTF8) + 1;

char* str;
if (len > kStorageSize)
str = static_cast<char*>(malloc(len));
else
str = str_st_;
CHECK_NE(str, NULL);

int flags = WRITE_UTF8_FLAGS;

length_ = val_->WriteUtf8(str,
len,
0,
flags);
str[length_] = '\0';

str_ = reinterpret_cast<char*>(str);
size_t len = StringBytes::StorageSize(string, UTF8) + 1;
if (len > sizeof(str_st_)) {
str_ = static_cast<char*>(malloc(len));
CHECK_NE(str_, nullptr);
}

const int flags =
v8::String::NO_NULL_TERMINATION | v8::String::REPLACE_INVALID_UTF8;
length_ = string->WriteUtf8(str_, len, 0, flags);
str_[length_] = '\0';
}

} // namespace node
3 changes: 1 addition & 2 deletions src/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,9 @@ class Utf8Value {
};

private:
static const int kStorageSize = 1024;
size_t length_;
char str_st_[kStorageSize];
char* str_;
char str_st_[1024];
};

} // namespace node
Expand Down
26 changes: 17 additions & 9 deletions test/parallel/test-stringbytes-external.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,10 @@ assert.equal(b[0], 0x61);
assert.equal(b[1], 0);
assert.equal(ucs2_control, c);


// grow the strings to proper length
while (write_str.length <= EXTERN_APEX) {
write_str += write_str;
ucs2_control += ucs2_control;
}
write_str += write_str.substr(0, EXTERN_APEX - write_str.length);
ucs2_control += ucs2_control.substr(0, EXTERN_APEX * 2 - ucs2_control.length);

// now create big strings
var size = 1 + (1 << 20);
write_str = Array(size).join(write_str);
ucs2_control = Array(size).join(ucs2_control);

// check resultant buffer and output string
var b = new Buffer(write_str, 'ucs2');
Expand Down Expand Up @@ -111,3 +106,16 @@ var PRE_3OF4_APEX = Math.ceil((EXTERN_APEX / 4) * 3) - RADIOS;
}
}
})();

// https://github.com/iojs/io.js/issues/1024
(function() {
var a = Array(1 << 20).join('x');
var b = Buffer(a, 'ucs2').toString('ucs2');
var c = Buffer(b, 'utf8').toString('utf8');

assert.equal(a.length, b.length);
assert.equal(b.length, c.length);

assert.equal(a, b);
assert.equal(b, c);
})();