Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: fix fs.write() externalized string handling #18216

Merged
merged 2 commits into from
Jan 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,10 @@
'FD_SETSIZE=1024',
# we need to use node's preferred "win32" rather than gyp's preferred "win"
'NODE_PLATFORM="win32"',
# Stop <windows.h> from defining macros that conflict with
# std::min() and std::max(). We don't use <windows.h> (much)
# but we still inherit it from uv.h.
'NOMINMAX',
'_UNICODE=1',
],
'libraries': [ '-lpsapi.lib' ]
Expand Down
50 changes: 32 additions & 18 deletions src/node_file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
# include <io.h>
#endif

#include <memory>
#include <vector>

namespace node {
Expand Down Expand Up @@ -1019,40 +1020,53 @@ static void WriteString(const FunctionCallbackInfo<Value>& args) {

CHECK(args[0]->IsInt32());

std::unique_ptr<char[]> delete_on_return;
Local<Value> req;
Local<Value> string = args[1];
Local<Value> value = args[1];
int fd = args[0]->Int32Value();
char* buf = nullptr;
int64_t pos;
size_t len;
const int64_t pos = GET_OFFSET(args[2]);
const auto enc = ParseEncoding(env->isolate(), args[3], UTF8);
const auto is_async = args[4]->IsObject();

// Avoid copying the string when it is externalized but only when:
// 1. The target encoding is compatible with the string's encoding, and
// 2. The write is synchronous, otherwise the string might get neutered
// while the request is in flight, and
// 3. For UCS2, when the host system is little-endian. Big-endian systems
// need to call StringBytes::Write() to ensure proper byte swapping.
// The const_casts are conceptually sound: memory is read but not written.
if (!is_async && value->IsString()) {
auto string = value.As<String>();
if ((enc == ASCII || enc == LATIN1) && string->IsExternalOneByte()) {
auto ext = string->GetExternalOneByteStringResource();
buf = const_cast<char*>(ext->data());
len = ext->length();
} else if (enc == UCS2 && IsLittleEndian() && string->IsExternal()) {
auto ext = string->GetExternalStringResource();
buf = reinterpret_cast<char*>(const_cast<uint16_t*>(ext->data()));
len = ext->length() * sizeof(*ext->data());
}
}

// will assign buf and len if string was external
if (!StringBytes::GetExternalParts(string,
const_cast<const char**>(&buf),
&len)) {
enum encoding enc = ParseEncoding(env->isolate(), args[3], UTF8);
len = StringBytes::StorageSize(env->isolate(), string, enc);
if (buf == nullptr) {
len = StringBytes::StorageSize(env->isolate(), value, enc);
buf = new char[len];
// SYNC_CALL returns on error. Make sure to always free the memory.
if (!is_async) delete_on_return.reset(buf);
// StorageSize may return too large a char, so correct the actual length
// by the write size
len = StringBytes::Write(env->isolate(), buf, len, args[1], enc);
}
pos = GET_OFFSET(args[2]);

uv_buf_t uvbuf = uv_buf_init(const_cast<char*>(buf), len);
uv_buf_t uvbuf = uv_buf_init(buf, len);

if (args[4]->IsObject()) {
if (is_async) {
CHECK_EQ(args.Length(), 5);
AsyncCall(env, args, "write", UTF8, AfterInteger,
uv_fs_write, fd, &uvbuf, 1, pos);
} else {
// SYNC_CALL returns on error. Make sure to always free the memory.
struct Delete {
inline explicit Delete(char* pointer) : pointer_(pointer) {}
inline ~Delete() { delete[] pointer_; }
char* const pointer_;
};
Delete delete_on_return(buf);
SYNC_CALL(write, nullptr, fd, &uvbuf, 1, pos)
return args.GetReturnValue().Set(SYNC_RESULT);
}
Expand Down
113 changes: 29 additions & 84 deletions src/string_bytes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

#include <limits.h>
#include <string.h> // memcpy

#include <algorithm>
#include <vector>

// When creating strings >= this length v8's gc spins up and consumes
Expand Down Expand Up @@ -269,39 +271,6 @@ static size_t hex_decode(char* buf,
}


bool StringBytes::GetExternalParts(Local<Value> val,
const char** data,
size_t* len) {
if (Buffer::HasInstance(val)) {
*data = Buffer::Data(val);
*len = Buffer::Length(val);
return true;
}

if (!val->IsString())
return false;

Local<String> str = val.As<String>();

if (str->IsExternalOneByte()) {
const String::ExternalOneByteStringResource* ext;
ext = str->GetExternalOneByteStringResource();
*data = ext->data();
*len = ext->length();
return true;

} else if (str->IsExternal()) {
const String::ExternalStringResource* ext;
ext = str->GetExternalStringResource();
*data = reinterpret_cast<const char*>(ext->data());
*len = ext->length() * sizeof(*ext->data());
return true;
}

return false;
}


size_t StringBytes::WriteUCS2(char* buf,
size_t buflen,
Local<String> str,
Expand Down Expand Up @@ -347,32 +316,31 @@ size_t StringBytes::Write(Isolate* isolate,
enum encoding encoding,
int* chars_written) {
HandleScope scope(isolate);
const char* data = nullptr;
size_t nbytes = 0;
const bool is_extern = GetExternalParts(val, &data, &nbytes);
const size_t external_nbytes = nbytes;
size_t nbytes;
int nchars;

if (chars_written == nullptr)
chars_written = &nchars;

CHECK(val->IsString() == true);
Local<String> str = val.As<String>();

if (nbytes > buflen)
nbytes = buflen;

int flags = String::HINT_MANY_WRITES_EXPECTED |
String::NO_NULL_TERMINATION |
String::REPLACE_INVALID_UTF8;

switch (encoding) {
case ASCII:
case LATIN1:
if (is_extern && str->IsOneByte()) {
memcpy(buf, data, nbytes);
if (str->IsExternalOneByte()) {
auto ext = str->GetExternalOneByteStringResource();
nbytes = std::min(buflen, ext->length());
memcpy(buf, ext->data(), nbytes);
} else {
uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
nbytes = str->WriteOneByte(dst, 0, buflen, flags);
}
if (chars_written != nullptr)
*chars_written = nbytes;
*chars_written = nbytes;
break;

case BUFFER:
Expand All @@ -383,14 +351,8 @@ size_t StringBytes::Write(Isolate* isolate,
case UCS2: {
size_t nchars;

if (is_extern && !str->IsOneByte()) {
memcpy(buf, data, nbytes);
nchars = nbytes / sizeof(uint16_t);
} else {
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
}
if (chars_written != nullptr)
*chars_written = nchars;
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
*chars_written = static_cast<int>(nchars);

// Node's "ucs2" encoding wants LE character data stored in
// the Buffer, so we need to reorder on BE platforms. See
Expand All @@ -403,27 +365,25 @@ size_t StringBytes::Write(Isolate* isolate,
}

case BASE64:
if (is_extern) {
nbytes = base64_decode(buf, buflen, data, external_nbytes);
if (str->IsExternalOneByte()) {
auto ext = str->GetExternalOneByteStringResource();
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
} else {
String::Value value(str);
nbytes = base64_decode(buf, buflen, *value, value.length());
}
if (chars_written != nullptr) {
*chars_written = nbytes;
}
*chars_written = nbytes;
break;

case HEX:
if (is_extern) {
nbytes = hex_decode(buf, buflen, data, external_nbytes);
if (str->IsExternalOneByte()) {
auto ext = str->GetExternalOneByteStringResource();
nbytes = hex_decode(buf, buflen, ext->data(), ext->length());
} else {
String::Value value(str);
nbytes = hex_decode(buf, buflen, *value, value.length());
}
if (chars_written != nullptr) {
*chars_written = nbytes;
}
*chars_written = nbytes;
break;

default:
Expand Down Expand Up @@ -500,49 +460,34 @@ size_t StringBytes::Size(Isolate* isolate,
Local<Value> val,
enum encoding encoding) {
HandleScope scope(isolate);
size_t data_size = 0;
bool is_buffer = Buffer::HasInstance(val);

if (is_buffer && (encoding == BUFFER || encoding == LATIN1))
if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1))
return Buffer::Length(val);

const char* data;
if (GetExternalParts(val, &data, &data_size))
return data_size;

Local<String> str = val->ToString(isolate);

switch (encoding) {
case ASCII:
case LATIN1:
data_size = str->Length();
break;
return str->Length();

case BUFFER:
case UTF8:
data_size = str->Utf8Length();
break;
return str->Utf8Length();

case UCS2:
data_size = str->Length() * sizeof(uint16_t);
break;
return str->Length() * sizeof(uint16_t);

case BASE64: {
String::Value value(str);
data_size = base64_decoded_size(*value, value.length());
break;
return base64_decoded_size(*value, value.length());
}

case HEX:
data_size = str->Length() / 2;
break;

default:
CHECK(0 && "unknown encoding");
break;
return str->Length() / 2;
}

return data_size;
UNREACHABLE();
}


Expand Down
6 changes: 0 additions & 6 deletions src/string_bytes.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,6 @@ class StringBytes {
v8::Local<v8::Value> val,
enum encoding enc);

// If the string is external then assign external properties to data and len,
// then return true. If not return false.
static bool GetExternalParts(v8::Local<v8::Value> val,
const char** data,
size_t* len);

// Write the bytes from the string or buffer into the char*
// returns the number of bytes written, which will always be
// <= buflen. Use StorageSize/Size first to know how much
Expand Down
44 changes: 44 additions & 0 deletions test/parallel/test-fs-write.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.

// Flags: --expose_externalize_string
'use strict';
const common = require('../common');
const assert = require('assert');
Expand All @@ -30,6 +31,49 @@ const fn3 = path.join(common.tmpDir, 'write3.txt');
const expected = 'ümlaut.';
const constants = fs.constants;

/* eslint-disable no-undef */
common.allowGlobals(externalizeString, isOneByteString, x);

{
const expected = 'ümlaut eins'; // Must be a unique string.
externalizeString(expected);
assert.strictEqual(true, isOneByteString(expected));
const fd = fs.openSync(fn, 'w');
fs.writeSync(fd, expected, 0, 'latin1');
fs.closeSync(fd);
assert.strictEqual(expected, fs.readFileSync(fn, 'latin1'));
}

{
const expected = 'ümlaut zwei'; // Must be a unique string.
externalizeString(expected);
assert.strictEqual(true, isOneByteString(expected));
const fd = fs.openSync(fn, 'w');
fs.writeSync(fd, expected, 0, 'utf8');
fs.closeSync(fd);
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
}

{
const expected = '中文 1'; // Must be a unique string.
externalizeString(expected);
assert.strictEqual(false, isOneByteString(expected));
const fd = fs.openSync(fn, 'w');
fs.writeSync(fd, expected, 0, 'ucs2');
fs.closeSync(fd);
assert.strictEqual(expected, fs.readFileSync(fn, 'ucs2'));
}

{
const expected = '中文 2'; // Must be a unique string.
externalizeString(expected);
assert.strictEqual(false, isOneByteString(expected));
const fd = fs.openSync(fn, 'w');
fs.writeSync(fd, expected, 0, 'utf8');
fs.closeSync(fd);
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
}

common.refreshTmpDir();

fs.open(fn, 'w', 0o644, common.mustCall(function(err, fd) {
Expand Down