Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for utf8 chars #7

Merged
merged 13 commits into from
Jan 20, 2016
18 changes: 18 additions & 0 deletions bench.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
'use strict'

var split = require('./')
var bench = require('fastbench')
var fs = require('fs')

/**
 * Benchmark body: streams package.json through a fresh split() stream and
 * signals completion once that stream has ended.
 *
 * @param {Function} cb - invoked when the split stream emits 'end'.
 */
function benchSplit (cb) {
  var source = fs.createReadStream('package.json')
  var lines = source.pipe(split())

  lines.on('end', cb)
  // No 'data' listener is attached; resume() puts the stream into flowing
  // mode so the output is drained and 'end' can fire.
  lines.resume()
}

// Build the benchmark runner from the list of benchmark functions.
// NOTE(review): the second argument (10000) is presumably the number of
// iterations per benchmark — confirm against the fastbench documentation.
var run = bench([
benchSplit
], 10000)

// The runner is passed to itself as its completion callback.
// NOTE(review): presumably this makes the suite execute a second time after
// the first pass finishes — confirm against fastbench's API.
run(run)
15 changes: 6 additions & 9 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,20 @@ IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
'use strict';

var through = require('through2')
var StringDecoder = require('string_decoder').StringDecoder

function transform(chunk, enc, cb) {
var list = chunk.toString('utf8').split(this.matcher)
, remaining = list.pop()
this._last += this._decoder.write(chunk)

var list = this._last.toString('utf8').split(this.matcher)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

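The `toString('utf8')` call is not needed now that we use a string decoder: `this._decoder.write(chunk)` already returns a string, so `this._last` is always a string.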

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done :)

, i

if (list.length >= 1) {
push(this, this.mapper((this._last + list.shift())))
} else {
remaining = this._last + remaining
}
this._last = list.pop()

for (i = 0; i < list.length; i++) {
push(this, this.mapper(list[i]))
}

this._last = remaining

cb()
}

Expand Down Expand Up @@ -96,6 +92,7 @@ function split(matcher, mapper, options) {
stream._readableState.objectMode = true;

stream._last = ''
stream._decoder = new StringDecoder('utf8')
stream.matcher = matcher
stream.mapper = mapper

Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"description": "split a Text Stream into a Line Stream, using Stream 3",
"main": "index.js",
"scripts": {
"test": "tap test.js"
"test": "tap -b test.js"
},
"pre-commit": [
"test"
Expand All @@ -21,6 +21,7 @@
"license": "ISC",
"devDependencies": {
"callback-stream": "^1.1.0",
"fastbench": "^1.0.0",
"pre-commit": "^1.1.2",
"tap": "^5.0.0"
},
Expand Down
23 changes: 22 additions & 1 deletion test.js
Original file line number Diff line number Diff line change
Expand Up @@ -217,5 +217,26 @@ test('split utf8 chars', function(t) {
t.deepEqual(list, ['烫烫烫', '锟斤拷'])
}))

input.end("烫烫烫\r\n锟斤拷")
var buf = new Buffer("烫烫烫\r\n锟斤拷", "utf8");
for (var i = 0; i < buf.length; ++i) {
input.write(buf.slice(i, i + 1))
}
input.end();
})

// Writes the UTF-8 bytes of two CRLF-separated lines into split() two bytes
// at a time. Each of these characters encodes to three bytes, so the 2-byte
// chunk boundaries fall inside characters; the test passes only if split()
// reassembles the partial sequences before matching line breaks.
test('split utf8 chars 2by2', function(t) {
  t.plan(1)

  var input = split()

  input.pipe(strcb(function(err, list) {
    t.deepEqual(list, ['烫烫烫', '烫烫烫'])
  }))

  var str = "烫烫烫\r\n烫烫烫";
  // Buffer.from replaces the deprecated `new Buffer(string, encoding)`
  // constructor, which emits a deprecation warning on current Node.js.
  var buf = Buffer.from(str, "utf8");
  for (var i = 0; i < buf.length; i+=2) {
    // slice() past the end of the buffer is clamped, so the final chunk is safe.
    input.write(buf.slice(i, i + 2))
  }
  input.end();
})