Skip to content

Commit

Permalink
Merge pull request #53 from derjust/master
Browse files Browse the repository at this point in the history
Strict record handling
  • Loading branch information
doug-martin committed Mar 3, 2015
2 parents 1707f04 + e859b69 commit 4558ccc
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 2 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ All methods accept the following `options`
* `headers=false`: Set to true if you expect the first line of your `CSV` to contain headers; alternately, you can specify an array of headers to use.
* `ignoreEmpty=false`: If you wish to ignore empty rows.
* `discardUnmappedColumns=false`: If you want to discard columns that do not map to a header.
* `strictColumnHandling=false`: If you want to treat empty lines and lines with too few (or too many) fields as invalid rows, reported via the `data-invalid` event instead of `data`. Only to be used with `headers=true`.
* `delimiter=','`: If your data uses an alternate delimiter such as `;` or `\t`.
* **NOTE** When specifying an alternate `delimiter` you may only pass in a single character delimiter
* `quote='"'`: The character to use to escape values that contain a delimiter. If you set it to `null` then all quoting will be ignored
Expand All @@ -35,7 +36,7 @@ All methods accept the following `options`

* `data`: Emitted when a record is parsed.
* `record`: Emitted when a record is parsed. **DEPRECATED**
* `data-invalid`: Emitted if an invalid row was encountered, **only emitted if the `validate` function is used**.
* `data-invalid`: Emitted if an invalid row was encountered, **only emitted if the `validate` function is used or `strictColumnHandling=true`**.
* `data`: Emitted with the object or `stringified` version if the `objectMode` is set to `false`.

**`([options])` or `.parse(options)`**
Expand Down
10 changes: 10 additions & 0 deletions lib/parser/parser_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ function ParserStream(options) {
this._headers = options.headers;
this._ignoreEmpty = options.ignoreEmpty;
this._discardUnmappedColumns = options.discardUnmappedColumns;
this._strictColumnHandling = options.strictColumnHandling;
this.__objectMode = options.objectMode;
this.__buffered = [];
return this;
Expand Down Expand Up @@ -112,6 +113,7 @@ extended(ParserStream).extend({
__processHeaders: function (rows, cb) {
var headers = this._headers,
discardUnmappedColumns = this._discardUnmappedColumns,
strictColumnHandling = this._strictColumnHandling,
self = this;

function headerHandler(err, headers) {
Expand All @@ -125,14 +127,22 @@ extended(ParserStream).extend({
if (data.length > headersLength) {
if (discardUnmappedColumns) {
data.splice(headersLength);
} else if (strictColumnHandling) {
self.emit("data-invalid", data);
return orig(null, cb);
} else {
self.emit("error", new Error("Unexpected Error: column header mismatch expected: " + headersLength + " columns got: " + data.length));
return orig(null, cb);
}
} else if (strictColumnHandling && (data.length < headersLength)) {
self.emit("data-invalid", data);
return orig(null, cb);
}
while (++i < headersLength) {
val = data[i];
ret[headers[i]] = isUndefined(val) ? '' : val;
}

return orig(ret, cb);
};
}
Expand Down
2 changes: 1 addition & 1 deletion test/assets/test24.csv
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ First7,Last7,email7@email.com,"7 Street St, State ST, 88888"
First8,Last8,email8@email.com,"8 Street St, State ST, 88888"
#Line 9
First9,Last9,email9@email.com,"9 Street St, State ST, 88888"
#End of CSV
#End of CSV
4 changes: 4 additions & 0 deletions test/assets/test25.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
first_name,last_name,email_address,extra
First1,Last1,email1@email.com,Extra1
First2,Last2,email2@email.com,
First3,Last3,email3@email.com,Extra2
28 changes: 28 additions & 0 deletions test/parsing.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,13 @@ var expected23 = [
{"first_name": "First3", "last_name": "Last3", "email_address": "email3@email.com"}
];

// Rows of test25.csv that the strictColumnHandling test expects to receive
// through "data" events (the "First1" and "First3" lines, keyed by header name).
var expected25 = [
{"first_name": "First1", "last_name": "Last1", "email_address": "email1@email.com", "extra": "Extra1"},
{"first_name": "First3", "last_name": "Last3", "email_address": "email3@email.com", "extra": "Extra2"}
];

// Raw field array the strictColumnHandling test expects to receive through the
// "data-invalid" event: the "First2" line of test25.csv, which has no "extra" value.
var expected25_invalid = [ 'First2', 'Last2', 'email2@email.com' ];

it.describe("fast-csv parsing", function (it) {

it.timeout(60000);
Expand Down Expand Up @@ -985,6 +992,27 @@ it.describe("fast-csv parsing", function (it) {
});
});

// Parses test25.csv with `headers: true` and `strictColumnHandling: true` and checks
// that the row with a missing column is reported through "data-invalid" while the
// remaining rows are delivered through "data".
it.should("report missing columns that do not exist but have a header with strictColumnHandling option", function (next) {
// Collects every row delivered through the "data" event.
var actual = [];
// Set by the "data-invalid" handler so the "end" handler can verify that it fired.
var reachedInvalid = false;
csv
.fromPath(path.resolve(__dirname, "./assets/test25.csv"), {headers: true, strictColumnHandling: true})
.on("data", function (data) {
actual.push(data);
})
// NOTE(review): the handler parameter `actual` shadows the outer `actual` array;
// inside this callback it refers to the invalid row, not the collected rows.
.on("data-invalid", function(actual) {
assert.deepEqual(actual, expected25_invalid);
reachedInvalid = true;
})
// Any "error" event fails the test by passing the error to `next`.
.on("error", next)
.on("end", function (count) {
// The invalid row must have been reported, and it must not appear among the
// rows received via "data"; the count passed to "end" is expected to match
// the number of rows received via "data".
assert.equal(true, reachedInvalid);
assert.deepEqual(actual, expected25);
assert.equal(count, actual.length);
next();
});
});

it.describe("alternate delimiters", function (it) {
it.should("support tab delimiters", function (next) {
var actual = [];
Expand Down

0 comments on commit 4558ccc

Please sign in to comment.