From 7d2acb58f0063211a0ede54e3727b3cce14065e6 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Fri, 11 Jan 2019 19:53:11 -0800 Subject: [PATCH] fread drop when NULL in colClasses (#3277) --- inst/tests/tests.Rraw | 5 +++++ src/freadR.c | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 48055c384..0e7a318d1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11143,6 +11143,11 @@ test(1825.15, fread(str, colClasses=list(numeric=2, NULL=3:5), drop=1), data.table(x2=c(2,4), x3=c(1.5,2.5), x4=c("T","F"), x5=c("cc","ff")), warning="Ignoring the NULL item in colClasses= because select= or drop= has been used") # warning because drop != colClasses$`NULL` test(1825.16, fread(str, colClasses=(cl<-list(numeric=2, NULL=3:5)), drop=cl$`NULL`), data.table(x1=INT(1,3), x2=c(2,4))) # cover commit f0bd6e3 +# NULL didn't work in 1.11.0-1.11.8 so some usage exists where drop= is used to respecify the NULLs. The warning could be reintroduced in future. +# https://github.com/Rdatatable/data.table/issues/3233#issuecomment-453674647 +test(1825.17, fread(str, colClasses=c("integer","integer","NULL","character","NULL"), drop=3), data.table(x1=INT(1,3), x2=INT(2,4), x4=c("T","F"))) +test(1825.18, fread(str, colClasses=c("integer","numeric","NULL","character","NULL"), drop=3:4), data.table(x1=INT(1,3), x2=c(2,4))) +test(1825.19, fread(str, drop=6), data.table(x1=INT(1,3), x2=INT(2,4), x3=c(1.5,2.5), x4=c("T","F"), x5=c("cc","ff")), warning="Column number 6 (drop[1]) is out of range [1,ncol=5]") # issue 2351 set.seed(1) diff --git a/src/freadR.c b/src/freadR.c index 833d9f684..b8de1d895 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -281,7 +281,10 @@ _Bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, int ncol) if (k<1 || k>ncol) { DTWARN("Column number %d (drop[%d]) is out of range [1,ncol=%d]",k,j+1,ncol); } else { - if (type[k-1] == CT_DROP) STOP("Duplicates detected in drop"); + // if (type[k-1] == CT_DROP) DTWARN("drop= contains duplicates"); + // NULL in colClasses didn't work between 1.11.0 and 1.11.8 so people have been using drop= to re-specify the NULL columns in colClasses. Now that NULL in colClasses works + // from v1.12.0 there is no easy way to distinguish dups in drop= from drop overlapping with NULLs in colClasses. But it's unambiguous that it was intended to remove these + // columns, so no need for warning. type[k-1] = CT_DROP; } }