diff --git a/ChangeLog.md b/ChangeLog.md index 2428d9e..170896a 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,11 @@ +## 0.0.10 +- You want collections? You got 'em +- Added progress reporting - you can specify how often + the rate is reported via the -progressRate option +- Refactored RateLimiting - added it to a new RateLimitingSession +- Laid groundwork for Dynamic rate limiting - to be worked out + once we find a way to collect the right statistic + ## 0.0.9 - Added -successDir and -failureDir - Added return codes for the loader and unloader diff --git a/README.md b/README.md index 1fd7d51..a8ef6cb 100644 --- a/README.md +++ b/README.md @@ -15,11 +15,11 @@ loading of various types of delimited files, including ### Downloading This utility has already been built, and is available at -https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.9/cassandra-loader +https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.10/cassandra-loader Get it with wget: ``` -wget https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.9/cassandra-loader +wget https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.10/cassandra-loader ``` ### Building @@ -71,11 +71,12 @@ cassandra-loader -f myFileToLoad.csv -host 1.2.3.4 -schema "test.ltest(a, b, c, `-port` | Port Number | 9042 | Cassandra native protocol port number `-user` | Username | none | Cassandra username `-pw` | Password | none | Cassandra password + `-consistencyLevel` | Consistency Level | ONE | CQL Consistency Level + `-numThreads` | Number of threads | Number of CPUs | Number of threads to use (one per file) `-numFutures` | Number of Futures | 1000 | Number of Java driver futures in flight. `-numRetries` | Number of retries | 1 | Number of times to retry the INSERT before declaring defeat. `-queryTimeout` | Timeout in seconds | 2 | Amount of time to wait for a query to finish before timing out. 
`-delim` | Delimiter | , | Delimiter to use - `-delimInQuotes` | True/False | false | Are delimiters allowed inside quoted strings? This is more expensive to parse, so we default to false. `-nullString` | Null String | | String to represent NULL data `-boolStyle` | Boolean Style | TRUE_FALSE | String for boolean values. Options are "1_0", "Y_N", "T_F", "YES_NO", "TRUE_FALSE". `-decimalDelim` | Decimal delimiter | . | Delimiter for decimal values. Options are "." or "," @@ -86,6 +87,7 @@ cassandra-loader -f myFileToLoad.csv -host 1.2.3.4 -schema "test.ltest(a, b, c, `-maxInsertErrors`| Max insert errors | 10 | Maximum number of rows that do not insert to allow before exiting. `-badDir` | Bad directory | current directory | Directory to write badly parsed and badly inserted rows - as well as the log file. `-rate` | Ingest rate | unlimited | Maximum rate to insert data - in rows/sec. + `-progressRate` | Progress rate | 100000 | How often to report the ingest rate (number of rows) `-successDir` | Success directory | | Location to move successfully loaded files `-failureDir` | Failure directory | | Location to move files that failed to load @@ -96,10 +98,16 @@ That way, you could pipe data in from other commands: grep IMPORTANT data.csv | cassandra-loader -f stdin -h 1.2.3.4 -cql "test.itest(a, b)" ``` -If you specify either the username or the password, then you must specify both. +Collections are supported. +Sets are started with '{' and ended with '}' and enclose a comma-separated list +{1,2,3} or {"a","b","c"} +Lists are started with '[' and ended with ']' and enclose a comma-separated list +[1,2,3] or ["a","b","c"] +Maps are started with '{' and ended with '}' and enclose a comma-separated list +of pairs that are separated by ':' +{1:1,2:2,3:3} or {"a":1, "b":2, "c":3} -If you do not have delimiters inside quoted text fields, then leave the --delimInQuotes option false. Enabling it will result in slower parsing times. 
+If you specify either the username or the password, then you must specify both. numFutures is a way to control the level of parallelism, but at some point too many will actually slow down the load. The default of 500 is a decent @@ -128,7 +136,6 @@ different styles, the True and False strings are as follows: Usage: -f -host -schema [OPTIONS] OPTIONS: -delim Delimiter to use [,] - -delimInQuotes true Set to 'true' if delimiter can be inside quoted fields [false] -dateFormat Date format [default for Locale.ENGLISH] -nullString String that signifies NULL [none] -skipRows Number of rows to skip [0] @@ -138,7 +145,7 @@ OPTIONS: -port CQL Port Number [9042] -user Cassandra username [none] -pw Password for user [none] - -numFutures Number of CQL futures to keep in flight [1000] + -consistencyLevel Consistency level [LOCAL_ONE] -numFutures Number of CQL futures to keep in flight [1000] -decimalDelim Decimal delimiter [.] Other option is ',' -boolStyle Style for booleans [TRUE_FALSE] -numThreads Number of concurrent threads (files) to load [num cores] @@ -146,6 +153,7 @@ OPTIONS: -numRetries Number of times to retry the INSERT [1] -maxInsertErrors <# errors> Maximum INSERT errors to endure [10] -rate Maximum insert rate [50000] + -progressRate How often to report the insert rate [100000] -successDir Directory where to move successfully loaded files -failureDir Directory where to move files that did not successfully load diff --git a/build.gradle b/build.gradle index 8dd39b6..e52ab35 100644 --- a/build.gradle +++ b/build.gradle @@ -1,7 +1,7 @@ apply plugin: 'java' apply plugin: 'application' -def versionNum = '0.0.9' +def versionNum = '0.0.10' task loader(type: Exec) { dependsOn << 'uberloader' diff --git a/src/main/java/com/datastax/loader/CqlDelimLoad.java b/src/main/java/com/datastax/loader/CqlDelimLoad.java index fb0b813..b67917a 100644 --- a/src/main/java/com/datastax/loader/CqlDelimLoad.java +++ b/src/main/java/com/datastax/loader/CqlDelimLoad.java @@ -66,7 +66,7 @@ import 
com.datastax.driver.core.policies.DCAwareRoundRobinPolicy; public class CqlDelimLoad { - private String version = "0.0.9"; + private String version = "0.0.10"; private String host = null; private int port = 9042; private String username = null; diff --git a/src/main/java/com/datastax/loader/CqlDelimUnload.java b/src/main/java/com/datastax/loader/CqlDelimUnload.java index a6b2177..92d94b7 100644 --- a/src/main/java/com/datastax/loader/CqlDelimUnload.java +++ b/src/main/java/com/datastax/loader/CqlDelimUnload.java @@ -57,7 +57,7 @@ public class CqlDelimUnload { - private String version = "0.0.9"; + private String version = "0.0.10"; private String host = null; private int port = 9042; private String username = null;