Skip to content

Commit

Permalink
shortint, tinyint, date
Browse files Browse the repository at this point in the history
  • Loading branch information
brianmhess committed Dec 23, 2016
1 parent f7f3607 commit 6e2d5b2
Show file tree
Hide file tree
Showing 10 changed files with 233 additions and 32 deletions.
5 changes: 5 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 0.0.24
- Added support for DATE (and associated -localDateFormat option)
- Added support for SMALLINT and TINYINT
- Fixed an issue with special characters in column names (Issue 59)

## 0.0.23
- Fixed case of commas in Map keys
- Fixed keystore/truststore-pw typo (was pwd)
Expand Down
16 changes: 9 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ loading of various types of delimited files, including
### Downloading

This utility has already been built, and is available at
https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.23/cassandra-loader
https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.24/cassandra-loader

Get it with wget:

```
wget https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.23/cassandra-loader
wget https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.24/cassandra-loader
```

### Building
Expand Down Expand Up @@ -189,7 +189,7 @@ When using `jsonline`, all JSON field names are case-sensitive. When using `jso
## Usage Statement:

```
version: 0.0.23
version: 0.0.24
Usage: -f <filename> -host <ipaddress> [OPTIONS]
OPTIONS:
-schema <schema> Table schema (when using delim)
Expand All @@ -198,7 +198,8 @@ OPTIONS:
-configFile <filename> File with configuration options
-delim <delimiter> Delimiter to use [,]
-charsPerColumn <chars> Max number of chars per column [4096]
-dateFormat <dateFormatString> Date format [default for Locale.ENGLISH]
-dateFormat <dateFormatString> Date format for TIMESTAMP [default for Locale.ENGLISH]
-localDateFormat <formatString> Date format for DATE [yyyy-MM-dd]
-nullString <nullString> String that signifies NULL [none]
-comment <commentString> Comment symbol to use [none]
-skipRows <skipRows> Number of rows to skip [0]
Expand Down Expand Up @@ -309,7 +310,7 @@ cassandra-unloader -f stdout -host host1 -schema "ks.table(a,b,c)" | cassandra-l

Get it with wget:
```
wget https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.23/cassandra-unloader
wget https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.24/cassandra-unloader
```

To build, run:
Expand All @@ -328,13 +329,14 @@ cassandra-unloader
### Usage statement:

```
version: 0.0.23
version: 0.0.24
Usage: -f <outputStem> -host <ipaddress> -schema <schema> [OPTIONS]
OPTIONS:
-configFile <filename> File with configuration options
-format [delim|jsonline|jsonarray] Format of data: delimited or JSON [delim]
-delim <delimiter> Delimiter to use [,]
-dateFormat <dateFormatString> Date format [default for Locale.ENGLISH]
-dateFormat <dateFormatString> Date format for TIMESTAMP [default for Locale.ENGLISH]
-localDateFormat <FormatString> Date format for DATE [yyyy-MM-dd]
-nullString <nullString> String that signifies NULL [none]
-port <portNumber> CQL Port Number [9042]
-user <username> Cassandra username [none]
Expand Down
4 changes: 2 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apply plugin: 'java'
apply plugin: 'application'

def versionNum = '0.0.23'
def versionNum = '0.0.24'

allprojects {
tasks.withType(JavaCompile) {
Expand Down Expand Up @@ -29,7 +29,7 @@ repositories {
}

dependencies {
compile 'com.datastax.cassandra:cassandra-driver-core:3.0.0'
compile 'com.datastax.cassandra:cassandra-driver-core:3.1.0'
compile 'org.xerial.snappy:snappy-java:1.0.5'
compile 'net.jpountz.lz4:lz4:1.2.0'
compile 'ch.qos.logback:logback-classic:1.1.3'
Expand Down
9 changes: 7 additions & 2 deletions src/main/java/com/datastax/loader/CqlDelimLoad.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
import org.apache.commons.lang3.StringEscapeUtils;

public class CqlDelimLoad {
private String version = "0.0.23";
private String version = "0.0.24";
private String host = null;
private int port = 9042;
private String username = null;
Expand Down Expand Up @@ -126,6 +126,7 @@ public class CqlDelimLoad {
private Locale locale = null;
private BooleanParser.BoolStyle boolStyle = null;
private String dateFormatString = null;
private String localDateFormatString = "yyyy-MM-dd";
private String nullString = null;
private String commentString = null;
private String delimiter = null;
Expand All @@ -145,7 +146,8 @@ private String usage() {
usage.append(" -configFile <filename> File with configuration options\n");
usage.append(" -delim <delimiter> Delimiter to use [,]\n");
usage.append(" -charsPerColumn <chars> Max number of chars per column [4096]\n");
usage.append(" -dateFormat <dateFormatString> Date format [default for Locale.ENGLISH]\n");
usage.append(" -dateFormat <dateFormatString> Date format for TIMESTAMP [default for Locale.ENGLISH]\n");
usage.append(" -localDateFormat <formatString> Date format for DATE [yyyy-MM-dd]\n");
usage.append(" -nullString <nullString> String that signifies NULL [none]\n");
usage.append(" -comment <commentString> Comment symbol to use [none]\n");
usage.append(" -skipRows <skipRows> Number of rows to skip [0]\n");
Expand Down Expand Up @@ -421,6 +423,7 @@ private boolean parseArgs(String[] args) throws IOException, FileNotFoundExcepti
if (null != (tkey = amap.remove("-maxRows"))) maxRows = Integer.parseInt(tkey);
if (null != (tkey = amap.remove("-badDir"))) badDir = tkey;
if (null != (tkey = amap.remove("-dateFormat"))) dateFormatString = tkey;
// DATE columns take their own format option; -dateFormat (TIMESTAMP) is consumed separately.
if (null != (tkey = amap.remove("-localDateFormat"))) localDateFormatString = tkey;
if (null != (tkey = amap.remove("-nullString"))) nullString = tkey;
if (null != (tkey = amap.remove("-comment"))) commentString = tkey;
if (null != (tkey = amap.remove("-delim"))) delimiter = tkey;
Expand Down Expand Up @@ -628,6 +631,7 @@ public int compare(File f1, File f2) {
charsPerColumn, nullString,
commentString,
dateFormatString,
localDateFormatString,
boolStyle, locale,
maxErrors, skipRows,
skipCols,
Expand All @@ -652,6 +656,7 @@ public int compare(File f1, File f2) {
charsPerColumn, nullString,
commentString,
dateFormatString,
localDateFormatString,
boolStyle, locale,
maxErrors, skipRows,
skipCols,
Expand Down
11 changes: 8 additions & 3 deletions src/main/java/com/datastax/loader/CqlDelimLoadTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class CqlDelimLoadTask implements Callable<Long> {
private Locale locale = null;
private BooleanParser.BoolStyle boolStyle = null;
private String dateFormatString = null;
private String localDateFormatString = null;
private String nullString = null;
private String commentString = null;
private String delimiter = null;
Expand All @@ -101,7 +102,8 @@ class CqlDelimLoadTask implements Callable<Long> {

public CqlDelimLoadTask(String inCqlSchema, String inDelimiter,
int inCharsPerColumn,
String inNullString, String inCommentString, String inDateFormatString,
String inNullString, String inCommentString,
String inDateFormatString, String inLocalDateFormatString,
BooleanParser.BoolStyle inBoolStyle,
Locale inLocale,
long inMaxErrors, long inSkipRows,
Expand All @@ -120,6 +122,7 @@ public CqlDelimLoadTask(String inCqlSchema, String inDelimiter,
nullString = inNullString;
commentString = inCommentString;
dateFormatString = inDateFormatString;
localDateFormatString = inLocalDateFormatString;
boolStyle = inBoolStyle;
locale = inLocale;
maxErrors = inMaxErrors;
Expand Down Expand Up @@ -183,14 +186,16 @@ private void setup() throws IOException, ParseException, org.json.simple.parser.
if (format.equalsIgnoreCase("delim")) {
cdp = new CqlDelimParser(cqlSchema, delimiter, charsPerColumn,
nullString, commentString,
dateFormatString, boolStyle, locale,
dateFormatString, localDateFormatString,
boolStyle, locale,
skipCols, session, true);
}
else if (format.equalsIgnoreCase("jsonline")
|| format.equalsIgnoreCase("jsonarray")) {
cdp = new CqlDelimParser(keyspace, table, delimiter, charsPerColumn,
nullString, commentString,
dateFormatString, boolStyle, locale,
dateFormatString, localDateFormatString,
boolStyle, locale,
skipCols, session, true);
}

Expand Down
76 changes: 61 additions & 15 deletions src/main/java/com/datastax/loader/CqlDelimParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,20 @@
import com.datastax.loader.parser.BigIntegerParser;
import com.datastax.loader.parser.BooleanParser;
import com.datastax.loader.parser.ByteBufferParser;
import com.datastax.loader.parser.ByteParser;
import com.datastax.loader.parser.DateParser;
import com.datastax.loader.parser.DelimParser;
import com.datastax.loader.parser.DoubleParser;
import com.datastax.loader.parser.FloatParser;
import com.datastax.loader.parser.InetAddressParser;
import com.datastax.loader.parser.IntegerParser;
import com.datastax.loader.parser.ListParser;
import com.datastax.loader.parser.LocalDateParser;
import com.datastax.loader.parser.LongParser;
import com.datastax.loader.parser.MapParser;
import com.datastax.loader.parser.Parser;
import com.datastax.loader.parser.SetParser;
import com.datastax.loader.parser.ShortParser;
import com.datastax.loader.parser.StringParser;
import com.datastax.loader.parser.UUIDParser;

Expand All @@ -53,6 +56,9 @@
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

import com.univocity.parsers.csv.CsvParser;
import com.univocity.parsers.csv.CsvParserSettings;

public class CqlDelimParser {
private Map<DataType.Name, Parser> pmap;
private List<SchemaBits> sbl;
Expand All @@ -63,26 +69,30 @@ public class CqlDelimParser {
private JSONParser jsonParser;

public CqlDelimParser(String inCqlSchema, String inDelimiter, int inCharsPerColumn,
String inNullString, String inCommentString, String inDateFormatString,
String inNullString, String inCommentString,
String inDateFormatString, String inLocalDateFormatString,
BooleanParser.BoolStyle inBoolStyle, Locale inLocale,
String skipList, Session session, boolean bLoader)
throws ParseException {
// Optionally provide things for the line parser - date format, boolean format, locale
initPmap(inDateFormatString, inBoolStyle, inLocale, bLoader);
initPmap(inDateFormatString, inLocalDateFormatString, inBoolStyle,
inLocale, bLoader);
processCqlSchema(inCqlSchema, session);
createDelimParser(inDelimiter, inCharsPerColumn, inNullString, inCommentString, skipList);
}

public CqlDelimParser(String inKeyspace, String inTable, String inDelimiter,
int inCharsPerColumn,
String inNullString, String inCommentString, String inDateFormatString,
String inNullString, String inCommentString,
String inDateFormatString, String inLocalDateFormatString,
BooleanParser.BoolStyle inBoolStyle, Locale inLocale,
String skipList, Session session, boolean bLoader)
throws ParseException {
// Optionally provide things for the line parser - date format, boolean format, locale
keyspace = inKeyspace;
tablename = inTable;
initPmap(inDateFormatString, inBoolStyle, inLocale, bLoader);
initPmap(inDateFormatString, inLocalDateFormatString, inBoolStyle,
inLocale, bLoader);
processCqlSchema(session);
createDelimParser(inDelimiter, inCharsPerColumn, inNullString, inCommentString, skipList);
}
Expand All @@ -103,9 +113,12 @@ private class SchemaBits {
}

// initialize the Parsers and the parser map
private void initPmap(String dateFormatString, BooleanParser.BoolStyle inBoolStyle,
private void initPmap(String dateFormatString, String localDateFormatString,
BooleanParser.BoolStyle inBoolStyle,
Locale inLocale, boolean bLoader) {
pmap = new HashMap<DataType.Name, Parser>();
Parser byteParser = new ByteParser(inLocale, bLoader);
Parser shortParser = new ShortParser(inLocale, bLoader);
Parser integerParser = new IntegerParser(inLocale, bLoader);
Parser longParser = new LongParser(inLocale, bLoader);
Parser floatParser = new FloatParser(inLocale, bLoader);
Expand All @@ -118,20 +131,25 @@ private void initPmap(String dateFormatString, BooleanParser.BoolStyle inBoolSty
Parser byteBufferParser = new ByteBufferParser();
Parser inetAddressParser = new InetAddressParser();
Parser dateParser = new DateParser(dateFormatString);
Parser localDateParser = new LocalDateParser(localDateFormatString);

pmap.put(DataType.Name.ASCII, stringParser);
pmap.put(DataType.Name.BIGINT, longParser);
pmap.put(DataType.Name.BLOB, byteBufferParser);
pmap.put(DataType.Name.BOOLEAN, booleanParser);
pmap.put(DataType.Name.COUNTER, longParser);
pmap.put(DataType.Name.DATE , localDateParser);
pmap.put(DataType.Name.DECIMAL, bigDecimalParser);
pmap.put(DataType.Name.DOUBLE, doubleParser);
pmap.put(DataType.Name.FLOAT, floatParser);
pmap.put(DataType.Name.INET, inetAddressParser);
pmap.put(DataType.Name.INT, integerParser);
pmap.put(DataType.Name.SMALLINT , shortParser);
pmap.put(DataType.Name.TEXT, stringParser);
pmap.put(DataType.Name.TIME , longParser);
pmap.put(DataType.Name.TIMESTAMP, dateParser);
pmap.put(DataType.Name.TIMEUUID, uuidParser);
pmap.put(DataType.Name.TINYINT , byteParser);
pmap.put(DataType.Name.UUID, uuidParser);
pmap.put(DataType.Name.VARCHAR, stringParser);
pmap.put(DataType.Name.VARINT, bigIntegerParser);
Expand All @@ -140,15 +158,34 @@ private void initPmap(String dateFormatString, BooleanParser.BoolStyle inBoolSty

// Validate the CQL schema, extract the keyspace and tablename, and process the rest of the schema
private void processCqlSchema(String cqlSchema, Session session) throws ParseException {
String kstnRegex = "^\\s*(\\\"?[A-Za-z0-9_]+\\\"?)\\.(\\\"?[A-Za-z0-9_]+\\\"?)\\s*[\\(]\\s*(\\\"?[A-Za-z0-9_]+\\\"?\\s*(,\\s*\\\"?[A-Za-z0-9_]+\\\"?\\s*)*)[\\)]\\s*$";
Pattern p = Pattern.compile(kstnRegex);
Matcher m = p.matcher(cqlSchema);
if (!m.find()) {
throw new ParseException("Badly formatted schema " + cqlSchema, 0);
}
keyspace = m.group(1);
tablename = m.group(2);
String schemaString = m.group(3);
CsvParserSettings ks_settings = new CsvParserSettings();
ks_settings.getFormat().setLineSeparator("\n");
ks_settings.getFormat().setDelimiter('.');
ks_settings.getFormat().setQuote('\"');
ks_settings.getFormat().setQuoteEscape('\\');
ks_settings.getFormat().setCharToEscapeQuoteEscaping('\\');
ks_settings.setKeepQuotes(true);
ks_settings.setKeepEscapeSequences(true);
CsvParser ks_parser = new CsvParser(ks_settings);
String[] ks_elements = ks_parser.parseLine(cqlSchema);
keyspace = ks_elements[0];
String table_string = cqlSchema.substring(keyspace.length() + 1);

CsvParserSettings table_settings = new CsvParserSettings();
table_settings.getFormat().setLineSeparator("\n");
table_settings.getFormat().setDelimiter('(');
table_settings.getFormat().setQuote('\"');
table_settings.getFormat().setQuoteEscape('\\');
table_settings.getFormat().setCharToEscapeQuoteEscaping('\\');
table_settings.setKeepQuotes(true);
table_settings.setKeepEscapeSequences(true);
CsvParser table_parser = new CsvParser(table_settings);
String[] table_elements = table_parser.parseLine(table_string);
tablename = table_elements[0];

String schemaString = table_string.substring(tablename.length() + 1,
table_string.length() - 1);

sbl = schemaBits(schemaString, session);
}

Expand All @@ -170,7 +207,16 @@ private List<SchemaBits> schemaBits(String in, Session session) throws ParseExce
}
List<String> inList = new ArrayList<String>();
if (null != in) {
String[] tlist = in.split(",");
CsvParserSettings settings = new CsvParserSettings();
settings.getFormat().setLineSeparator("\n");
settings.getFormat().setDelimiter(',');
settings.getFormat().setQuote('\"');
settings.getFormat().setQuoteEscape('\\');
settings.getFormat().setCharToEscapeQuoteEscaping('\\');
settings.setKeepQuotes(true);
settings.setKeepEscapeSequences(true);
CsvParser parser = new CsvParser(settings);
String[] tlist = parser.parseLine(in);
for (int i = 0; i < tlist.length; i++)
inList.add(tlist[i].trim());
}
Expand Down
Loading

0 comments on commit 6e2d5b2

Please sign in to comment.