Skip to content

Commit

Permalink
Merge pull request #99 from cdmikechen/data-type-refactor
Browse files Browse the repository at this point in the history
Fixed an issue with the DATA_TYPE field returning the wrong type in DatabendDatabaseMetaData
  • Loading branch information
hantmac authored Sep 14, 2023
2 parents da3be4a + c01b0a6 commit 400d281
Show file tree
Hide file tree
Showing 7 changed files with 605 additions and 429 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
package com.databend.client.data;


import java.sql.Types;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;

import static com.databend.client.data.DatabendRawType.startsWithIgnoreCase;
import static com.google.common.base.MoreObjects.toStringHelper;

/**
 * Databend data types and the {@link java.sql.Types} code each one is reported as.
 *
 * <p>Every constant carries the JDBC type code, the Databend display name, whether the type is
 * signed, a length, a flag marking date/time types, and the names/aliases it can be looked up by
 * through {@link #ofType(String)}.
 */
public enum DatabendDataType {

    BOOLEAN(Types.BOOLEAN, DatabendTypes.BOOLEAN, false, 1, false, "Boolean", "BOOL"),

    // int8 -> TINYINT -> -128~127
    INT_8(Types.TINYINT, DatabendTypes.INT8, true, 3, false, "Int8", "TINYINT"),
    UNSIGNED_INT_8(Types.TINYINT, DatabendTypes.UINT8, false, 3, false, "UInt8", "TINYINT UNSIGNED"),

    // int16 -> SMALLINT -> -32768~32767
    INT_16(Types.SMALLINT, DatabendTypes.INT16, true, 5, false, "Int16", "SMALLINT"),
    UNSIGNED_INT_16(Types.SMALLINT, DatabendTypes.UINT16, false, 5, false, "UInt16", "SMALLINT UNSIGNED"),

    // int32 -> INT -> -2147483648~2147483647
    INT_32(Types.INTEGER, DatabendTypes.INT32, true, 10, false, "Int32", "INTEGER"),
    UNSIGNED_INT_32(Types.INTEGER, DatabendTypes.UINT32, false, 10, false, "UInt32", "INTEGER UNSIGNED"),

    // INT64 -> BIGINT -> -9223372036854775808~9223372036854775807
    INT_64(Types.BIGINT, DatabendTypes.INT64, true, 19, false, "Int64", "BIGINT"),
    UNSIGNED_INT_64(Types.BIGINT, DatabendTypes.UINT64, false, 20, false, "UInt64", "BIGINT UNSIGNED"),

    DOUBLE(Types.DOUBLE, DatabendTypes.FLOAT64, true, 22, false, "Float64", "DOUBLE"),
    FLOAT(Types.FLOAT, DatabendTypes.FLOAT32, true, 12, false, "Float32", "FLOAT"),
    DECIMAL(Types.DECIMAL, DatabendTypes.DECIMAL, true, 65, false, "Decimal"),

    STRING(Types.VARCHAR, DatabendTypes.STRING, false, Integer.MAX_VALUE, false, "String", "VARCHAR"),

    DATE(Types.DATE, DatabendTypes.DATE, false, 10, true, "Date"),
    TIMESTAMP(Types.TIMESTAMP, DatabendTypes.TIMESTAMP, false, 26, true, "DateTime", "TIMESTAMP"),

    ARRAY(Types.ARRAY, DatabendTypes.ARRAY, false, 0, false, "Array"),
    MAP(Types.OTHER, DatabendTypes.MAP, false, 0, false, "Map"),
    // The display name must be the bitmap name; it previously reused the map name by mistake.
    BITMAP(Types.OTHER, DatabendTypes.BITMAP, false, 0, false, "Bitmap"),
    TUPLE(Types.OTHER, DatabendTypes.TUPLE, false, 0, false, "Tuple"),
    VARIANT(Types.OTHER, DatabendTypes.VARIANT, false, 0, false, "Variant", "Json"),

    NULL(Types.NULL, DatabendTypes.NULL, false, 0, false, "NULL"),
    ;

    /** Upper-cased alias -> type, used by {@link #ofType(String)}. */
    private static final Map<String, DatabendDataType> typeNameOrAliasToType;

    static {
        typeNameOrAliasToType = new HashMap<>();
        for (DatabendDataType dataType : values()) {
            for (String alias : dataType.aliases) {
                // Locale.ROOT keeps the keys stable regardless of the JVM default locale.
                typeNameOrAliasToType.put(alias.toUpperCase(Locale.ROOT), dataType);
            }
        }
    }

    private final int sqlType;
    private final String displayName;
    private final boolean signed;
    private final int length;
    private final boolean time;
    private final String[] aliases;

    /**
     * Get Databend data type by full type name.
     *
     * <p>Scalar names are compared for equality ignoring case. Decimal, String, Array, Map and
     * Tuple are matched by prefix ignoring case, so a name such as {@code Decimal(10, 2)} or
     * {@code Array(String)} resolves to the base type.
     *
     * @param typeName full Databend data type name
     * @return the matching {@link DatabendDataType}, or {@link #NULL} when {@code typeName} is
     *         {@code null} or matches nothing
     */
    public static DatabendDataType getByTypeName(String typeName) {
        if (typeName == null) {
            // The prefix checks below would otherwise fail with a NullPointerException.
            return NULL;
        }
        // the order of checks is important because some short names could match parts of longer names
        if (DatabendTypes.BOOLEAN.equalsIgnoreCase(typeName)) {
            return BOOLEAN;
        }
        if (DatabendTypes.INT8.equalsIgnoreCase(typeName)) {
            return INT_8;
        }
        if (DatabendTypes.UINT8.equalsIgnoreCase(typeName)) {
            return UNSIGNED_INT_8;
        }
        if (DatabendTypes.INT16.equalsIgnoreCase(typeName)) {
            return INT_16;
        }
        if (DatabendTypes.UINT16.equalsIgnoreCase(typeName)) {
            return UNSIGNED_INT_16;
        }
        if (DatabendTypes.INT32.equalsIgnoreCase(typeName)) {
            return INT_32;
        }
        if (DatabendTypes.UINT32.equalsIgnoreCase(typeName)) {
            return UNSIGNED_INT_32;
        }
        if (DatabendTypes.INT64.equalsIgnoreCase(typeName)) {
            return INT_64;
        }
        if (DatabendTypes.UINT64.equalsIgnoreCase(typeName)) {
            return UNSIGNED_INT_64;
        }
        if (DatabendTypes.FLOAT32.equalsIgnoreCase(typeName)) {
            return FLOAT;
        }
        if (DatabendTypes.FLOAT64.equalsIgnoreCase(typeName)) {
            return DOUBLE;
        }
        if (DatabendTypes.DATE.equalsIgnoreCase(typeName)) {
            return DATE;
        }
        if (DatabendTypes.TIMESTAMP.equalsIgnoreCase(typeName)) {
            return TIMESTAMP;
        }
        if (DatabendTypes.VARIANT.equalsIgnoreCase(typeName)) {
            return VARIANT;
        }
        if (DatabendTypes.BITMAP.equalsIgnoreCase(typeName)) {
            return BITMAP;
        }
        // Parameterised types: only the leading base name is compared.
        if (startsWithIgnoreCase(typeName, DatabendTypes.DECIMAL)) {
            return DECIMAL;
        }
        if (startsWithIgnoreCase(typeName, DatabendTypes.STRING)) {
            return STRING;
        }
        if (startsWithIgnoreCase(typeName, DatabendTypes.ARRAY)) {
            return ARRAY;
        }
        if (startsWithIgnoreCase(typeName, DatabendTypes.MAP)) {
            return MAP;
        }
        if (startsWithIgnoreCase(typeName, DatabendTypes.TUPLE)) {
            return TUPLE;
        }
        return NULL;
    }

    DatabendDataType(int sqlType, String displayName, boolean signed, int length, boolean isTime, String... aliases) {
        this.sqlType = sqlType;
        this.displayName = displayName;
        this.signed = signed;
        this.length = length;
        this.aliases = aliases;
        this.time = isTime;
    }

    /**
     * @return the {@link java.sql.Types} code this type is reported as
     */
    public int getSqlType() {
        return sqlType;
    }

    /**
     * @return the Databend name of this type, taken from {@code DatabendTypes}
     */
    public String getDisplayName() {
        return displayName;
    }

    /**
     * @return {@code true} if the type is declared as signed
     */
    public boolean isSigned() {
        return signed;
    }

    /**
     * @return the length declared for this type
     */
    public int getLength() {
        return length;
    }

    /**
     * @return {@code true} for the date/time types ({@link #DATE} and {@link #TIMESTAMP})
     */
    public boolean isTime() {
        return time;
    }

    /**
     * @return a copy of the names and aliases this type can be looked up by
     */
    public String[] getAliases() {
        // Defensive copy so callers cannot modify the enum's internal state.
        return aliases.clone();
    }

    /**
     * Look up a type by one of its aliases, ignoring case and surrounding whitespace.
     *
     * @param type type name or alias, e.g. {@code "BIGINT"} or {@code "Int64"}
     * @return the matching {@link DatabendDataType}, or {@link #NULL} when {@code type} is
     *         {@code null} or not a known alias
     */
    public static DatabendDataType ofType(String type) {
        if (type == null) {
            return NULL;
        }
        String formattedType = type.trim().toUpperCase(Locale.ROOT);
        return typeNameOrAliasToType.getOrDefault(formattedType, NULL);
    }

    @Override
    public String toString() {
        return toStringHelper(this)
                .add("sqlType", sqlType)
                .add("displayName", displayName)
                .add("signed", signed)
                .add("length", length)
                .add("time", time)
                .add("aliases", Arrays.asList(aliases))
                .toString();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,44 +29,139 @@

import com.fasterxml.jackson.annotation.JsonCreator;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

import static com.google.common.base.MoreObjects.toStringHelper;

// it could be either a string or a struct if it is not nullable
public class DatabendRawType {

// Type name as received, with an outer "Nullable(...)" wrapper removed when present
private final String type;
// true when the received type name was wrapped in Nullable
private final boolean isNullable;
// Base data type resolved from the type name by DatabendDataType.getByTypeName
private final DatabendDataType dataType;

// Decimal precision, number of sub types for tuple/map, otherwise the data type's length
private Integer columnSize = null;
// Decimal scale; stays null for non-decimal types
private Integer decimalDigits = null;
// Nested types of an array, tuple or map; stays null for every other type
private List<DatabendRawType> subType = null;

/**
 * Parse a Databend type name such as {@code Nullable(Decimal(10, 2))} or
 * {@code Tuple(String, Nullable(Int8))}.
 *
 * <p>An outer {@code Nullable(...)} wrapper (case-insensitive) is removed and recorded as
 * nullability. Nested types of arrays, tuples and maps are parsed recursively.
 *
 * @param type the type name; surrounding whitespace is ignored
 * @throws NullPointerException if {@code type} is {@code null}
 * @throws NumberFormatException if a decimal precision or scale is not an integer
 */
@JsonCreator
public DatabendRawType(String type) {
    // Nested names split out of "Tuple(A, B)" can carry a leading blank; without trimming,
    // " Nullable(Int8)" would not be recognised and would resolve to the NULL data type.
    String declared = type.trim();
    // Only a well-formed "Nullable(<inner>)" is unwrapped, so a name that merely starts with
    // "Nullable" no longer causes an out-of-range substring.
    if (isNullableWrapper(declared)) {
        this.isNullable = true;
        this.type = declared.substring("Nullable(".length(), declared.length() - 1).trim();
    } else {
        this.isNullable = false;
        this.type = declared;
    }
    // Get data type
    this.dataType = DatabendDataType.getByTypeName(this.type);
    if (dataType == DatabendDataType.DECIMAL) {
        // Numeric with decimals: "Decimal(precision, scale)"
        int comma = this.type.indexOf(',');
        if (comma >= 0) {
            this.columnSize = Integer.valueOf(this.type.substring(this.type.indexOf('(') + 1, comma).trim());
            this.decimalDigits = Integer.valueOf(this.type.substring(comma + 1, this.type.indexOf(')')).trim());
        } else {
            this.columnSize = dataType.getLength();
            this.decimalDigits = 0;
        }
    } else if (dataType == DatabendDataType.ARRAY) {
        // Element type is the text between "Array(" and the last ")"
        String elementType = typeArguments(this.type);
        this.subType = elementType.isEmpty()
                ? Collections.<DatabendRawType>emptyList()
                : Collections.singletonList(new DatabendRawType(elementType));
    } else if (dataType == DatabendDataType.TUPLE || dataType == DatabendDataType.MAP) {
        // Component types are the top-level comma separated entries between the parentheses
        this.subType = splitByComma(typeArguments(this.type)).stream()
                .map(DatabendRawType::new)
                .collect(Collectors.toList());
        this.columnSize = subType.size();
    }
    if (this.columnSize == null) {
        this.columnSize = this.dataType.getLength();
    }
}

/**
 * Tell whether the name has the form {@code Nullable(<non-empty inner type>)}, ignoring case.
 */
private static boolean isNullableWrapper(String typeName) {
    return startsWithIgnoreCase(typeName, "Nullable(")
            && typeName.endsWith(")")
            && typeName.length() > "Nullable()".length();
}

/**
 * Return the text between the first "(" and the closing ")" at the end of the name, or an
 * empty string when the name carries no parenthesised arguments (e.g. a bare "Array").
 */
private static String typeArguments(String typeName) {
    int open = typeName.indexOf('(');
    if (open < 0 || !typeName.endsWith(")")) {
        return "";
    }
    return typeName.substring(open + 1, typeName.length() - 1).trim();
}

/**
 * Case-insensitive variant of {@link String#startsWith(String)}.
 *
 * @param str    text to inspect
 * @param prefix expected leading text
 * @return {@code true} if {@code str} begins with {@code prefix}, ignoring case
 */
public static boolean startsWithIgnoreCase(String str, String prefix) {
    final int prefixLength = prefix.length();
    final boolean ignoreCase = true;
    // Compare the first prefixLength characters of both strings.
    return str.regionMatches(ignoreCase, 0, prefix, 0, prefixLength);
}

/**
 * Split a list of type names on commas, looking only at the topmost level: commas inside
 * parentheses (nested type arguments) do not split. Every returned entry is trimmed.
 *
 * @param types comma separated type names, e.g. {@code "String, Nullable(Decimal(10, 2))"}
 * @return the top-level entries in order; empty when {@code types} is empty
 */
private static List<String> splitByComma(String types) {
    List<String> entries = new ArrayList<>();
    StringBuilder current = new StringBuilder();
    // Number of currently open parentheses; a comma only separates entries at depth 0.
    int depth = 0;
    for (int i = 0, size = types.length(); i < size; i++) {
        char ch = types.charAt(i);
        if (ch == '(') {
            depth++;
        } else if (ch == ')') {
            depth--;
        }
        if (ch == ',' && depth == 0) {
            entries.add(current.toString().trim());
            current.setLength(0);
        } else {
            current.append(ch);
        }
    }
    if (current.length() > 0) {
        // The last entry must be trimmed like the others: it usually follows ", " and a
        // leading blank would stop the nested type name from being recognised.
        entries.add(current.toString().trim());
    }
    return entries;
}

/**
 * @return the type name, without the outer Nullable wrapper if one was present
 */
public String getType() {
    return this.type;
}


public boolean isNullable()
{
public boolean isNullable() {
return isNullable;
}

/**
 * @return the base data type resolved from the type name
 */
public DatabendDataType getDataType() {
    return this.dataType;
}

/**
 * @return the precision for a decimal with explicit precision and scale, the number of sub
 *         types for a tuple or map, otherwise the length of the resolved data type
 */
public Integer getColumnSize() {
    return this.columnSize;
}

/**
 * @return the scale of a decimal type (0 when none was declared), or {@code null} for
 *         non-decimal types
 */
public Integer getDecimalDigits() {
    return this.decimalDigits;
}

/**
 * @return the nested types of an array, tuple or map, or {@code null} for any other type
 */
public List<DatabendRawType> getSubType() {
    return this.subType;
}

/**
 * @return a debug representation listing the type name, sub types, nullability, resolved
 *         data type, column size and decimal digits
 */
@Override
public String toString() {
    return toStringHelper(this)
            .add("type", type)
            .add("subType", subType)
            .add("isNullable", isNullable)
            .add("dataType", dataType)
            .add("columnSize", columnSize)
            .add("decimalDigits", decimalDigits)
            .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,12 @@ public final class DatabendTypes
public static final String STRING = "string";
public static final String STRUCT = "struct";
public static final String ARRAY = "array";
public static final String TUPLE = "tuple";
public static final String MAP = "map";
public static final String BITMAP = "bitmap";
public static final String VARIANT = "variant";
public static final String VARIANT_ARRAY = "variantarray";
public static final String VARIANT_OBJECT = "variantobject";
public static final String INTERVAL = "interval";
public static final String DECIMAL = "Decimal";
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ public void testGetTypeFunction() {
assertTypeHandler("Nullable(DateTime64)", StringHandler.class, true);
assertTypeHandler("Timestamp", StringHandler.class, false);
assertTypeHandler("Nullable(Timestamp)", StringHandler.class, true);
assertTypeHandler("Array", StringHandler.class, false);
assertTypeHandler("Nullable(Array)", StringHandler.class, true);
assertTypeHandler("Array(String)", StringHandler.class, false);
assertTypeHandler("Nullable(Array(Int32))", StringHandler.class, true);
assertTypeHandler("Struct", StringHandler.class, false);
assertTypeHandler("Nullable(Struct)", StringHandler.class, true);
assertTypeHandler("Null", StringHandler.class, false);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package com.databend.client.data;

import org.testng.Assert;
import org.testng.annotations.Test;

public class TestDatabendTypes {

    /**
     * Checks how raw type names are parsed: Nullable unwrapping, tuple column size, and the
     * display name resolved for tuple, map and variant types.
     */
    @Test(groups = {"Unit"})
    public void testTypeNullable() {
        // Nullable wrapper around a scalar type
        final DatabendRawType nullableUint8 = new DatabendRawType("Nullable(Uint8)");
        final String innerTypeName = nullableUint8.getType();
        Assert.assertEquals(innerTypeName, "Uint8");
        Assert.assertEquals(nullableUint8.isNullable(), true);

        // Nullable wrapper around a tuple with a nested nullable component
        final DatabendRawType nullableTuple = new DatabendRawType("Nullable(Tuple(String, Nullable(Int8)))");
        final String tupleDisplayName = nullableTuple.getDataType().getDisplayName();
        Assert.assertEquals(tupleDisplayName, "tuple");
        Assert.assertEquals(nullableTuple.isNullable(), true);
        final boolean hasTwoComponents = nullableTuple.getColumnSize() == 2;
        Assert.assertTrue(hasTwoComponents);

        // Upper-case map type without a Nullable wrapper
        final DatabendRawType mapType = new DatabendRawType("MAP(STRING, STRING)");
        final String mapDisplayName = mapType.getDataType().getDisplayName();
        Assert.assertEquals(mapDisplayName, "map");
        Assert.assertEquals(mapType.isNullable(), false);

        // Upper-case variant type
        final DatabendRawType variantType = new DatabendRawType("VARIANT");
        final String variantDisplayName = variantType.getDataType().getDisplayName();
        Assert.assertEquals(variantDisplayName, "variant");
    }
}
Loading

0 comments on commit 400d281

Please sign in to comment.