forked from pflooky/data-caterer
-
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #69 from data-catering/insta-integration
Insta integration
- Loading branch information
Showing
14 changed files
with
205 additions
and
153 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
150 changes: 75 additions & 75 deletions
150
app/src/main/scala/io/github/datacatering/datacaterer/core/util/ProtobufUtil.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,75 +1,75 @@ | ||
package io.github.datacatering.datacaterer.core.util | ||
|
||
import com.google.protobuf.DescriptorProtos | ||
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto | ||
import com.google.protobuf.Descriptors.FieldDescriptor | ||
import com.google.protobuf.Descriptors.FieldDescriptor.JavaType | ||
import org.apache.spark.sql.types.{DataType, DataTypes, StructField, StructType} | ||
|
||
import java.io.{BufferedInputStream, FileInputStream} | ||
import scala.collection.JavaConverters.asScalaBufferConverter | ||
|
||
object ProtobufUtil {

  /**
   * Reads a compiled protobuf descriptor set file (as produced by `protoc --descriptor_set_out`)
   * and converts every top-level message type into a Spark [[StructType]].
   *
   * @param descriptorFile path to the binary `FileDescriptorSet` file
   * @return map of message name -> Spark schema for that message
   */
  def toStructType(descriptorFile: String): Map[String, StructType] = {
    val inputStream = new BufferedInputStream(new FileInputStream(descriptorFile))
    try {
      val fileDescriptorSet = DescriptorProtos.FileDescriptorSet.parseFrom(inputStream)
      fileDescriptorSet.getFileList.asScala
        .flatMap(fd =>
          fd.getMessageTypeList.asScala.toList.map(message =>
            (message.getName, StructType(getSchemaFromFieldsProto(message.getFieldList.asScala.toList)))
          )
        ).toMap
    } finally {
      // Fix: the stream was previously never closed, leaking a file handle per call.
      inputStream.close()
    }
  }

  /** Maps resolved [[FieldDescriptor]]s to Spark fields; nullable unless the proto field is required. */
  private def getSchemaFromFields(fields: List[FieldDescriptor]): Array[StructField] = {
    fields.map(field => {
      val dataType = getDataTypeForField(field)
      StructField(field.getName, dataType, !field.isRequired)
    }).toArray
  }

  /** Maps unresolved [[FieldDescriptorProto]]s (raw descriptor-file entries) to Spark fields. */
  private def getSchemaFromFieldsProto(fields: List[FieldDescriptorProto]): Array[StructField] = {
    fields.map(field => {
      val dataType = getDataTypeForField(field)
      StructField(field.getName, dataType)
    }).toArray
  }

  /** Converts a resolved proto field's Java type to the corresponding Spark [[DataType]]. */
  private def getDataTypeForField(fieldDescriptor: FieldDescriptor): DataType = {
    fieldDescriptor.getJavaType match {
      case JavaType.BOOLEAN => DataTypes.BooleanType
      case JavaType.INT => DataTypes.IntegerType
      case JavaType.LONG => DataTypes.LongType
      case JavaType.DOUBLE => DataTypes.DoubleType
      case JavaType.FLOAT => DataTypes.FloatType
      case JavaType.STRING => DataTypes.StringType
      case JavaType.ENUM => DataTypes.StringType
      case JavaType.BYTE_STRING => DataTypes.BinaryType
      case JavaType.MESSAGE =>
        // Recurse into the nested message's fields to build a nested struct.
        new StructType(getSchemaFromFields(fieldDescriptor.getMessageType.getFields.asScala.toList))
      case _ => throw new RuntimeException(s"Unable to parse proto type, type=${fieldDescriptor.getType}")
    }
  }

  /** Converts an unresolved proto field entry's declared type to the corresponding Spark [[DataType]]. */
  private def getDataTypeForField(fieldDescriptor: FieldDescriptorProto): DataType = {
    FieldDescriptor.Type.valueOf(fieldDescriptor.getType).getJavaType match {
      case JavaType.BOOLEAN => DataTypes.BooleanType
      case JavaType.INT => DataTypes.IntegerType
      case JavaType.LONG => DataTypes.LongType
      case JavaType.DOUBLE => DataTypes.DoubleType
      case JavaType.FLOAT => DataTypes.FloatType
      case JavaType.STRING => DataTypes.StringType
      case JavaType.ENUM => DataTypes.StringType
      case JavaType.BYTE_STRING => DataTypes.BinaryType
      case JavaType.MESSAGE =>
        // NOTE(review): `getDescriptorForType` on a FieldDescriptorProto returns the descriptor of
        // the FieldDescriptorProto message itself, NOT the message type this field references —
        // nested MESSAGE fields likely yield the wrong schema here. A correct fix needs to resolve
        // `fieldDescriptor.getTypeName` against the FileDescriptorSet's symbol table. TODO confirm.
        new StructType(getSchemaFromFields(fieldDescriptor.getDescriptorForType.getFields.asScala.toList))
      case _ => throw new RuntimeException(s"Unable to parse proto type, type=${fieldDescriptor}")
    }
  }

}
//package io.github.datacatering.datacaterer.core.util | ||
// | ||
//import com.google.protobuf.DescriptorProtos | ||
//import com.google.protobuf.DescriptorProtos.FieldDescriptorProto | ||
//import com.google.protobuf.Descriptors.FieldDescriptor | ||
//import com.google.protobuf.Descriptors.FieldDescriptor.JavaType | ||
//import org.apache.spark.sql.types.{DataType, DataTypes, StructField, StructType} | ||
// | ||
//import java.io.{BufferedInputStream, FileInputStream} | ||
//import scala.collection.JavaConverters.asScalaBufferConverter | ||
// | ||
//object ProtobufUtil { | ||
// | ||
// def toStructType(descriptorFile: String): Map[String, StructType] = { | ||
// val file = new BufferedInputStream(new FileInputStream(descriptorFile)) | ||
// val fileDescriptorSet = DescriptorProtos.FileDescriptorSet.parseFrom(file) | ||
// fileDescriptorSet.getFileList.asScala | ||
// .flatMap(fd => { | ||
// fd.getMessageTypeList.asScala.toList.map(message => { | ||
// (message.getName, StructType(getSchemaFromFieldsProto(message.getFieldList.asScala.toList))) | ||
// }) | ||
// // (fd.getName, StructType(getSchemaFromFields(fd.getMessageTypeList.asScala.toList))) | ||
// }).toMap | ||
// } | ||
// | ||
// private def getSchemaFromFields(fields: List[FieldDescriptor]): Array[StructField] = { | ||
// fields.map(field => { | ||
// val dataType = getDataTypeForField(field) | ||
// StructField(field.getName, dataType, !field.isRequired) | ||
// }).toArray | ||
// } | ||
// | ||
// private def getSchemaFromFieldsProto(fields: List[FieldDescriptorProto]): Array[StructField] = { | ||
// fields.map(field => { | ||
// val dataType = getDataTypeForField(field) | ||
// StructField(field.getName, dataType) | ||
// }).toArray | ||
// } | ||
// | ||
// private def getDataTypeForField(fieldDescriptor: FieldDescriptor): DataType = { | ||
// fieldDescriptor.getJavaType match { | ||
// case JavaType.BOOLEAN => DataTypes.BooleanType | ||
// case JavaType.INT => DataTypes.IntegerType | ||
// case JavaType.LONG => DataTypes.LongType | ||
// case JavaType.DOUBLE => DataTypes.DoubleType | ||
// case JavaType.FLOAT => DataTypes.FloatType | ||
// case JavaType.STRING => DataTypes.StringType | ||
// case JavaType.ENUM => DataTypes.StringType | ||
// case JavaType.BYTE_STRING => DataTypes.BinaryType | ||
// case JavaType.MESSAGE => { | ||
// new StructType(getSchemaFromFields(fieldDescriptor.getMessageType.getFields.asScala.toList)) | ||
// } | ||
// case _ => throw new RuntimeException(s"Unable to parse proto type, type=${fieldDescriptor.getType}") | ||
// } | ||
// } | ||
// | ||
// private def getDataTypeForField(fieldDescriptor: FieldDescriptorProto): DataType = { | ||
// // val nonProtoField = FieldDescriptor.Type.valueOf(fieldDescriptor.getType) | ||
// FieldDescriptor.Type.valueOf(fieldDescriptor.getType).getJavaType match { | ||
// case JavaType.BOOLEAN => DataTypes.BooleanType | ||
// case JavaType.INT => DataTypes.IntegerType | ||
// case JavaType.LONG => DataTypes.LongType | ||
// case JavaType.DOUBLE => DataTypes.DoubleType | ||
// case JavaType.FLOAT => DataTypes.FloatType | ||
// case JavaType.STRING => DataTypes.StringType | ||
// case JavaType.ENUM => DataTypes.StringType | ||
// case JavaType.BYTE_STRING => DataTypes.BinaryType | ||
// case JavaType.MESSAGE => { | ||
// new StructType(getSchemaFromFields(fieldDescriptor.getDescriptorForType.getFields.asScala.toList)) | ||
// } | ||
// case _ => throw new RuntimeException(s"Unable to parse proto type, type=${fieldDescriptor}") | ||
// } | ||
// } | ||
// | ||
//} |
11 changes: 11 additions & 0 deletions
11
app/src/test/resources/sample/plan/account-balance-transaction-plan.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Plan: generate related balance and transaction records in Postgres.
name: "account_balance_and_transactions_create_plan"
description: "Create balances and transactions in Postgres"
tasks:
  - name: "jdbc_customer_balance_and_transactions"
    dataSourceName: "postgres"

sinkOptions:
  foreignKeys:
    # Entry format: [source column, [generation targets], [delete targets]].
    # Values generated for balances.account_number are reused for
    # transactions.account_number; no delete ordering is defined (empty list).
    # NOTE(review): nesting reconstructed from a flattened rendering — confirm
    # against data-caterer's foreignKeys schema.
    - - "postgres.balances.account_number"
      - - "postgres.transactions.account_number"
      - []
49 changes: 49 additions & 0 deletions
49
app/src/test/resources/sample/task/postgres/postgres-balance-transaction-task.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Task: generate 1000 account balances, each with 5 matching transactions.
name: "jdbc_customer_balance_and_transactions"
steps:
  - name: "balances"
    type: "postgres"
    count:
      records: 1000
    options:
      dbtable: "account.balances"
    schema:
      fields:
        - name: "account_number"
          generator:
            type: "regex"
            options:
              regex: "ACC1[0-9]{5,10}"
              isUnique: true
        - name: "create_time"
          type: "timestamp"
        - name: "account_status"
          type: "string"
          generator:
            type: "oneOf"
            options:
              oneOf:
                - "open"
                - "closed"
                - "suspended"
        - name: "balance"
          type: "double"
  - name: "transactions"
    type: "postgres"
    count:
      # 5 transactions per distinct account_number (5000 rows total).
      perColumn:
        columnNames:
          - "account_number"
        count: 5
    options:
      dbtable: "account.transactions"
    schema:
      fields:
        - name: "account_number"
        - name: "create_time"
          type: "timestamp"
        - name: "transaction_id"
          generator:
            # Fix: declare the generator type; a regex option without
            # type: "regex" is inconsistent with account_number above.
            type: "regex"
            options:
              regex: "txn-[0-9]{10}"
        - name: "amount"
          type: "double"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
groupId=io.github.data-catering
# Version bumped 0.11.7 -> 0.11.8 in this commit.
version=0.11.8

scalaVersion=2.12
scalaSpecificVersion=2.12.19
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# insta-integration config: spin up Postgres, run the data generator jar,
# then validate the generated row counts and key columns.
services:
  - name: postgres
    data: app/src/test/resources/sample/sql/postgres
run:
  - command: java -jar app/build/libs/data-caterer.jar
    env:
      PLAN_FILE_PATH: app/src/test/resources/sample/plan/account-balance-transaction-plan.yaml
      TASK_FOLDER_PATH: app/src/test/resources/sample/task
    test:
      validation:
        postgres:
          - options:
              dbtable: account.balances
            validations:
              # Fix: the generated tables define account_number (see the task
              # YAML in this commit); there is no account_id column.
              - expr: ISNOTNULL(account_number)
              - aggType: count
                aggExpr: count == 1000
          - options:
              dbtable: account.transactions
            validations:
              - expr: ISNOTNULL(account_number)
              - aggType: count
                aggExpr: count == 5000
              - groupByCols: [account_number]
                aggType: count
                aggExpr: count == 5
Oops, something went wrong.