Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added the csv module and the dependent ones, fixes #1391 #1477

Merged
merged 8 commits into from
Oct 28, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions pyintegration/deephaven2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
# Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending
#
"""Deephaven Python Integration Package provides the ability to access Deephaven's query engine natively and thus
unlocks the unique and tremendous power of Deephaven for the Python community.

"""

from .dherror import DHError
from .csv import read_csv
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved

# Version string of the deephaven2 package.
__version__ = "0.5.2"
13 changes: 13 additions & 0 deletions pyintegration/deephaven2/column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#
# Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending
#
from dataclasses import dataclass


@dataclass
class Column:
    """ Describes one column of a Deephaven Table.

    Instances are plain value objects: the dataclass decorator supplies the positional constructor
    (name, data_type, component_type, column_type), equality and repr.
    """
    # The column's name.
    name: str
    # The column's data type.
    # NOTE(review): Table.columns passes the raw result of the Java getDataType() call here - confirm it is a str.
    data_type: str
    # The column's component type (filled from the Java getComponentType() call by Table.columns).
    component_type: str
    # The column's column type (filled from the Java getColumnType() call by Table.columns).
    column_type: str
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
107 changes: 107 additions & 0 deletions pyintegration/deephaven2/csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#
# Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending
#
""" The deephaven.csv module supports reading an external CSV file into a Deephaven table and writing a
Deephaven table out as a CSV file.
"""
from enum import Enum
from typing import Dict, Optional, Any

import jpy

from deephaven2 import DHError
from deephaven2.dtypes import DType
from deephaven2.table import Table

# Java classes looked up once, at import time, through jpy.get_type; the functions below use them to build the
# CSV specs, the optional table header and the charset, and to perform the actual read.
_csv_helpers_cls = jpy.get_type("io.deephaven.db.tables.utils.CsvHelpers")
_csv_specs_cls = jpy.get_type("io.deephaven.db.tables.utils.csv.CsvSpecs")
_table_header_cls = jpy.get_type("io.deephaven.qst.table.TableHeader")
_inference_specs_cls = jpy.get_type("io.deephaven.db.tables.utils.csv.InferenceSpecs")
_j_charset_cls = jpy.get_type("java.nio.charset.Charset")


class Inference(Enum):
    """ An Enum of the predefined inference specs.

    An inference spec contains the configuration and logic for inferring an acceptable parser from string values.
    Each member's value is the object returned by the matching InferenceSpecs factory method.
    """

    STRINGS = _inference_specs_cls.strings()
    """ The order of parsing: STRING, INSTANT, SHORT, INT, LONG, DOUBLE, BOOL, CHAR, BYTE, FLOAT """

    MINIMAL = _inference_specs_cls.minimal()
    """ The order of parsing: INSTANT, LONG, DOUBLE, BOOL, STRING, BYTE, SHORT, INT, FLOAT, CHAR """

    STANDARD = _inference_specs_cls.standard()
    """ The order of parsing: INSTANT, SHORT, INT, LONG, DOUBLE, BOOL, CHAR, STRING, BYTE, FLOAT """

    STANDARD_TIMES = _inference_specs_cls.standardTimes()
    """ The order of parsing: INSTANT, INSTANT_LEGACY, SECONDS, MILLISECONDS, MICROSECONDS, NANOSECONDS, SHORT, INT,
    LONG, DOUBLE, BOOL, CHAR, STRING, BYTE, FLOAT

    Values parsed as SECONDS/MILLISECONDS/MICROSECONDS/NANOSECONDS must fall within the 21st century.
    """


def _build_header(header: Dict[str, DType] = None):
    """ Turn a column-name -> DType mapping into a Java TableHeader.

    Args:
        header (Dict[str, DType]): column names mapped to their data types; may be None or empty

    Returns:
        the built TableHeader, or None when header is None or empty
    """
    if header:
        j_builder = _table_header_cls.builder()
        for col_name, col_dtype in header.items():
            # the Java builder takes the DType member's value, not the DType itself
            j_builder.putHeaders(col_name, col_dtype.value)
        return j_builder.build()

    return None


def read_csv(path: str,
             header: Dict[str, DType] = None,
             inference: Any = Inference.STANDARD_TIMES,
             headless: bool = False,
             delimiter: str = ",",
             quote: str = "\"",
             ignore_surrounding_spaces: bool = True,
             trim: bool = False,
             charset: str = "utf-8") -> Table:
    """ Read the CSV data specified by the path parameter as a table.

    Args:
        path (str): a file path or a URL string
        header (Dict[str, DType]): a dict to define the table columns with key being the name, value being the data type
        inference (csv.Inference): an Enum value specifying the rules for data type inference, default is STANDARD_TIMES
        headless (bool): indicates if the CSV data is headless, default is False
        delimiter (str): the delimiter used by the CSV, default is the comma
        quote (str): the quote character for the CSV, default is double quote
        ignore_surrounding_spaces (bool): indicates whether surrounding white space should be ignored for unquoted text
            field, default is True
        trim (bool): indicates whether to trim white space inside a quoted string, default is False
        charset (str): the name of the charset used for the CSV data, default is 'utf-8'

    Returns:
        a Table

    Raises:
        DHError
    """
    try:
        specs_builder = _csv_specs_cls.builder()

        # an explicit header is only installed when the caller supplied one
        j_header = _build_header(header)
        if j_header:
            specs_builder.header(j_header)

        # each builder call returns the builder to continue with, exactly as in a fluent chain
        specs_builder = specs_builder.inference(inference.value)
        specs_builder = specs_builder.hasHeaderRow(not headless)
        # delimiter and quote are handed to Java as character codes
        specs_builder = specs_builder.delimiter(ord(delimiter))
        specs_builder = specs_builder.quote(ord(quote))
        specs_builder = specs_builder.ignoreSurroundingSpaces(ignore_surrounding_spaces)
        specs_builder = specs_builder.trim(trim)
        specs_builder = specs_builder.charset(_j_charset_cls.forName(charset))
        j_specs = specs_builder.build()

        j_table = _csv_helpers_cls.readCsv(path, j_specs)
        return Table(db_table=j_table)
    except Exception as e:
        raise DHError(e, "read_csv failed") from e
70 changes: 70 additions & 0 deletions pyintegration/deephaven2/dherror.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#
# Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending
#
""" This module defines a custom exception for the Deephaven Python Integration Package.

The custom exception is named DHError. It encapsulates exceptions thrown by the Deephaven engine and the
Python/Java integration layer and provides 3 convenient properties: root_cause, compact_traceback, and
traceback for easy debugging.

"""
import traceback

jmao-denver marked this conversation as resolved.
Show resolved Hide resolved

class DHError(Exception):
    """ The custom exception class for the Deephaven Python package.

    This exception can be raised due to user errors or system errors when Deephaven resources and functions
    are accessed, for example, during reading a CSV/Parquet file into a Deephaven table or performing an
    aggregation or join operation on Deephaven tables. It is a good practice for Python code to catch this
    exception and handle it appropriately.

    The constructor captures the traceback of the exception currently being handled (traceback.format_exc()),
    so a DHError is meant to be created inside an ``except`` block, e.g. ``raise DHError(e, "msg") from e``.
    """

    def __init__(self, cause=None, message=""):
        """ Capture the current traceback and derive the root cause and the compact traceback from it.

        Args:
            cause: the exception being wrapped; it is not used by this constructor (the traceback text is
                taken from the exception currently being handled instead)
            message (str): a short description of the operation that failed, default is the empty string
        """
        super().__init__()
        self._message = message
        self._traceback = traceback.format_exc()

        caused_by = "caused by"
        self._root_cause = ""
        self._compact_tb = []
        # Lines are copied verbatim until the "RuntimeError" summary line is reached; after that only the
        # "caused by" / "Exception message" lines are kept.
        keep_verbatim = True
        for tb_ln in self._traceback.splitlines():
            if tb_ln.startswith(caused_by):
                # Take everything after the prefix; splitting on "by" would cut the cause at the next "by".
                self._root_cause = tb_ln[len(caused_by):].strip()
                stripped = tb_ln.strip()
                if stripped.endswith(":"):
                    # drop the trailing colon of a "caused by <type>:" line
                    self._compact_tb.append(stripped[:-1].strip())
                else:
                    self._compact_tb.append(tb_ln)
            elif tb_ln.startswith("RuntimeError"):
                self._root_cause = tb_ln
                self._compact_tb.append(tb_ln)
                keep_verbatim = False
            elif tb_ln.startswith("Exception message"):
                # Split on the first colon only so that messages containing colons are kept whole.
                self._root_cause = tb_ln.split(":", 1)[1] if ":" in tb_ln else tb_ln
                self._root_cause = self._root_cause.strip()
                if self._compact_tb:
                    self._compact_tb[-1] = self._compact_tb[-1] + f" {self._root_cause}"
                else:
                    self._compact_tb.append(self._root_cause)
            elif keep_verbatim:
                # Only lines not handled above are copied, so a handled line is never recorded twice.
                self._compact_tb.append(tb_ln)

    @property
    def root_cause(self) -> str:
        """ The root cause of the exception. """
        return self._root_cause

    @property
    def traceback(self) -> str:
        """ The traceback of the exception. """
        return self._traceback

    @property
    def compact_traceback(self) -> str:
        """ The compact traceback of the exception. """
        return "\n".join(self._compact_tb)

    def __str__(self):
        if self._root_cause:
            return f"{self._message} : {self._root_cause}"
        else:
            return self._message
126 changes: 126 additions & 0 deletions pyintegration/deephaven2/dtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#
# Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending
#
""" This module defines the data types supported by the Deephaven engine.

Each data type is represented by a DType class which supports creating arrays of the same type and more.
"""
from enum import Enum
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
from typing import Iterable

import jpy
from deephaven2 import DHError

# Java classes looked up once, at import time, through jpy.get_type; they are used below to obtain the QST
# type object for each supported data type.
_qst_type = jpy.get_type("io.deephaven.qst.type.Type")
_table_tools = jpy.get_type("io.deephaven.db.tables.utils.TableTools")


def _qst_custom_type(cls_name: str):
    """ Return the QST type found for the Java class with the given fully qualified name.

    Args:
        cls_name (str): the Java class name, e.g. "java.math.BigDecimal"
    """
    j_class = _table_tools.typeFromName(cls_name)
    return _qst_type.find(j_class)


class DType(Enum):
    """ An Enum for supported data types in Deephaven with type aliases to mirror the same ones in numpy or pyarrow.

    Each member is declared as a (QST type, Java type name) pair; the QST type becomes the member's value.

    The complex types such as BigDecimal, DBPeriod can be called to create Java objects of the same types, e.g.
        big_decimal = BigDecimal(12.88)

    """
    bool_ = _qst_type.booleanType(), "java.lang.Boolean"
    byte = _qst_type.byteType(), "byte"
    int8 = byte
    short = _qst_type.shortType(), "short"
    int16 = short
    char = _qst_type.charType(), "char"
    int_ = _qst_type.intType(), "int"
    int32 = int_
    long = _qst_type.longType(), "long"
    int64 = long
    float_ = _qst_type.floatType(), "float"
    single = float_
    float32 = float_
    double = _qst_type.doubleType(), "double"
    float64 = double
    string = _qst_type.stringType(), "java.lang.String"
    BigDecimal = _qst_custom_type("java.math.BigDecimal"), "java.math.BigDecimal"
    StringSet = _qst_custom_type("io.deephaven.db.tables.libs.StringSet"), "io.deephaven.db.tables.libs.StringSet"
    DBDateTime = _qst_custom_type("io.deephaven.db.tables.utils.DBDateTime"), "io.deephaven.db.tables.utils.DBDateTime"
    DBPeriod = _qst_custom_type("io.deephaven.db.tables.utils.DBPeriod"), "io.deephaven.db.tables.utils.DBPeriod"

    def __new__(cls, qst_type, j_name):
        # Only the QST type is used as the Enum value; the Java type name is consumed by __init__.
        obj = object.__new__(cls)
        obj._value_ = qst_type
        return obj

    def __init__(self, qst_type, j_name):
        self._qst_type = qst_type
        self._j_name = j_name
        self._j_type = jpy.get_type(j_name)

    def __call__(self, *args, **kwargs):
        """ Create a Java object of this data type by calling the Java type with the given arguments. """
        return self._j_type(*args, **kwargs)

    @property
    def qst_type(self):
        """ The QST type this data type was declared with. """
        return self._qst_type

    @property
    def j_type(self):
        """ The corresponding Java type. """
        return self._j_type

    def array(self, size: int):
        """ Create a Java array of the same data type of the specified size.

        Args:
            size (int): the size of the array

        Returns:
            a Java array

        Raises:
            DHError
        """
        try:
            return jpy.array(self._j_name, size)
        except Exception as e:
            # DHError takes (cause, message); passing only the text would bind it to cause and lose the message
            raise DHError(e, "failed to create a Java array.") from e

    def array_from(self, values: Iterable):
        """ Create a Java array of the same data type populated with values from the Python iterable.

        Args:
            values: a Python iterable of compatible data type

        Returns:
            a Java array

        Raises:
            DHError
        """
        try:
            return jpy.array(self._j_name, values)
        except Exception as e:
            # DHError takes (cause, message); passing only the text would bind it to cause and lose the message
            raise DHError(e, "failed to create a Java array.") from e


# Module-level shortcuts for the DType members, so a type can be referenced directly from this module
# (e.g. int32) instead of through the Enum (DType.int32).
bool_ = DType.bool_
byte = DType.byte
int8 = DType.int8
short = DType.short
int16 = DType.int16
char = DType.char
int_ = DType.int_
int32 = DType.int32
long = DType.long
int64 = DType.int64
float_ = DType.float_
single = DType.single
float32 = DType.float32
double = DType.double
float64 = DType.float64
string = DType.string
BigDecimal = DType.BigDecimal
StringSet = DType.StringSet
DBDateTime = DType.DBDateTime
DBPeriod = DType.DBPeriod
60 changes: 60 additions & 0 deletions pyintegration/deephaven2/table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#
# Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending
#
""" This module implements the Table class and functions that work with Tables. """
from typing import List
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved

from deephaven2 import DHError
from deephaven2.column import Column


class Table:
    """ A Table represents a Deephaven table. It allows applications to perform powerful Deephaven table operations.

    Note: A client should not instantiate Table directly. Tables are mostly created by factory methods, data ingress
    operations, queries, aggregations, joins, etc.
    """

    def __init__(self, db_table):
        """ Wrap a Java table object.

        Args:
            db_table: the underlying Java table; its definition is fetched immediately via getDefinition()
        """
        self._db_table = db_table
        self._definition = self._db_table.getDefinition()
        # Built lazily by the columns property; None means "not computed yet".
        self._schema = None

    # to make the table visible to DH script session
    def get_dh_table(self):
        """ Return the wrapped Java table object. """
        return self._db_table

    @property
    def columns(self) -> "List[Column]":
        """ The column definitions of the table. """
        # Compare against None so that an empty schema (a table without columns) is cached as well.
        if self._schema is not None:
            return self._schema

        j_col_list = self._definition.getColumnList()
        # Build into a local list first so that a failure part-way through never leaves a partial schema cached.
        schema = []
        for i in range(j_col_list.size()):
            j_col = j_col_list.get(i)
            schema.append(Column(j_col.getName(),
                                 j_col.getDataType(),
                                 j_col.getComponentType(),
                                 j_col.getColumnType()))

        self._schema = schema
        return self._schema

    def update(self, formulas: List[str]) -> "Table":
        """ The update method creates a new table containing a new, in-memory column for each formula.

        Args:
            formulas (List[str]): the formulas, passed as given to the underlying table's update method

        Returns:
            A new table

        Raises:
            DHError
        """
        try:
            return Table(db_table=self._db_table.update(formulas))
        except Exception as e:
            raise DHError(e, "table.update failed") from e
2 changes: 2 additions & 0 deletions pyintegration/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
setuptools~=58.2.0
wrapt~=1.13.1
Loading