Skip to content

Commit

Permalink
feat(db_engine_specs): added support for Denodo Virtual DataPort (#29927
Browse files Browse the repository at this point in the history
)
  • Loading branch information
denodo-research-labs authored Oct 24, 2024
1 parent 3d443e0 commit 1c56857
Show file tree
Hide file tree
Showing 7 changed files with 322 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ Here are some of the major database solutions that are supported:
<img src="https://superset.apache.org/img/databases/doris.png" alt="doris" border="0" width="200" />
<img src="https://superset.apache.org/img/databases/oceanbase.svg" alt="oceanbase" border="0" width="220" />
<img src="https://superset.apache.org/img/databases/sap-hana.png" alt="sap-hana" border="0" width="220" />
<img src="https://superset.apache.org/img/databases/denodo.png" alt="denodo" border="0" width="200" />
</p>

**A more comprehensive list of supported databases** along with the configuration instructions can be found [here](https://superset.apache.org/docs/configuration/databases).
Expand Down
11 changes: 11 additions & 0 deletions docs/docs/configuration/databases.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ are compatible with Superset.
| [ClickHouse](/docs/configuration/databases#clickhouse) | `pip install clickhouse-connect` | `clickhousedb://{username}:{password}@{hostname}:{port}/{database}` |
| [CockroachDB](/docs/configuration/databases#cockroachdb) | `pip install cockroachdb` | `cockroachdb://root@{hostname}:{port}/{database}?sslmode=disable` |
| [Couchbase](/docs/configuration/databases#couchbase) | `pip install couchbase-sqlalchemy` | `couchbase://{username}:{password}@{hostname}:{port}?truststorepath={ssl certificate path}` |
| [Denodo](/docs/configuration/databases#denodo) | `pip install denodo-sqlalchemy` | `denodo://{username}:{password}@{hostname}:{port}/{database}` |
| [Dremio](/docs/configuration/databases#dremio) | `pip install sqlalchemy_dremio` |`dremio+flight://{username}:{password}@{host}:32010`, often useful: `?UseEncryption=true/false`. For Legacy ODBC: `dremio+pyodbc://{username}:{password}@{host}:31010` |
| [Elasticsearch](/docs/configuration/databases#elasticsearch) | `pip install elasticsearch-dbapi` | `elasticsearch+http://{user}:{password}@{host}:9200/` |
| [Exasol](/docs/configuration/databases#exasol) | `pip install sqlalchemy-exasol` | `exa+pyodbc://{username}:{password}@{hostname}:{port}/my_schema?CONNECTIONLCALL=en_US.UTF-8&driver=EXAODBC` |
Expand Down Expand Up @@ -512,6 +513,16 @@ For a connection to a SQL endpoint you need to use the HTTP path from the endpoi
```


#### Denodo

The recommended connector library for Denodo is
[denodo-sqlalchemy](https://pypi.org/project/denodo-sqlalchemy/).

The expected connection string is formatted as follows (default port is 9996):

```
denodo://{username}:{password}@{hostname}:{port}/{database}
```


#### Dremio
Expand Down
5 changes: 5 additions & 0 deletions docs/src/resources/data.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,9 @@ export const Databases = [
href: 'https://www.couchbase.com/',
imgName: 'couchbase.svg',
},
{
title: 'Denodo',
href: 'https://www.denodo.com/',
imgName: 'denodo.png',
},
];
Binary file added docs/static/img/databases/denodo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ databricks = [
"sqlalchemy-databricks>=0.2.0",
]
db2 = ["ibm-db-sa>0.3.8, <=0.4.0"]
denodo = ["denodo-sqlalchemy~=1.0.6"]
dremio = ["sqlalchemy-dremio>=1.2.1, <4"]
drill = ["sqlalchemy-drill>=1.1.4, <2"]
druid = ["pydruid>=0.6.5,<0.7"]
Expand Down
158 changes: 158 additions & 0 deletions superset/db_engine_specs/denodo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import re
from datetime import datetime
from typing import Any, Optional

from sqlalchemy.types import Date, DateTime

from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin
from superset.errors import SupersetErrorType


# Internal class for defining error message patterns (for translation)
class _ErrorPatterns:  # pylint: disable=too-few-public-methods
    """Compiled regular expressions for recognising driver/server error text.

    Each attribute is a pattern intended to be searched against a raw error
    message. Named groups (``hostname``, ``database``, ``err``, ``column``,
    ``view``, ``exp``) capture the fragment that is interpolated into the
    corresponding user-facing message.
    """

    # --- Connection errors -------------------------------------------------
    CONN_INVALID_USER_PWD_REGEX = re.compile("The username or password is incorrect")
    CONN_INVALID_PWD_NEEDED_REGEX = re.compile("no password supplied")
    CONN_INVALID_HOSTNAME_REGEX = re.compile(
        'could not translate host name "(?P<hostname>.*?)" to address: '
    )
    CONN_PORT_CLOSED_REGEX = re.compile(
        "Is the server running on that host and accepting"
    )
    CONN_UNKNOWN_DATABASE_REGEX = re.compile("Database '(?P<database>.*?)' not found")
    CONN_FORBIDDEN_DATABASE_REGEX = re.compile(
        "Insufficient privileges to connect to the database '(?P<database>.*?)'"
    )

    # --- Query errors ------------------------------------------------------
    QUERY_SYNTAX_ERROR_REGEX = re.compile("Exception parsing query near '(?P<err>.*?)'")
    QUERY_COLUMN_NOT_EXIST_REGEX = re.compile(
        "Field not found '(?P<column>.*?)' in view '(?P<view>.*?)'"
    )
    QUERY_GROUPBY_ERROR_REGEX = re.compile(
        "Error computing capabilities of GROUP BY view"
    )
    # The period after "expression" is a literal full stop in the message, so
    # it is escaped; left bare it would match any single character.
    QUERY_GROUPBY_CANT_PROJ_REGEX = re.compile(
        r"Invalid GROUP BY expression\. '(?P<exp>.*?)' cannot be projected"
    )


class DenodoEngineSpec(BaseEngineSpec, BasicParametersMixin):
    """Engine spec for the ``denodo`` SQLAlchemy dialect.

    Declares the time-grain SQL templates, the mapping from raw error text to
    Superset error types, and the datetime/epoch literal builders used for
    this engine.
    """

    engine = "denodo"
    engine_name = "Denodo"

    default_driver = "psycopg2"
    sqlalchemy_uri_placeholder = (
        "denodo://user:password@host:port/dbname[?key=value&key=value...]"
    )
    encryption_parameters = {"sslmode": "require"}

    # ISO 8601 duration -> SQL template; ``{col}`` is the column placeholder.
    _time_grain_expressions = {
        None: "{col}",
        "PT1M": "TRUNC({col},'MI')",
        "PT1H": "TRUNC({col},'HH')",
        "P1D": "TRUNC({col},'DDD')",
        "P1W": "TRUNC({col},'W')",
        "P1M": "TRUNC({col},'MONTH')",
        "P3M": "TRUNC({col},'Q')",
        "P1Y": "TRUNC({col},'YEAR')",
    }

    # Pattern -> (message template, error type, extra payload). Message
    # templates use %-style named placeholders that line up with the named
    # groups of the associated pattern.
    custom_errors: dict[
        re.Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]
    ] = {
        # Connection-level failures
        _ErrorPatterns.CONN_INVALID_USER_PWD_REGEX: (
            "Incorrect username or password.",
            SupersetErrorType.CONNECTION_INVALID_USERNAME_ERROR,
            {"invalid": ["username", "password"]},
        ),
        _ErrorPatterns.CONN_INVALID_PWD_NEEDED_REGEX: (
            "Please enter a password.",
            SupersetErrorType.CONNECTION_ACCESS_DENIED_ERROR,
            {"invalid": ["password"]},
        ),
        _ErrorPatterns.CONN_INVALID_HOSTNAME_REGEX: (
            'Hostname "%(hostname)s" cannot be resolved.',
            SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR,
            {"invalid": ["host"]},
        ),
        _ErrorPatterns.CONN_PORT_CLOSED_REGEX: (
            "Server refused the connection: check hostname and port.",
            SupersetErrorType.CONNECTION_PORT_CLOSED_ERROR,
            {"invalid": ["host", "port"]},
        ),
        _ErrorPatterns.CONN_UNKNOWN_DATABASE_REGEX: (
            'Unable to connect to database "%(database)s"',
            SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR,
            {"invalid": ["database"]},
        ),
        _ErrorPatterns.CONN_FORBIDDEN_DATABASE_REGEX: (
            'Unable to connect to database "%(database)s": database does not '
            "exist or insufficient permissions",
            SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
            {"invalid": ["database"]},
        ),
        # Query-level failures
        _ErrorPatterns.QUERY_SYNTAX_ERROR_REGEX: (
            "Please check your query for syntax errors at or "
            'near "%(err)s". Then, try running your query again.',
            SupersetErrorType.SYNTAX_ERROR,
            {},
        ),
        _ErrorPatterns.QUERY_COLUMN_NOT_EXIST_REGEX: (
            'Column "%(column)s" not found in "%(view)s".',
            SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
            {},
        ),
        _ErrorPatterns.QUERY_GROUPBY_ERROR_REGEX: (
            "Invalid aggregation expression.",
            SupersetErrorType.SYNTAX_ERROR,
            {},
        ),
        _ErrorPatterns.QUERY_GROUPBY_CANT_PROJ_REGEX: (
            '"%(exp)s" is neither an aggregation function nor '
            "appears in the GROUP BY clause.",
            SupersetErrorType.SYNTAX_ERROR,
            {},
        ),
    }

    @classmethod
    def epoch_to_dttm(cls) -> str:
        """Return the SQL template that turns an epoch column into a timestamp.

        The ``{col}`` placeholder is left for the caller to fill in.
        """
        # NOTE(review): the function name suggests a millisecond input --
        # confirm which unit callers supply for ``{col}``.
        return "GETTIMEFROMMILLIS({col})"

    @classmethod
    def convert_dttm(
        cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None
    ) -> Optional[str]:
        """Render ``dttm`` as a SQL literal expression for ``target_type``.

        :param target_type: native column type name to resolve
        :param dttm: the datetime value to render
        :param db_extra: accepted for interface compatibility; not read here
        :returns: a ``TO_DATE``/``TO_TIMESTAMP`` expression, or ``None`` when
            the resolved type is neither a date nor a datetime type
        """
        column_type = cls.get_sqla_column_type(target_type)

        if isinstance(column_type, Date):
            date_literal = dttm.date().isoformat()
            return f"TO_DATE('yyyy-MM-dd', '{date_literal}')"

        if isinstance(column_type, DateTime):
            # Millisecond precision to line up with the ``SSS`` format token.
            timestamp_literal = dttm.isoformat(sep=" ", timespec="milliseconds")
            return f"TO_TIMESTAMP('yyyy-MM-dd HH:mm:ss.SSS', '{timestamp_literal}')"

        return None

    @classmethod
    def get_datatype(cls, type_code: Any) -> Optional[str]:
        """Look up the psycopg2 type name registered for ``type_code``.

        :param type_code: key to look up in psycopg2's type registries
        :returns: the ``name`` of the registered type, or ``None`` if the code
            is in neither registry
        """
        # pylint: disable=import-outside-toplevel
        from psycopg2.extensions import binary_types, string_types

        # Probe ``string_types`` first so that, for a code present in both
        # registries, the string entry takes precedence.
        for registry in (string_types, binary_types):
            if type_code in registry:
                return registry[type_code].name
        return None
146 changes: 146 additions & 0 deletions tests/unit_tests/db_engine_specs/test_denodo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime
from typing import Any, Optional

import pytest
from sqlalchemy import column, types
from sqlalchemy.engine.url import make_url

from superset.db_engine_specs.denodo import DenodoEngineSpec as spec
from superset.utils.core import GenericDataType
from tests.unit_tests.db_engine_specs.utils import (
assert_column_spec,
assert_convert_dttm,
)
from tests.unit_tests.fixtures.common import dttm # noqa: F401


# Both DateTime-like target types are expected to render the same literal.
_EXPECTED_TIMESTAMP = (
    "TO_TIMESTAMP('yyyy-MM-dd HH:mm:ss.SSS', '2019-01-02 03:04:05.678')"
)


@pytest.mark.parametrize(
    "target_type,expected_result",
    [
        ("Date", "TO_DATE('yyyy-MM-dd', '2019-01-02')"),
        ("DateTime", _EXPECTED_TIMESTAMP),
        ("TimeStamp", _EXPECTED_TIMESTAMP),
        ("UnknownType", None),
    ],
)
def test_convert_dttm(
    target_type: str,
    expected_result: Optional[str],
    dttm: datetime,  # noqa: F811
) -> None:
    """
    DB Eng Specs (denodo): Test datetime literal conversion per target type
    """
    assert_convert_dttm(spec, target_type, expected_result, dttm)


def test_epoch_to_dttm(
    dttm: datetime,  # noqa: F811
) -> None:
    """
    DB Eng Specs (denodo): Test the epoch conversion template
    """
    assert isinstance(dttm, datetime)

    rendered = spec.epoch_to_dttm().format(col="epoch_dttm")
    assert rendered == "GETTIMEFROMMILLIS(epoch_dttm)"


@pytest.mark.parametrize(
    "native_type,sqla_type,attrs,generic_type,is_dttm",
    [
        # Numeric
        ("SMALLINT", types.SmallInteger, None, GenericDataType.NUMERIC, False),
        ("INTEGER", types.Integer, None, GenericDataType.NUMERIC, False),
        ("BIGINT", types.BigInteger, None, GenericDataType.NUMERIC, False),
        ("DECIMAL", types.Numeric, None, GenericDataType.NUMERIC, False),
        ("NUMERIC", types.Numeric, None, GenericDataType.NUMERIC, False),
        ("REAL", types.REAL, None, GenericDataType.NUMERIC, False),
        ("MONEY", types.Numeric, None, GenericDataType.NUMERIC, False),
        # String
        ("CHAR", types.String, None, GenericDataType.STRING, False),
        ("VARCHAR", types.String, None, GenericDataType.STRING, False),
        ("TEXT", types.String, None, GenericDataType.STRING, False),
        # Temporal
        ("DATE", types.Date, None, GenericDataType.TEMPORAL, True),
        ("TIMESTAMP", types.TIMESTAMP, None, GenericDataType.TEMPORAL, True),
        ("TIME", types.Time, None, GenericDataType.TEMPORAL, True),
        # Boolean
        ("BOOLEAN", types.Boolean, None, GenericDataType.BOOLEAN, False),
    ],
)
def test_get_column_spec(
    native_type: str,
    sqla_type: type[types.TypeEngine],
    attrs: Optional[dict[str, Any]],
    generic_type: GenericDataType,
    is_dttm: bool,
) -> None:
    """
    DB Eng Specs (denodo): Test native type to column spec resolution
    """
    assert_column_spec(spec, native_type, sqla_type, attrs, generic_type, is_dttm)


def test_get_schema_from_engine_params() -> None:
    """
    Test the ``get_schema_from_engine_params`` method.
    Should return None.
    """
    sqlalchemy_url = make_url("denodo://user:password@host/db")
    connect_args: dict[str, Any] = {}

    schema = spec.get_schema_from_engine_params(sqlalchemy_url, connect_args)

    assert schema is None


def test_get_default_catalog() -> None:
    """
    Test ``get_default_catalog``.
    Should return None.
    """
    from superset.models.core import Database

    denodo_db = Database(
        database_name="denodo",
        sqlalchemy_uri="denodo://user:password@host:9996/db",
    )

    default_catalog = spec.get_default_catalog(denodo_db)
    assert default_catalog is None


@pytest.mark.parametrize(
    "time_grain,expected_result",
    [
        (None, "col"),
        ("PT1M", "TRUNC(col,'MI')"),
        ("PT1H", "TRUNC(col,'HH')"),
        ("P1D", "TRUNC(col,'DDD')"),
        ("P1W", "TRUNC(col,'W')"),
        ("P1M", "TRUNC(col,'MONTH')"),
        ("P3M", "TRUNC(col,'Q')"),
        ("P1Y", "TRUNC(col,'YEAR')"),
    ],
)
def test_timegrain_expressions(
    time_grain: Optional[str], expected_result: str
) -> None:
    """
    DB Eng Specs (denodo): Test time grain expressions

    ``time_grain`` is ``Optional`` because the first parametrized case passes
    ``None`` (no grain), which is expected to render the bare column.
    """
    timestamp_expr = spec.get_timestamp_expr(
        col=column("col"), pdf=None, time_grain=time_grain
    )
    assert str(timestamp_expr) == expected_result

0 comments on commit 1c56857

Please sign in to comment.