Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Create BigQuery Parameters for DatabaseModal #14721

Merged
merged 25 commits into from
May 23, 2021
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions superset/databases/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
)
from superset.databases.utils import get_table_metadata
from superset.db_engine_specs import get_available_engine_specs
from superset.db_engine_specs.base import BasicParametersMixin
from superset.exceptions import InvalidPayloadFormatError, InvalidPayloadSchemaError
from superset.extensions import security_manager
from superset.models.core import Database
Expand Down Expand Up @@ -909,11 +908,15 @@ def available(self) -> Response:
"preferred": engine_spec.engine in preferred_databases,
}

if issubclass(engine_spec, BasicParametersMixin):
payload["parameters"] = engine_spec.parameters_json_schema()
if hasattr(engine_spec, "parameters_json_schema") or hasattr(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@betodealmeida let me know if this is good enough of a guard statement moving forward to make sure we don't call parameters_json_schema for engines we have implemented yet

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks great!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Though I think ideally we'd have all these methods in BaseEngineSpec and we'd check if parameters_schema is not None.

hughhhh marked this conversation as resolved.
Show resolved Hide resolved
engine_spec, "sqlalchemy_uri_placeholder"
):
payload[
"parameters"
] = engine_spec.parameters_json_schema() # type: ignore
payload[
"sqlalchemy_uri_placeholder"
] = engine_spec.sqlalchemy_uri_placeholder
] = engine_spec.sqlalchemy_uri_placeholder # type: ignore

available_databases.append(payload)

Expand Down
12 changes: 10 additions & 2 deletions superset/databases/commands/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import json
from contextlib import closing
from typing import Any, Dict, Optional

Expand Down Expand Up @@ -81,17 +82,24 @@ def run(self) -> None:
if errors:
raise InvalidParametersError(errors)

serialized_encrypted_extra = self._properties.get("encrypted_extra", "{}")
betodealmeida marked this conversation as resolved.
Show resolved Hide resolved
try:
encrypted_extra = json.loads(serialized_encrypted_extra)
except json.decoder.JSONDecodeError:
encrypted_extra = {}

# try to connect
sqlalchemy_uri = engine_spec.build_sqlalchemy_uri(
self._properties["parameters"] # type: ignore
self._properties["parameters"], # type: ignore
encrypted_extra,
)
if self._model and sqlalchemy_uri == self._model.safe_sqlalchemy_uri():
sqlalchemy_uri = self._model.sqlalchemy_uri_decrypted
database = DatabaseDAO.build_db_for_connection_test(
server_cert=self._properties.get("server_cert", ""),
extra=self._properties.get("extra", "{}"),
impersonate_user=self._properties.get("impersonate_user", False),
encrypted_extra=self._properties.get("encrypted_extra", "{}"),
encrypted_extra=serialized_encrypted_extra,
)
database.set_sqlalchemy_uri(sqlalchemy_uri)
database.db_engine_spec.mutate_db_for_connection_test(database)
Expand Down
22 changes: 21 additions & 1 deletion superset/databases/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,12 @@ def build_sqlalchemy_uri(
the constructed SQLAlchemy URI to be passed.
"""
parameters = data.pop("parameters", None)
serialized_encrypted_extra = data.get("encrypted_extra", "{}")
try:
encrypted_extra = json.loads(serialized_encrypted_extra)
except json.decoder.JSONDecodeError:
encrypted_extra = {}

if parameters:
if "engine" not in parameters:
raise ValidationError(
Expand All @@ -264,12 +270,14 @@ def build_sqlalchemy_uri(
[_('Engine "%(engine)s" is not a valid engine.', engine=engine,)]
)
engine_spec = engine_specs[engine]

if hasattr(engine_spec, "build_sqlalchemy_uri"):
data[
"sqlalchemy_uri"
] = engine_spec.build_sqlalchemy_uri( # type: ignore
parameters
parameters, encrypted_extra
)

return data


Expand Down Expand Up @@ -546,3 +554,15 @@ def validate_password(self, data: Dict[str, Any], **kwargs: Any) -> None:
password = make_url(uri).password
if password == PASSWORD_MASK and data.get("password") is None:
raise ValidationError("Must provide a password for the database")


class EncryptedField(fields.String):
pass


def encrypted_field_properties(self, field: Any, **_) -> Dict[str, Any]: # type: ignore
ret = {}
if isinstance(field, EncryptedField):
if self.openapi_version.major > 2:
ret["x-encrypted-extra"] = True
return ret
6 changes: 5 additions & 1 deletion superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,11 @@ class BasicParametersMixin:
encryption_parameters: Dict[str, str] = {}

@classmethod
def build_sqlalchemy_uri(cls, parameters: BasicParametersType) -> str:
def build_sqlalchemy_uri(
cls,
parameters: BasicParametersType,
encryted_extra: Optional[Dict[str, str]] = None,
) -> str:
query = parameters.get("query", {})
if parameters.get("encryption"):
if not cls.encryption_parameters:
Expand Down
64 changes: 64 additions & 0 deletions superset/db_engine_specs/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,18 @@
from typing import Any, Dict, List, Optional, Pattern, Tuple, TYPE_CHECKING

import pandas as pd
from apispec import APISpec
from apispec.ext.marshmallow import MarshmallowPlugin
from flask_babel import gettext as __
from marshmallow import Schema
from sqlalchemy import literal_column
from sqlalchemy.sql.expression import ColumnClause
from typing_extensions import TypedDict

from superset.databases.schemas import encrypted_field_properties, EncryptedField
from superset.db_engine_specs.base import BaseEngineSpec
from superset.errors import SupersetErrorType
from superset.exceptions import SupersetGenericDBErrorException
from superset.sql_parse import Table
from superset.utils import core as utils
from superset.utils.hashing import md5_sha_from_str
Expand All @@ -38,6 +44,16 @@
+ "permission in project (?P<project>.+?)"
)

ma_plugin = MarshmallowPlugin()


class BigQueryParametersSchema(Schema):
credentials_info = EncryptedField(description="credentials.json file for BigQuery")
hughhhh marked this conversation as resolved.
Show resolved Hide resolved


class BigQueryParametersType(TypedDict):
credentials_info: Dict[str, Any]


class BigQueryEngineSpec(BaseEngineSpec):
"""Engine spec for Google's BigQuery
Expand All @@ -48,6 +64,10 @@ class BigQueryEngineSpec(BaseEngineSpec):
engine_name = "Google BigQuery"
max_column_name_length = 128

parameters_schema = BigQueryParametersSchema()
drivername = engine
sqlalchemy_uri_placeholder = "bigquery://{project_id}"

# BigQuery doesn't maintain context when running multiple statements in the
# same cursor, so we need to run all statements at once
run_multiple_statements_as_one = True
Expand Down Expand Up @@ -282,3 +302,47 @@ def df_to_sql(
to_gbq_kwargs[key] = to_sql_kwargs[key]

pandas_gbq.to_gbq(df, **to_gbq_kwargs)

@classmethod
def build_sqlalchemy_uri(
cls, _: BigQueryParametersType, encrypted_extra: Optional[Dict[str, str]] = None
) -> str:
if encrypted_extra:
project_id = encrypted_extra.get("project_id")
return f"{cls.drivername}://{project_id}"

raise SupersetGenericDBErrorException(
message="Big Query encrypted_extra is not available",
hughhhh marked this conversation as resolved.
Show resolved Hide resolved
)

@classmethod
def get_parameters_from_uri(
cls, _: str, encrypted_extra: Optional[Dict[str, str]] = None
) -> Any:
# BigQuery doesn't have parameters
if encrypted_extra:
return encrypted_extra

raise SupersetGenericDBErrorException(
message="Big Query encrypted_extra is not available",
hughhhh marked this conversation as resolved.
Show resolved Hide resolved
)

@classmethod
def parameters_json_schema(cls) -> Any:
"""
Return configuration parameters as OpenAPI.
"""
if not cls.parameters_schema:
return None

spec = APISpec(
title="Database Parameters",
version="1.0.0",
openapi_version="3.0.0",
plugins=[ma_plugin],
)

ma_plugin.init_spec(spec)
ma_plugin.converter.add_attribute_function(encrypted_field_properties)
spec.components.schema(cls.__name__, schema=cls.parameters_schema)
return spec.to_dict()["components"]["schemas"][cls.__name__]
1 change: 1 addition & 0 deletions superset/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ def backend(self) -> str:
def parameters(self) -> Dict[str, Any]:
# Build parameters if db_engine_spec is a subclass of BasicParametersMixin
parameters = {"engine": self.backend}

if hasattr(self.db_engine_spec, "parameters_schema") and hasattr(
self.db_engine_spec, "get_parameters_from_uri"
):
Expand Down
18 changes: 18 additions & 0 deletions tests/databases/api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from superset.connectors.sqla.models import SqlaTable
from superset.db_engine_specs.mysql import MySQLEngineSpec
from superset.db_engine_specs.postgres import PostgresEngineSpec
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
from superset.db_engine_specs.hana import HanaEngineSpec
from superset.errors import SupersetError
from superset.models.core import Database, ConfigurationMethod
Expand Down Expand Up @@ -1373,6 +1374,7 @@ def test_available(self, app, get_available_engine_specs):
app.config = {"PREFERRED_DATABASES": ["postgresql"]}
get_available_engine_specs.return_value = [
PostgresEngineSpec,
BigQueryEngineSpec,
HanaEngineSpec,
]

Expand Down Expand Up @@ -1428,6 +1430,22 @@ def test_available(self, app, get_available_engine_specs):
"preferred": True,
"sqlalchemy_uri_placeholder": "postgresql+psycopg2://user:password@host:port/dbname[?key=value&key=value...]",
},
{
"engine": "bigquery",
"name": "Google BigQuery",
"parameters": {
"properties": {
"credentials_info": {
"description": "credentials.json file for BigQuery",
"type": "string",
"x-encrypted-extra": True,
}
},
"type": "object",
},
"preferred": False,
"sqlalchemy_uri_placeholder": "bigquery://{project_id}",
},
{"engine": "hana", "name": "SAP HANA", "preferred": False},
]
}
Expand Down
5 changes: 4 additions & 1 deletion tests/db_engine_specs/postgres_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,10 @@ def test_base_parameters_mixin():
"query": {"foo": "bar"},
"encryption": True,
}
sqlalchemy_uri = PostgresEngineSpec.build_sqlalchemy_uri(parameters)
encrypted_extra = None
sqlalchemy_uri = PostgresEngineSpec.build_sqlalchemy_uri(
parameters, encrypted_extra
)
assert sqlalchemy_uri == (
"postgresql+psycopg2://username:password@localhost:5432/dbname?"
"foo=bar&sslmode=verify-ca"
Expand Down