Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: validate_parameters for GSheets #15578

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 27 additions & 33 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,19 @@ dnspython==2.0.0
# via email-validator
email-validator==1.1.1
# via flask-appbuilder
flask==1.1.2
# via
# apache-superset
# flask-appbuilder
# flask-babel
# flask-caching
# flask-compress
# flask-jwt-extended
# flask-login
# flask-migrate
# flask-openid
# flask-sqlalchemy
# flask-wtf
flask-appbuilder==3.3.0
# via apache-superset
flask-babel==1.0.0
Expand All @@ -94,19 +107,6 @@ flask-wtf==0.14.3
# via
# apache-superset
# flask-appbuilder
flask==1.1.2
# via
# apache-superset
# flask-appbuilder
# flask-babel
# flask-caching
# flask-compress
# flask-jwt-extended
# flask-login
# flask-migrate
# flask-openid
# flask-sqlalchemy
# flask-wtf
geographiclib==1.50
# via geopy
geopy==2.0.0
Expand All @@ -123,11 +123,6 @@ idna==2.10
# via
# email-validator
# yarl
importlib-metadata==2.1.1
# via
# jsonschema
# kombu
# markdown
isodate==0.6.0
# via apache-superset
itsdangerous==1.1.0
Expand All @@ -154,15 +149,15 @@ markupsafe==1.1.1
# jinja2
# mako
# wtforms
marshmallow-enum==1.5.1
# via flask-appbuilder
marshmallow-sqlalchemy==0.23.1
# via flask-appbuilder
marshmallow==3.9.0
# via
# flask-appbuilder
# marshmallow-enum
# marshmallow-sqlalchemy
marshmallow-enum==1.5.1
# via flask-appbuilder
marshmallow-sqlalchemy==0.23.1
# via flask-appbuilder
msgpack==1.0.0
# via apache-superset
multidict==5.0.0
Expand All @@ -176,7 +171,9 @@ numpy==1.19.4
# pandas
# pyarrow
packaging==20.4
# via bleach
# via
# bleach
# deprecation
pandas==1.2.2
# via apache-superset
parsedatetime==2.6
Expand Down Expand Up @@ -264,10 +261,6 @@ six==1.15.0
# wtforms-json
slackclient==2.5.0
# via apache-superset
sqlalchemy-utils==0.36.8
# via
# apache-superset
# flask-appbuilder
sqlalchemy==1.3.20
# via
# alembic
Expand All @@ -276,13 +269,16 @@ sqlalchemy==1.3.20
# flask-sqlalchemy
# marshmallow-sqlalchemy
# sqlalchemy-utils
sqlalchemy-utils==0.36.8
# via
# apache-superset
# flask-appbuilder
sqlparse==0.3.0
# via apache-superset
typing-extensions==3.7.4.3
# via
# aiohttp
# apache-superset
# yarl
urllib3==1.25.11
# via selenium
vine==1.3.0
Expand All @@ -295,18 +291,16 @@ werkzeug==1.0.1
# via
# flask
# flask-jwt-extended
wtforms-json==0.3.3
# via apache-superset
wtforms==2.3.3
# via
# flask-wtf
# wtforms-json
wtforms-json==0.3.3
# via apache-superset
yarl==1.6.2
# via aiohttp
zipp==3.4.1
# via
# -r requirements/base.in
# importlib-metadata
# via -r requirements/base.in

# The following packages are considered to be unsafe in a requirements file:
# setuptools
4 changes: 2 additions & 2 deletions requirements/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,13 @@ tableschema==1.20.0
# via -r requirements/development.in
tabulator==1.52.5
# via tableschema
thrift-sasl==0.4.2
# via pyhive
thrift==0.13.0
# via
# -r requirements/development.in
# pyhive
# thrift-sasl
thrift-sasl==0.4.2
# via pyhive
unicodecsv==0.14.1
# via
# tableschema
Expand Down
8 changes: 0 additions & 8 deletions requirements/integration.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@ filelock==3.0.12
# virtualenv
identify==1.5.9
# via pre-commit
importlib-metadata==2.1.1
# via
# pluggy
# pre-commit
# tox
# virtualenv
nodeenv==1.5.0
# via pre-commit
packaging==20.4
Expand Down Expand Up @@ -63,8 +57,6 @@ virtualenv==20.1.0
# via
# pre-commit
# tox
zipp==3.4.1
# via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# pip
1 change: 1 addition & 0 deletions requirements/testing.in
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ pylint
pytest
pytest-cov
statsd
pytest-mock
17 changes: 10 additions & 7 deletions requirements/testing.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SHA1:dba8c39bbdcb85b86e83af855d023b55a2674e87
# SHA1:d39180c0eb498d1a7dd73b8428e6ab304b728484
#
# This file is autogenerated by pip-compile-multi
# To update, run:
Expand All @@ -9,6 +9,8 @@
-r integration.txt
-e file:.
# via -r requirements/base.in
appnope==0.1.2
# via ipython
astroid==2.4.2
# via pylint
backcall==0.2.0
Expand All @@ -25,12 +27,12 @@ iniconfig==1.1.1
# via pytest
ipdb==0.13.4
# via -r requirements/testing.in
ipython-genutils==0.2.0
# via traitlets
ipython==7.16.1
# via
# -r requirements/testing.in
# ipdb
ipython-genutils==0.2.0
# via traitlets
isort==5.6.4
# via pylint
jedi==0.17.2
Expand Down Expand Up @@ -63,18 +65,19 @@ pyhive[hive,presto]==0.6.3
# -r requirements/testing.in
pylint==2.6.0
# via -r requirements/testing.in
pytest-cov==2.10.1
# via -r requirements/testing.in
pytest==6.1.2
# via
# -r requirements/testing.in
# pytest-cov
# pytest-mock
pytest-cov==2.10.1
# via -r requirements/testing.in
pytest-mock==3.6.1
# via -r requirements/testing.in
statsd==3.3.0
# via -r requirements/testing.in
traitlets==5.0.5
# via ipython
typed-ast==1.4.3
# via astroid
wcwidth==0.2.5
# via prompt-toolkit
websocket-client==0.57.0
Expand Down
55 changes: 51 additions & 4 deletions superset/db_engine_specs/gsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@
import json
import re
from contextlib import closing
from typing import Any, Dict, Optional, Pattern, Tuple, TYPE_CHECKING
from typing import Any, Dict, List, Optional, Pattern, Tuple, TYPE_CHECKING

from flask import g
from flask_babel import gettext as __
from sqlalchemy.engine import create_engine
from sqlalchemy.engine.url import URL
from typing_extensions import TypedDict

from superset import security_manager
from superset.db_engine_specs.sqlite import SqliteEngineSpec
from superset.errors import SupersetErrorType
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType

if TYPE_CHECKING:
from superset.models.core import Database
Expand All @@ -33,6 +36,12 @@
SYNTAX_ERROR_REGEX = re.compile('SQLError: near "(?P<server_error>.*?)": syntax error')


class GSheetsParametersType(TypedDict):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

be careful, TypedDict added in 3.8 so ... maybe the name should be changed

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, this is a backport so we can use it in 3.7: https://github.com/python/typing/blob/master/typing_extensions/README.rst

credentials_info: Dict[str, Any]
query: Dict[str, Any]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

gshees doesn't need query

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it does! The user can definitely pass options to it via the query string.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the query string for each individual url in the catalog?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's to pass parameters to configure the engine, eg:

gsheets://?list_all_sheets=1

This makes the dropdown show not only sheets in the catalog, but all of the user's sheets.

This can be done in the "extra" session as well, to be clear.

table_catalog: Dict[str, str]


class GSheetsEngineSpec(SqliteEngineSpec):
"""Engine for Google spreadsheets"""

Expand All @@ -45,7 +54,7 @@ class GSheetsEngineSpec(SqliteEngineSpec):
SYNTAX_ERROR_REGEX: (
__(
'Please check your query for syntax errors near "%(server_error)s". '
"Then, try running your query again."
"Then, try running your query again.",
),
SupersetErrorType.SYNTAX_ERROR,
{},
Expand All @@ -54,7 +63,7 @@ class GSheetsEngineSpec(SqliteEngineSpec):

@classmethod
def modify_url_for_impersonation(
cls, url: URL, impersonate_user: bool, username: Optional[str]
cls, url: URL, impersonate_user: bool, username: Optional[str],
) -> None:
if impersonate_user and username is not None:
user = security_manager.find_user(username=username)
Expand All @@ -77,3 +86,41 @@ def extra_table_metadata(
metadata = {}

return {"metadata": metadata["extra"]}

@classmethod
def validate_parameters(
cls, parameters: GSheetsParametersType,
) -> List[SupersetError]:
errors: List[SupersetError] = []

credentials_info = parameters.get("credentials_info")
table_catalog = parameters.get("table_catalog", {})

if not table_catalog:
return errors

# We need a subject in case domain wide delegation is set, otherwise the
# check will fail. This means that the admin will be able to add sheets
# that only they have access, even if later users are not able to access
# them.
subject = g.user.email if g.user else None

engine = create_engine(
"gsheets://", service_account_info=credentials_info, subject=subject,
)
conn = engine.connect()
for name, url in table_catalog.items():
try:
results = conn.execute(f'SELECT * FROM "{url}" LIMIT 1')
results.fetchall()
except Exception: # pylint: disable=broad-except
errors.append(
SupersetError(
message=f"Unable to connect to spreadsheet {name} at {url}",
error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
level=ErrorLevel.WARNING,
extra={"name": name, "url": url},
),
)

return errors
30 changes: 30 additions & 0 deletions tests/unit_tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@betodealmeida we could look at using pytest-flask.



@pytest.fixture
def app_context():
"""
A fixture for running the test inside an app context.
"""
from superset.app import create_app

app = create_app()
with app.app_context():
yield
Loading