Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add etag caching to dashboard APIs #14357

Merged
merged 1 commit into from
Apr 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 70 additions & 32 deletions superset/dashboards/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
from superset.extensions import event_logger
from superset.models.dashboard import Dashboard
from superset.tasks.thumbnails import cache_dashboard_thumbnail
from superset.utils.cache import etag_cache
from superset.utils.screenshots import DashboardScreenshot
from superset.utils.urls import get_url_path
from superset.views.base import generate_download_headers
Expand Down Expand Up @@ -210,6 +211,23 @@ def __init__(self) -> None:
self.include_route_methods = self.include_route_methods | {"thumbnail"}
super().__init__()

def __repr__(self) -> str:
"""Deterministic string representation of the API instance for etag_cache."""
return "Superset.dashboards.api.DashboardRestApi@v{}{}".format(
self.appbuilder.app.config["VERSION_STRING"],
self.appbuilder.app.config["VERSION_SHA"],
)

@etag_cache(
get_last_modified=lambda _self, id_or_slug: DashboardDAO.get_dashboard_changed_on( # pylint: disable=line-too-long
id_or_slug
),
max_age=0,
raise_for_access=lambda _self, id_or_slug: DashboardDAO.get_by_id_or_slug(
id_or_slug
),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it seems DashboardDAO.get_by_id_or_slug will be called at least 3 times on first visit (no etag). I'm starting to wonder whether it's worth it to move everything out of the view handler. Maybe some direct function calls would be both more performant and easier to understand.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, i noticed that too. The db call should be very fast since it's indexed, so i didn't think it was too big of a deal. Maybe it's worth caching for the request lifespan though. Honestly, I wonder if sqlalchemy could do something like that for us

skip=lambda _self, id_or_slug: not is_feature_enabled("DASHBOARD_CACHE"),
)
@expose("/<id_or_slug>", methods=["GET"])
@protect()
@safe
Expand Down Expand Up @@ -257,6 +275,16 @@ def get(self, id_or_slug: str) -> Response:
except DashboardNotFoundError:
return self.response_404()

@etag_cache(
get_last_modified=lambda _self, id_or_slug: DashboardDAO.get_dashboard_and_datasets_changed_on( # pylint: disable=line-too-long
id_or_slug
),
max_age=0,
raise_for_access=lambda _self, id_or_slug: DashboardDAO.get_by_id_or_slug(
id_or_slug
),
skip=lambda _self, id_or_slug: not is_feature_enabled("DASHBOARD_CACHE"),
)
@expose("/<id_or_slug>/datasets", methods=["GET"])
@protect()
@safe
Expand All @@ -267,38 +295,38 @@ def get(self, id_or_slug: str) -> Response:
)
def get_datasets(self, id_or_slug: str) -> Response:
"""Gets a dashboard's datasets
---
get:
description: >-
Returns a list of a dashboard's datasets. Each dataset includes only
the information necessary to render the dashboard's charts.
parameters:
- in: path
schema:
type: string
name: id_or_slug
description: Either the id of the dashboard, or its slug
responses:
200:
description: Dashboard dataset definitions
content:
application/json:
schema:
type: object
properties:
result:
type: array
items:
$ref: '#/components/schemas/DashboardDatasetSchema'
302:
description: Redirects to the current digest
400:
$ref: '#/components/responses/400'
401:
$ref: '#/components/responses/401'
404:
$ref: '#/components/responses/404'
"""
---
get:
description: >-
Returns a list of a dashboard's datasets. Each dataset includes only
the information necessary to render the dashboard's charts.
parameters:
- in: path
schema:
type: string
name: id_or_slug
description: Either the id of the dashboard, or its slug
responses:
200:
description: Dashboard dataset definitions
content:
application/json:
schema:
type: object
properties:
result:
type: array
items:
$ref: '#/components/schemas/DashboardDatasetSchema'
302:
description: Redirects to the current digest
400:
$ref: '#/components/responses/400'
401:
$ref: '#/components/responses/401'
404:
$ref: '#/components/responses/404'
"""
try:
datasets = DashboardDAO.get_datasets_for_dashboard(id_or_slug)
result = [
Expand All @@ -308,6 +336,16 @@ def get_datasets(self, id_or_slug: str) -> Response:
except DashboardNotFoundError:
return self.response_404()

@etag_cache(
get_last_modified=lambda _self, id_or_slug: DashboardDAO.get_dashboard_and_slices_changed_on( # pylint: disable=line-too-long
id_or_slug
),
max_age=0,
raise_for_access=lambda _self, id_or_slug: DashboardDAO.get_by_id_or_slug(
id_or_slug
),
skip=lambda _self, id_or_slug: not is_feature_enabled("DASHBOARD_CACHE"),
)
@expose("/<id_or_slug>/charts", methods=["GET"])
@protect()
@safe
Expand Down
67 changes: 66 additions & 1 deletion superset/dashboards/dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
# under the License.
import json
import logging
from typing import Any, Dict, List, Optional
from datetime import datetime
from typing import Any, Dict, List, Optional, Union

from sqlalchemy.exc import SQLAlchemyError

Expand Down Expand Up @@ -54,6 +55,70 @@ def get_datasets_for_dashboard(id_or_slug: str) -> List[Any]:
def get_charts_for_dashboard(id_or_slug: str) -> List[Slice]:
return DashboardDAO.get_by_id_or_slug(id_or_slug).slices

@staticmethod
def get_dashboard_changed_on(
id_or_slug_or_dashboard: Union[str, Dashboard]
) -> datetime:
"""
Get latest changed datetime for a dashboard.

:param id_or_slug_or_dashboard: A dashboard or the ID or slug of the dashboard.
:returns: The datetime the dashboard was last changed.
"""

dashboard = (
DashboardDAO.get_by_id_or_slug(id_or_slug_or_dashboard)
if isinstance(id_or_slug_or_dashboard, str)
else id_or_slug_or_dashboard
)
return dashboard.changed_on

@staticmethod
def get_dashboard_and_slices_changed_on( # pylint: disable=invalid-name
id_or_slug_or_dashboard: Union[str, Dashboard]
) -> datetime:
"""
Get latest changed datetime for a dashboard. The change could be a dashboard
metadata change, or a change to one of its dependent slices.

:param id_or_slug_or_dashboard: A dashboard or the ID or slug of the dashboard.
:returns: The datetime the dashboard was last changed.
"""

dashboard = (
DashboardDAO.get_by_id_or_slug(id_or_slug_or_dashboard)
if isinstance(id_or_slug_or_dashboard, str)
else id_or_slug_or_dashboard
)
dashboard_changed_on = DashboardDAO.get_dashboard_changed_on(dashboard)
slices_changed_on = max([slc.changed_on for slc in dashboard.slices])
# drop microseconds in datetime to match with last_modified header
return max(dashboard_changed_on, slices_changed_on).replace(microsecond=0)

@staticmethod
def get_dashboard_and_datasets_changed_on( # pylint: disable=invalid-name
id_or_slug_or_dashboard: Union[str, Dashboard]
) -> datetime:
"""
Get latest changed datetime for a dashboard. The change could be a dashboard
metadata change, a change to one of its dependent datasets.

:param id_or_slug_or_dashboard: A dashboard or the ID or slug of the dashboard.
:returns: The datetime the dashboard was last changed.
"""

dashboard = (
DashboardDAO.get_by_id_or_slug(id_or_slug_or_dashboard)
if isinstance(id_or_slug_or_dashboard, str)
else id_or_slug_or_dashboard
)
dashboard_changed_on = DashboardDAO.get_dashboard_changed_on(dashboard)
datasources_changed_on = max(
[datasource.changed_on for datasource in dashboard.datasources]
)
# drop microseconds in datetime to match with last_modified header
return max(dashboard_changed_on, datasources_changed_on).replace(microsecond=0)

@staticmethod
def validate_slug_uniqueness(slug: str) -> bool:
if not slug:
Expand Down
45 changes: 41 additions & 4 deletions superset/utils/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,11 @@ def wrapped_f(self: Any, *args: Any, **kwargs: Any) -> Any:


def etag_cache(
cache: Cache = cache_manager.cache, max_age: Optional[Union[int, float]] = None,
cache: Cache = cache_manager.cache,
get_last_modified: Optional[Callable[..., datetime]] = None,
max_age: Optional[Union[int, float]] = None,
raise_for_access: Optional[Callable[..., Any]] = None,
skip: Optional[Callable[..., bool]] = None,
) -> Callable[..., Any]:
"""
A decorator for caching views and handling etag conditional requests.
Expand All @@ -139,10 +143,19 @@ def etag_cache(
def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
@wraps(f)
def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin:
# Check if the user can access the resource
if raise_for_access:
try:
raise_for_access(*args, **kwargs)
except Exception: # pylint: disable=broad-except
# If there's no access, bypass the cache and let the function
# handle the response.
return f(*args, **kwargs)

# for POST requests we can't set cache headers, use the response
# cache nor use conditional requests; this will still use the
# dataframe cache in `superset/viz.py`, though.
if request.method == "POST":
if request.method == "POST" or (skip and skip(*args, **kwargs)):
return f(*args, **kwargs)

response = None
Expand All @@ -161,13 +174,37 @@ def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin:
raise
logger.exception("Exception possibly due to cache backend.")

# Check if the cache is stale. Default the content_changed_time to now
# if we don't know when it was last modified.
content_changed_time = datetime.utcnow()
if get_last_modified:
content_changed_time = get_last_modified(*args, **kwargs)
if (
response
and response.last_modified
and response.last_modified.timestamp()
< content_changed_time.timestamp()
):
# Bypass the cache if the response is stale
response = None

# if no response was cached, compute it using the wrapped function
if response is None:
response = f(*args, **kwargs)

# add headers for caching: Last Modified, Expires and ETag
response.cache_control.public = True
response.last_modified = datetime.utcnow()
# always revalidate the cache if we're checking permissions or
# if the response was modified
if get_last_modified or raise_for_access:
# `Cache-Control: no-cache` asks the browser to always store
# the cache, but also must validate it with the server.
response.cache_control.no_cache = True
else:
# `Cache-Control: Public` asks the browser to always store
# the cache.
response.cache_control.public = True

response.last_modified = content_changed_time
expiration = max_age or ONE_YEAR # max_age=0 also means far future
response.expires = response.last_modified + timedelta(
seconds=expiration
Expand Down
30 changes: 30 additions & 0 deletions tests/dashboards/dao_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# isort:skip_file
import copy
import json
import time

import pytest

Expand Down Expand Up @@ -79,3 +80,32 @@ def test_set_dash_metadata(self):

# reset dash to original data
DashboardDAO.set_dash_metadata(dash, original_data)

@pytest.mark.usefixtures("load_world_bank_dashboard_with_slices")
def test_get_dashboard_changed_on(self):
session = db.session()
dashboard = session.query(Dashboard).filter_by(slug="world_health").first()

assert dashboard.changed_on == DashboardDAO.get_dashboard_changed_on(dashboard)
assert dashboard.changed_on == DashboardDAO.get_dashboard_changed_on(
"world_health"
)

old_changed_on = dashboard.changed_on

# freezegun doesn't work for some reason, so we need to sleep here :(
time.sleep(1)
data = dashboard.data
positions = data["position_json"]
data.update({"positions": positions})
original_data = copy.deepcopy(data)

data.update({"foo": "bar"})
DashboardDAO.set_dash_metadata(dashboard, data)
session.merge(dashboard)
session.commit()
assert old_changed_on < DashboardDAO.get_dashboard_changed_on(dashboard)

DashboardDAO.set_dash_metadata(dashboard, original_data)
session.merge(dashboard)
session.commit()