Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

show pg and athena column comments and table descriptions as antd tooltip if they are defined #6582

Merged
merged 7 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 52 additions & 22 deletions client/app/components/queries/SchemaBrowser.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import LoadingState from "../items-list/components/LoadingState";
const SchemaItemColumnType = PropTypes.shape({
name: PropTypes.string.isRequired,
type: PropTypes.string,
comment: PropTypes.string,
});

export const SchemaItemType = PropTypes.shape({
Expand Down Expand Up @@ -47,13 +48,30 @@ function SchemaItem({ item, expanded, onToggle, onSelect, ...props }) {
return (
<div {...props}>
<div className="schema-list-item">
<PlainButton className="table-name" onClick={onToggle}>
<i className="fa fa-table m-r-5" aria-hidden="true" />
<strong>
<span title={item.name}>{tableDisplayName}</span>
{!isNil(item.size) && <span> ({item.size})</span>}
</strong>
</PlainButton>
{item.description ? (
<Tooltip
title={item.description}
mouseEnterDelay={0}
mouseLeaveDelay={0}
placement="right"
arrowPointAtCenter>
<PlainButton className="table-name" onClick={onToggle}>
<i className="fa fa-table m-r-5" aria-hidden="true" />
<strong>
<span title={item.name}>{tableDisplayName}</span>
{!isNil(item.size) && <span> ({item.size})</span>}
</strong>
</PlainButton>
</Tooltip>
) : (
<PlainButton className="table-name" onClick={onToggle}>
<i className="fa fa-table m-r-5" aria-hidden="true" />
<strong>
<span title={item.name}>{tableDisplayName}</span>
{!isNil(item.size) && <span> ({item.size})</span>}
</strong>
</PlainButton>
)}
<Tooltip
title="Insert table name into query text"
mouseEnterDelay={0}
Expand All @@ -73,22 +91,34 @@ function SchemaItem({ item, expanded, onToggle, onSelect, ...props }) {
map(item.columns, column => {
const columnName = get(column, "name");
const columnType = get(column, "type");
return (
<Tooltip
title="Insert column name into query text"
mouseEnterDelay={0}
mouseLeaveDelay={0}
placement="rightTop">
<PlainButton key={columnName} className="table-open-item" onClick={e => handleSelect(e, columnName)}>
<div>
{columnName} {columnType && <span className="column-type">{columnType}</span>}
</div>
const columnComment = get(column, "comment");
justinclift marked this conversation as resolved.
Show resolved Hide resolved
if (columnComment) {
return (
<Tooltip title={columnComment} mouseEnterDelay={0} mouseLeaveDelay={0} placement="rightTop">
<PlainButton
key={columnName}
className="table-open-item"
onClick={e => handleSelect(e, columnName)}>
<div>
{columnName} {columnType && <span className="column-type">{columnType}</span>}
</div>

<div className="copy-to-editor">
<i className="fa fa-angle-double-right" aria-hidden="true" />
</div>
</PlainButton>
</Tooltip>
<div className="copy-to-editor">
<i className="fa fa-angle-double-right" aria-hidden="true" />
</div>
</PlainButton>
</Tooltip>
);
}
return (
<PlainButton key={columnName} className="table-open-item" onClick={e => handleSelect(e, columnName)}>
<div>
{columnName} {columnType && <span className="column-type">{columnType}</span>}
</div>
<div className="copy-to-editor">
<i className="fa fa-angle-double-right" aria-hidden="true" />
</div>
</PlainButton>
);
})
)}
Expand Down
11 changes: 10 additions & 1 deletion redash/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,16 @@ def get_schema(self, refresh=False):

def _sort_schema(self, schema):
return [
{"name": i["name"], "columns": sorted(i["columns"], key=lambda x: x["name"] if isinstance(x, dict) else x)}
{
"name": i["name"],
"description": i.get("description"),
"columns": sorted(
i["columns"],
key=lambda col: (
("partition" in col["type"], col.get("idx", 0), col["name"]) if isinstance(col, dict) else col
),
),
}
for i in sorted(schema, key=lambda x: x["name"])
]

Expand Down
39 changes: 34 additions & 5 deletions redash/query_runner/athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@

try:
import boto3
import pandas as pd
import pyathena
from pyathena.pandas_cursor import PandasCursor

enabled = True
except ImportError:
Expand Down Expand Up @@ -188,10 +190,35 @@ def __get_schema_from_glue(self):
logger.warning("Glue table doesn't have StorageDescriptor: %s", table_name)
continue
if table_name not in schema:
column = [columns["Name"] for columns in table["StorageDescriptor"]["Columns"]]
schema[table_name] = {"name": table_name, "columns": column}
for partition in table.get("PartitionKeys", []):
schema[table_name]["columns"].append(partition["Name"])
columns = []
for cols in table["StorageDescriptor"]["Columns"]:
c = {
"name": cols["Name"],
}
if "Type" in cols:
c["type"] = cols["Type"]
if "Comment" in cols:
c["comment"] = cols["Comment"]
columns.append(c)

schema[table_name] = {
"name": table_name,
"columns": columns,
"description": table.get("Description"),
}
for idx, partition in enumerate(table.get("PartitionKeys", [])):
schema[table_name]["columns"].append(
{
"name": partition["Name"],
"type": "partition",
"idx": idx,
}
)
if "Type" in partition:
_type = partition["Type"]
c["type"] = f"partition ({_type})"
if "Comment" in partition:
c["comment"] = partition["Comment"]
return list(schema.values())

def get_schema(self, get_stats=False):
Expand Down Expand Up @@ -225,14 +252,16 @@ def run_query(self, query, user):
kms_key=self.configuration.get("kms_key", None),
work_group=self.configuration.get("work_group", "primary"),
formatter=SimpleFormatter(),
cursor_class=PandasCursor,
**self._get_iam_credentials(user=user),
).cursor()

try:
cursor.execute(query)
column_tuples = [(i[0], _TYPE_MAPPINGS.get(i[1], None)) for i in cursor.description]
columns = self.fetch_columns(column_tuples)
rows = [dict(zip(([c["name"] for c in columns]), r)) for i, r in enumerate(cursor.fetchall())]
df = cursor.as_pandas().replace({pd.NA: None})
rows = df.to_dict(orient="records")
qbytes = None
athena_query_id = None
try:
Expand Down
18 changes: 15 additions & 3 deletions redash/query_runner/pg.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ def build_schema(query_result, schema):
column = row["column_name"]
if row.get("data_type") is not None:
column = {"name": row["column_name"], "type": row["data_type"]}
if "column_comment" in row:
column["comment"] = row["column_comment"]

schema[table_name]["columns"].append(column)

Expand Down Expand Up @@ -222,7 +224,9 @@ def _get_tables(self, schema):
SELECT s.nspname as table_schema,
c.relname as table_name,
a.attname as column_name,
null as data_type
null as data_type,
null as column_comment,
null as idx
FROM pg_class c
JOIN pg_namespace s
ON c.relnamespace = s.oid
Expand All @@ -238,8 +242,16 @@ def _get_tables(self, schema):
SELECT table_schema,
table_name,
column_name,
data_type
FROM information_schema.columns
data_type,
pgd.description,
isc.ordinal_position
FROM information_schema.columns as isc
LEFT JOIN pg_catalog.pg_statio_all_tables as st
ON isc.table_schema = st.schemaname
AND isc.table_name = st.relname
LEFT JOIN pg_catalog.pg_description pgd
ON pgd.objoid=st.relid
AND pgd.objsubid=isc.ordinal_position
WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
"""

Expand Down
20 changes: 12 additions & 8 deletions tests/models/test_data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class DataSourceTest(BaseTestCase):
def test_get_schema(self):
return_value = [{"name": "table", "columns": []}]
return_value = [{"name": "table", "columns": [], "description": None}]

with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
patched_get_schema.return_value = return_value
Expand All @@ -18,7 +18,7 @@ def test_get_schema(self):
self.assertEqual(return_value, schema)

def test_get_schema_uses_cache(self):
return_value = [{"name": "table", "columns": []}]
return_value = [{"name": "table", "columns": [], "description": None}]
with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
patched_get_schema.return_value = return_value

Expand All @@ -29,12 +29,12 @@ def test_get_schema_uses_cache(self):
self.assertEqual(patched_get_schema.call_count, 1)

def test_get_schema_skips_cache_with_refresh_true(self):
return_value = [{"name": "table", "columns": []}]
return_value = [{"name": "table", "columns": [], "description": None}]
with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
patched_get_schema.return_value = return_value

self.factory.data_source.get_schema()
new_return_value = [{"name": "new_table", "columns": []}]
new_return_value = [{"name": "new_table", "columns": [], "description": None}]
patched_get_schema.return_value = new_return_value
schema = self.factory.data_source.get_schema(refresh=True)

Expand All @@ -43,19 +43,21 @@ def test_get_schema_skips_cache_with_refresh_true(self):

def test_schema_sorter(self):
input_data = [
{"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"]},
{"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"], "description": None},
{
"name": "all_terain_vehicle",
"columns": ["has_wheels", "has_engine", "has_all_wheel_drive"],
"description": None,
},
]

expected_output = [
{
"name": "all_terain_vehicle",
"columns": ["has_all_wheel_drive", "has_engine", "has_wheels"],
"description": None,
},
{"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"]},
{"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"], "description": None},
]

real_output = self.factory.data_source._sort_schema(input_data)
Expand All @@ -64,19 +66,21 @@ def test_schema_sorter(self):

def test_model_uses_schema_sorter(self):
orig_schema = [
{"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"]},
{"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"], "description": None},
{
"name": "all_terain_vehicle",
"columns": ["has_wheels", "has_engine", "has_all_wheel_drive"],
"description": None,
},
]

sorted_schema = [
{
"name": "all_terain_vehicle",
"columns": ["has_all_wheel_drive", "has_engine", "has_wheels"],
"description": None,
},
{"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"]},
{"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"], "description": None},
]

with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
Expand Down
23 changes: 19 additions & 4 deletions tests/query_runner/test_athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ def test_external_table(self):
{"DatabaseName": "test1"},
)
with self.stubber:
assert query_runner.get_schema() == [{"columns": ["row_id"], "name": "test1.jdbc_table"}]
assert query_runner.get_schema() == [
{"columns": [{"name": "row_id", "type": "int"}], "name": "test1.jdbc_table", "description": None}
]

def test_partitioned_table(self):
"""
Expand Down Expand Up @@ -124,7 +126,16 @@ def test_partitioned_table(self):
{"DatabaseName": "test1"},
)
with self.stubber:
assert query_runner.get_schema() == [{"columns": ["sk", "category"], "name": "test1.partitioned_table"}]
assert query_runner.get_schema() == [
{
"columns": [
{"name": "sk", "type": "partition (int)"},
{"name": "category", "type": "partition", "idx": 0},
],
"name": "test1.partitioned_table",
"description": None,
}
]

def test_view(self):
query_runner = Athena({"glue": True, "region": "mars-east-1"})
Expand Down Expand Up @@ -156,7 +167,9 @@ def test_view(self):
{"DatabaseName": "test1"},
)
with self.stubber:
assert query_runner.get_schema() == [{"columns": ["sk"], "name": "test1.view"}]
assert query_runner.get_schema() == [
{"columns": [{"name": "sk", "type": "int"}], "name": "test1.view", "description": None}
]

def test_dodgy_table_does_not_break_schema_listing(self):
"""
Expand Down Expand Up @@ -196,7 +209,9 @@ def test_dodgy_table_does_not_break_schema_listing(self):
{"DatabaseName": "test1"},
)
with self.stubber:
assert query_runner.get_schema() == [{"columns": ["region"], "name": "test1.csv"}]
assert query_runner.get_schema() == [
{"columns": [{"name": "region", "type": "string"}], "name": "test1.csv", "description": None}
]

def test_no_storage_descriptor_table(self):
"""
Expand Down
Loading