Skip to content

Commit

Permalink
show pg and athena column comments and table descriptions as antd too…
Browse files Browse the repository at this point in the history
…ltip if they are defined (getredash#6582)

* show column comments by default for athena and postgres

* Restyled by prettier

* fixed typo

* fmt fix

* ordered imports

* fixed unit tests

* fixed tests for athena

---------

Co-authored-by: Andrew Chubatiuk <andrew.chubatiuk@motional.com>
Co-authored-by: Restyled.io <commits@restyled.io>
Co-authored-by: Andrii Chubatiuk <wachy@Andriis-MBP-2.lan>
  • Loading branch information
4 people authored Apr 12, 2024
1 parent 6d64127 commit c12d450
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 43 deletions.
74 changes: 52 additions & 22 deletions client/app/components/queries/SchemaBrowser.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import LoadingState from "../items-list/components/LoadingState";
const SchemaItemColumnType = PropTypes.shape({
name: PropTypes.string.isRequired,
type: PropTypes.string,
comment: PropTypes.string,
});

export const SchemaItemType = PropTypes.shape({
Expand Down Expand Up @@ -47,13 +48,30 @@ function SchemaItem({ item, expanded, onToggle, onSelect, ...props }) {
return (
<div {...props}>
<div className="schema-list-item">
<PlainButton className="table-name" onClick={onToggle}>
<i className="fa fa-table m-r-5" aria-hidden="true" />
<strong>
<span title={item.name}>{tableDisplayName}</span>
{!isNil(item.size) && <span> ({item.size})</span>}
</strong>
</PlainButton>
{item.description ? (
<Tooltip
title={item.description}
mouseEnterDelay={0}
mouseLeaveDelay={0}
placement="right"
arrowPointAtCenter>
<PlainButton className="table-name" onClick={onToggle}>
<i className="fa fa-table m-r-5" aria-hidden="true" />
<strong>
<span title={item.name}>{tableDisplayName}</span>
{!isNil(item.size) && <span> ({item.size})</span>}
</strong>
</PlainButton>
</Tooltip>
) : (
<PlainButton className="table-name" onClick={onToggle}>
<i className="fa fa-table m-r-5" aria-hidden="true" />
<strong>
<span title={item.name}>{tableDisplayName}</span>
{!isNil(item.size) && <span> ({item.size})</span>}
</strong>
</PlainButton>
)}
<Tooltip
title="Insert table name into query text"
mouseEnterDelay={0}
Expand All @@ -73,22 +91,34 @@ function SchemaItem({ item, expanded, onToggle, onSelect, ...props }) {
map(item.columns, column => {
const columnName = get(column, "name");
const columnType = get(column, "type");
return (
<Tooltip
title="Insert column name into query text"
mouseEnterDelay={0}
mouseLeaveDelay={0}
placement="rightTop">
<PlainButton key={columnName} className="table-open-item" onClick={e => handleSelect(e, columnName)}>
<div>
{columnName} {columnType && <span className="column-type">{columnType}</span>}
</div>
const columnComment = get(column, "comment");
if (columnComment) {
return (
<Tooltip title={columnComment} mouseEnterDelay={0} mouseLeaveDelay={0} placement="rightTop">
<PlainButton
key={columnName}
className="table-open-item"
onClick={e => handleSelect(e, columnName)}>
<div>
{columnName} {columnType && <span className="column-type">{columnType}</span>}
</div>

<div className="copy-to-editor">
<i className="fa fa-angle-double-right" aria-hidden="true" />
</div>
</PlainButton>
</Tooltip>
<div className="copy-to-editor">
<i className="fa fa-angle-double-right" aria-hidden="true" />
</div>
</PlainButton>
</Tooltip>
);
}
return (
<PlainButton key={columnName} className="table-open-item" onClick={e => handleSelect(e, columnName)}>
<div>
{columnName} {columnType && <span className="column-type">{columnType}</span>}
</div>
<div className="copy-to-editor">
<i className="fa fa-angle-double-right" aria-hidden="true" />
</div>
</PlainButton>
);
})
)}
Expand Down
11 changes: 10 additions & 1 deletion redash/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,16 @@ def get_schema(self, refresh=False):

def _sort_schema(self, schema):
return [
{"name": i["name"], "columns": sorted(i["columns"], key=lambda x: x["name"] if isinstance(x, dict) else x)}
{
"name": i["name"],
"description": i.get("description"),
"columns": sorted(
i["columns"],
key=lambda col: (
("partition" in col["type"], col.get("idx", 0), col["name"]) if isinstance(col, dict) else col
),
),
}
for i in sorted(schema, key=lambda x: x["name"])
]

Expand Down
39 changes: 34 additions & 5 deletions redash/query_runner/athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@

try:
import boto3
import pandas as pd
import pyathena
from pyathena.pandas_cursor import PandasCursor

enabled = True
except ImportError:
Expand Down Expand Up @@ -188,10 +190,35 @@ def __get_schema_from_glue(self):
logger.warning("Glue table doesn't have StorageDescriptor: %s", table_name)
continue
if table_name not in schema:
column = [columns["Name"] for columns in table["StorageDescriptor"]["Columns"]]
schema[table_name] = {"name": table_name, "columns": column}
for partition in table.get("PartitionKeys", []):
schema[table_name]["columns"].append(partition["Name"])
columns = []
for cols in table["StorageDescriptor"]["Columns"]:
c = {
"name": cols["Name"],
}
if "Type" in cols:
c["type"] = cols["Type"]
if "Comment" in cols:
c["comment"] = cols["Comment"]
columns.append(c)

schema[table_name] = {
"name": table_name,
"columns": columns,
"description": table.get("Description"),
}
for idx, partition in enumerate(table.get("PartitionKeys", [])):
schema[table_name]["columns"].append(
{
"name": partition["Name"],
"type": "partition",
"idx": idx,
}
)
if "Type" in partition:
_type = partition["Type"]
c["type"] = f"partition ({_type})"
if "Comment" in partition:
c["comment"] = partition["Comment"]
return list(schema.values())

def get_schema(self, get_stats=False):
Expand Down Expand Up @@ -225,14 +252,16 @@ def run_query(self, query, user):
kms_key=self.configuration.get("kms_key", None),
work_group=self.configuration.get("work_group", "primary"),
formatter=SimpleFormatter(),
cursor_class=PandasCursor,
**self._get_iam_credentials(user=user),
).cursor()

try:
cursor.execute(query)
column_tuples = [(i[0], _TYPE_MAPPINGS.get(i[1], None)) for i in cursor.description]
columns = self.fetch_columns(column_tuples)
rows = [dict(zip(([c["name"] for c in columns]), r)) for i, r in enumerate(cursor.fetchall())]
df = cursor.as_pandas().replace({pd.NA: None})
rows = df.to_dict(orient="records")
qbytes = None
athena_query_id = None
try:
Expand Down
18 changes: 15 additions & 3 deletions redash/query_runner/pg.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ def build_schema(query_result, schema):
column = row["column_name"]
if row.get("data_type") is not None:
column = {"name": row["column_name"], "type": row["data_type"]}
if "column_comment" in row:
column["comment"] = row["column_comment"]

schema[table_name]["columns"].append(column)

Expand Down Expand Up @@ -222,7 +224,9 @@ def _get_tables(self, schema):
SELECT s.nspname as table_schema,
c.relname as table_name,
a.attname as column_name,
null as data_type
null as data_type,
null as column_comment,
null as idx
FROM pg_class c
JOIN pg_namespace s
ON c.relnamespace = s.oid
Expand All @@ -238,8 +242,16 @@ def _get_tables(self, schema):
SELECT table_schema,
table_name,
column_name,
data_type
FROM information_schema.columns
data_type,
pgd.description,
isc.ordinal_position
FROM information_schema.columns as isc
LEFT JOIN pg_catalog.pg_statio_all_tables as st
ON isc.table_schema = st.schemaname
AND isc.table_name = st.relname
LEFT JOIN pg_catalog.pg_description pgd
ON pgd.objoid=st.relid
AND pgd.objsubid=isc.ordinal_position
WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
"""

Expand Down
20 changes: 12 additions & 8 deletions tests/models/test_data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class DataSourceTest(BaseTestCase):
def test_get_schema(self):
return_value = [{"name": "table", "columns": []}]
return_value = [{"name": "table", "columns": [], "description": None}]

with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
patched_get_schema.return_value = return_value
Expand All @@ -18,7 +18,7 @@ def test_get_schema(self):
self.assertEqual(return_value, schema)

def test_get_schema_uses_cache(self):
return_value = [{"name": "table", "columns": []}]
return_value = [{"name": "table", "columns": [], "description": None}]
with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
patched_get_schema.return_value = return_value

Expand All @@ -29,12 +29,12 @@ def test_get_schema_uses_cache(self):
self.assertEqual(patched_get_schema.call_count, 1)

def test_get_schema_skips_cache_with_refresh_true(self):
return_value = [{"name": "table", "columns": []}]
return_value = [{"name": "table", "columns": [], "description": None}]
with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
patched_get_schema.return_value = return_value

self.factory.data_source.get_schema()
new_return_value = [{"name": "new_table", "columns": []}]
new_return_value = [{"name": "new_table", "columns": [], "description": None}]
patched_get_schema.return_value = new_return_value
schema = self.factory.data_source.get_schema(refresh=True)

Expand All @@ -43,19 +43,21 @@ def test_get_schema_skips_cache_with_refresh_true(self):

def test_schema_sorter(self):
input_data = [
{"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"]},
{"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"], "description": None},
{
"name": "all_terain_vehicle",
"columns": ["has_wheels", "has_engine", "has_all_wheel_drive"],
"description": None,
},
]

expected_output = [
{
"name": "all_terain_vehicle",
"columns": ["has_all_wheel_drive", "has_engine", "has_wheels"],
"description": None,
},
{"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"]},
{"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"], "description": None},
]

real_output = self.factory.data_source._sort_schema(input_data)
Expand All @@ -64,19 +66,21 @@ def test_schema_sorter(self):

def test_model_uses_schema_sorter(self):
orig_schema = [
{"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"]},
{"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"], "description": None},
{
"name": "all_terain_vehicle",
"columns": ["has_wheels", "has_engine", "has_all_wheel_drive"],
"description": None,
},
]

sorted_schema = [
{
"name": "all_terain_vehicle",
"columns": ["has_all_wheel_drive", "has_engine", "has_wheels"],
"description": None,
},
{"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"]},
{"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"], "description": None},
]

with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
Expand Down
23 changes: 19 additions & 4 deletions tests/query_runner/test_athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ def test_external_table(self):
{"DatabaseName": "test1"},
)
with self.stubber:
assert query_runner.get_schema() == [{"columns": ["row_id"], "name": "test1.jdbc_table"}]
assert query_runner.get_schema() == [
{"columns": [{"name": "row_id", "type": "int"}], "name": "test1.jdbc_table", "description": None}
]

def test_partitioned_table(self):
"""
Expand Down Expand Up @@ -124,7 +126,16 @@ def test_partitioned_table(self):
{"DatabaseName": "test1"},
)
with self.stubber:
assert query_runner.get_schema() == [{"columns": ["sk", "category"], "name": "test1.partitioned_table"}]
assert query_runner.get_schema() == [
{
"columns": [
{"name": "sk", "type": "partition (int)"},
{"name": "category", "type": "partition", "idx": 0},
],
"name": "test1.partitioned_table",
"description": None,
}
]

def test_view(self):
query_runner = Athena({"glue": True, "region": "mars-east-1"})
Expand Down Expand Up @@ -156,7 +167,9 @@ def test_view(self):
{"DatabaseName": "test1"},
)
with self.stubber:
assert query_runner.get_schema() == [{"columns": ["sk"], "name": "test1.view"}]
assert query_runner.get_schema() == [
{"columns": [{"name": "sk", "type": "int"}], "name": "test1.view", "description": None}
]

def test_dodgy_table_does_not_break_schema_listing(self):
"""
Expand Down Expand Up @@ -196,7 +209,9 @@ def test_dodgy_table_does_not_break_schema_listing(self):
{"DatabaseName": "test1"},
)
with self.stubber:
assert query_runner.get_schema() == [{"columns": ["region"], "name": "test1.csv"}]
assert query_runner.get_schema() == [
{"columns": [{"name": "region", "type": "string"}], "name": "test1.csv", "description": None}
]

def test_no_storage_descriptor_table(self):
"""
Expand Down

0 comments on commit c12d450

Please sign in to comment.