diff --git a/client/app/components/queries/SchemaBrowser.jsx b/client/app/components/queries/SchemaBrowser.jsx
index ed8d2402ed..730f4acf88 100644
--- a/client/app/components/queries/SchemaBrowser.jsx
+++ b/client/app/components/queries/SchemaBrowser.jsx
@@ -16,6 +16,7 @@ import LoadingState from "../items-list/components/LoadingState";
const SchemaItemColumnType = PropTypes.shape({
name: PropTypes.string.isRequired,
type: PropTypes.string,
+ comment: PropTypes.string,
});
export const SchemaItemType = PropTypes.shape({
@@ -47,13 +48,30 @@ function SchemaItem({ item, expanded, onToggle, onSelect, ...props }) {
return (
-    <div className="table-name" onClick={onToggle}>
-      <i className="fa fa-table m-r-5" aria-hidden="true" />
-      <strong>
-        {tableDisplayName}
-        {!isNil(item.size) && <span> ({item.size})</span>}
-      </strong>
-    </div>
+    {item.description ? (
+      <Tooltip title={item.description}>
+        <div className="table-name" onClick={onToggle}>
+          <i className="fa fa-table m-r-5" aria-hidden="true" />
+          <strong>
+            {tableDisplayName}
+            {!isNil(item.size) && <span> ({item.size})</span>}
+          </strong>
+        </div>
+      </Tooltip>
+    ) : (
+      <div className="table-name" onClick={onToggle}>
+        <i className="fa fa-table m-r-5" aria-hidden="true" />
+        <strong>
+          {tableDisplayName}
+          {!isNil(item.size) && <span> ({item.size})</span>}
+        </strong>
+      </div>
+    )}
          {map(item.columns, column => {
            const columnName = get(column, "name");
            const columnType = get(column, "type");
-           return (
-             <div key={columnName} className="table-open">
-               <div className="table-open-item" onClick={e => handleSelect(e, columnName)}>
-                 <div>
-                   {columnName} {columnType && <span className="column-type">{columnType}</span>}
-                 </div>
-               </div>
-             </div>
+           const columnComment = get(column, "comment");
+           if (columnComment) {
+             return (
+               <Tooltip key={columnName} title={columnComment}>
+                 <div className="table-open">
+                   <div className="table-open-item" onClick={e => handleSelect(e, columnName)}>
+                     <div>
+                       {columnName} {columnType && <span className="column-type">{columnType}</span>}
+                     </div>
+                   </div>
+                 </div>
+               </Tooltip>
+             );
+           }
+           return (
+             <div key={columnName} className="table-open">
+               <div className="table-open-item" onClick={e => handleSelect(e, columnName)}>
+                 <div>
+                   {columnName} {columnType && <span className="column-type">{columnType}</span>}
+                 </div>
+               </div>
+             </div>
            );
})
)}
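
Note for reviewers: the schema payload rendered above now carries an optional description per table and an optional comment per column. A minimal sketch of the expected shape (only the keys come from this change; names and values are invented):

    schema = [
        {
            "name": "public.users",
            "description": "Registered application users",  # rendered as a tooltip on the table row
            "columns": [
                {"name": "id", "type": "integer", "comment": "surrogate key"},
                {"name": "email", "type": "character varying"},  # type and comment are optional
            ],
        },
    ]
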
diff --git a/redash/models/__init__.py b/redash/models/__init__.py
index 91ddd91ee3..7ee0d99741 100644
--- a/redash/models/__init__.py
+++ b/redash/models/__init__.py
@@ -227,7 +227,16 @@ def get_schema(self, refresh=False):
def _sort_schema(self, schema):
return [
- {"name": i["name"], "columns": sorted(i["columns"], key=lambda x: x["name"] if isinstance(x, dict) else x)}
+ {
+ "name": i["name"],
+ "description": i.get("description"),
+ "columns": sorted(
+ i["columns"],
+ key=lambda col: (
+ ("partition" in col["type"], col.get("idx", 0), col["name"]) if isinstance(col, dict) else col
+ ),
+ ),
+ }
for i in sorted(schema, key=lambda x: x["name"])
]
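
Note for reviewers: a standalone sketch of how the new sort key behaves on a mixed column list (column names below are invented). Regular columns sort alphabetically; partition columns sort after them and keep their original order via idx:

    def sort_key(col):
        # Plain-string columns (legacy schema format) sort as-is.
        if not isinstance(col, dict):
            return col
        return ("partition" in col.get("type", ""), col.get("idx", 0), col["name"])

    columns = [
        {"name": "b_col", "type": "int"},
        {"name": "a_col", "type": "string"},
        {"name": "month", "type": "partition (string)", "idx": 1},
        {"name": "year", "type": "partition (int)", "idx": 0},
    ]

    print([c["name"] for c in sorted(columns, key=sort_key)])
    # -> ['a_col', 'b_col', 'year', 'month']
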
diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py
index acde734f02..0a5f648417 100644
--- a/redash/query_runner/athena.py
+++ b/redash/query_runner/athena.py
@@ -21,7 +21,9 @@
try:
import boto3
+ import pandas as pd
import pyathena
+ from pyathena.pandas_cursor import PandasCursor
enabled = True
except ImportError:
@@ -188,10 +190,35 @@ def __get_schema_from_glue(self):
logger.warning("Glue table doesn't have StorageDescriptor: %s", table_name)
continue
if table_name not in schema:
- column = [columns["Name"] for columns in table["StorageDescriptor"]["Columns"]]
- schema[table_name] = {"name": table_name, "columns": column}
- for partition in table.get("PartitionKeys", []):
- schema[table_name]["columns"].append(partition["Name"])
+ columns = []
+ for cols in table["StorageDescriptor"]["Columns"]:
+ c = {
+ "name": cols["Name"],
+ }
+ if "Type" in cols:
+ c["type"] = cols["Type"]
+ if "Comment" in cols:
+ c["comment"] = cols["Comment"]
+ columns.append(c)
+
+ schema[table_name] = {
+ "name": table_name,
+ "columns": columns,
+ "description": table.get("Description"),
+ }
+                for idx, partition in enumerate(table.get("PartitionKeys", [])):
+                    # Build the partition entry first so its Glue type/comment land on
+                    # the partition column itself, not on the last regular column.
+                    partition_column = {
+                        "name": partition["Name"],
+                        "type": "partition",
+                        "idx": idx,
+                    }
+                    if "Type" in partition:
+                        _type = partition["Type"]
+                        partition_column["type"] = f"partition ({_type})"
+                    if "Comment" in partition:
+                        partition_column["comment"] = partition["Comment"]
+                    schema[table_name]["columns"].append(partition_column)
return list(schema.values())
def get_schema(self, get_stats=False):
@@ -225,6 +252,7 @@ def run_query(self, query, user):
kms_key=self.configuration.get("kms_key", None),
work_group=self.configuration.get("work_group", "primary"),
formatter=SimpleFormatter(),
+ cursor_class=PandasCursor,
**self._get_iam_credentials(user=user),
).cursor()
@@ -232,7 +260,8 @@ def run_query(self, query, user):
cursor.execute(query)
column_tuples = [(i[0], _TYPE_MAPPINGS.get(i[1], None)) for i in cursor.description]
columns = self.fetch_columns(column_tuples)
- rows = [dict(zip(([c["name"] for c in columns]), r)) for i, r in enumerate(cursor.fetchall())]
+ df = cursor.as_pandas().replace({pd.NA: None})
+ rows = df.to_dict(orient="records")
qbytes = None
athena_query_id = None
try:
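
Note for reviewers: the PandasCursor change above swaps row materialization from fetchall() to a DataFrame. A quick sketch of the conversion step (the DataFrame stands in for cursor.as_pandas() output; its contents are invented):

    import pandas as pd

    # Stand-in for cursor.as_pandas() output.
    df = pd.DataFrame({"id": [1, 2], "name": ["alice", pd.NA]})

    # pd.NA is not JSON-serializable, so missing values are mapped back to None
    # before the records are handed to Redash's result serializer.
    rows = df.replace({pd.NA: None}).to_dict(orient="records")
    # the second record now carries None instead of pd.NA
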
diff --git a/redash/query_runner/pg.py b/redash/query_runner/pg.py
index dc74aff2cc..46bf3cc0b8 100644
--- a/redash/query_runner/pg.py
+++ b/redash/query_runner/pg.py
@@ -108,6 +108,8 @@ def build_schema(query_result, schema):
column = row["column_name"]
if row.get("data_type") is not None:
column = {"name": row["column_name"], "type": row["data_type"]}
+ if "column_comment" in row:
+ column["comment"] = row["column_comment"]
schema[table_name]["columns"].append(column)
@@ -222,7 +224,9 @@ def _get_tables(self, schema):
SELECT s.nspname as table_schema,
c.relname as table_name,
a.attname as column_name,
- null as data_type
+ null as data_type,
+ null as column_comment,
+ null as idx
FROM pg_class c
JOIN pg_namespace s
ON c.relnamespace = s.oid
@@ -238,8 +242,16 @@ def _get_tables(self, schema):
SELECT table_schema,
table_name,
column_name,
- data_type
- FROM information_schema.columns
+ data_type,
+ pgd.description,
+ isc.ordinal_position
+ FROM information_schema.columns as isc
+ LEFT JOIN pg_catalog.pg_statio_all_tables as st
+ ON isc.table_schema = st.schemaname
+ AND isc.table_name = st.relname
+ LEFT JOIN pg_catalog.pg_description pgd
+ ON pgd.objoid=st.relid
+ AND pgd.objsubid=isc.ordinal_position
WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
"""
diff --git a/tests/models/test_data_sources.py b/tests/models/test_data_sources.py
index 7db4d463a3..82bd4b5a7a 100644
--- a/tests/models/test_data_sources.py
+++ b/tests/models/test_data_sources.py
@@ -8,7 +8,7 @@
class DataSourceTest(BaseTestCase):
def test_get_schema(self):
- return_value = [{"name": "table", "columns": []}]
+ return_value = [{"name": "table", "columns": [], "description": None}]
with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
patched_get_schema.return_value = return_value
@@ -18,7 +18,7 @@ def test_get_schema(self):
self.assertEqual(return_value, schema)
def test_get_schema_uses_cache(self):
- return_value = [{"name": "table", "columns": []}]
+ return_value = [{"name": "table", "columns": [], "description": None}]
with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
patched_get_schema.return_value = return_value
@@ -29,12 +29,12 @@ def test_get_schema_uses_cache(self):
self.assertEqual(patched_get_schema.call_count, 1)
def test_get_schema_skips_cache_with_refresh_true(self):
- return_value = [{"name": "table", "columns": []}]
+ return_value = [{"name": "table", "columns": [], "description": None}]
with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
patched_get_schema.return_value = return_value
self.factory.data_source.get_schema()
- new_return_value = [{"name": "new_table", "columns": []}]
+ new_return_value = [{"name": "new_table", "columns": [], "description": None}]
patched_get_schema.return_value = new_return_value
schema = self.factory.data_source.get_schema(refresh=True)
@@ -43,10 +43,11 @@ def test_get_schema_skips_cache_with_refresh_true(self):
def test_schema_sorter(self):
input_data = [
- {"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"]},
+ {"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"], "description": None},
{
"name": "all_terain_vehicle",
"columns": ["has_wheels", "has_engine", "has_all_wheel_drive"],
+ "description": None,
},
]
@@ -54,8 +55,9 @@ def test_schema_sorter(self):
{
"name": "all_terain_vehicle",
"columns": ["has_all_wheel_drive", "has_engine", "has_wheels"],
+ "description": None,
},
- {"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"]},
+ {"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"], "description": None},
]
real_output = self.factory.data_source._sort_schema(input_data)
@@ -64,10 +66,11 @@ def test_schema_sorter(self):
def test_model_uses_schema_sorter(self):
orig_schema = [
- {"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"]},
+ {"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"], "description": None},
{
"name": "all_terain_vehicle",
"columns": ["has_wheels", "has_engine", "has_all_wheel_drive"],
+ "description": None,
},
]
@@ -75,8 +78,9 @@ def test_model_uses_schema_sorter(self):
{
"name": "all_terain_vehicle",
"columns": ["has_all_wheel_drive", "has_engine", "has_wheels"],
+ "description": None,
},
- {"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"]},
+ {"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"], "description": None},
]
with mock.patch("redash.query_runner.pg.PostgreSQL.get_schema") as patched_get_schema:
diff --git a/tests/query_runner/test_athena.py b/tests/query_runner/test_athena.py
index 2ac4ee42fe..2c386ead11 100644
--- a/tests/query_runner/test_athena.py
+++ b/tests/query_runner/test_athena.py
@@ -75,7 +75,9 @@ def test_external_table(self):
{"DatabaseName": "test1"},
)
with self.stubber:
- assert query_runner.get_schema() == [{"columns": ["row_id"], "name": "test1.jdbc_table"}]
+ assert query_runner.get_schema() == [
+ {"columns": [{"name": "row_id", "type": "int"}], "name": "test1.jdbc_table", "description": None}
+ ]
def test_partitioned_table(self):
"""
@@ -124,7 +126,16 @@ def test_partitioned_table(self):
{"DatabaseName": "test1"},
)
with self.stubber:
- assert query_runner.get_schema() == [{"columns": ["sk", "category"], "name": "test1.partitioned_table"}]
+ assert query_runner.get_schema() == [
+ {
+ "columns": [
+ {"name": "sk", "type": "partition (int)"},
+ {"name": "category", "type": "partition", "idx": 0},
+ ],
+ "name": "test1.partitioned_table",
+ "description": None,
+ }
+ ]
def test_view(self):
query_runner = Athena({"glue": True, "region": "mars-east-1"})
@@ -156,7 +167,9 @@ def test_view(self):
{"DatabaseName": "test1"},
)
with self.stubber:
- assert query_runner.get_schema() == [{"columns": ["sk"], "name": "test1.view"}]
+ assert query_runner.get_schema() == [
+ {"columns": [{"name": "sk", "type": "int"}], "name": "test1.view", "description": None}
+ ]
def test_dodgy_table_does_not_break_schema_listing(self):
"""
@@ -196,7 +209,9 @@ def test_dodgy_table_does_not_break_schema_listing(self):
{"DatabaseName": "test1"},
)
with self.stubber:
- assert query_runner.get_schema() == [{"columns": ["region"], "name": "test1.csv"}]
+ assert query_runner.get_schema() == [
+ {"columns": [{"name": "region", "type": "string"}], "name": "test1.csv", "description": None}
+ ]
def test_no_storage_descriptor_table(self):
"""