Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add Excel sheet upload #9825

Merged
merged 1 commit into from
Jul 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ If you run a production system you should schedule downtime for this
upgrade.

The PRs below have more information around the breaking changes:
* [9825](https://github.com/apache/incubator-superset/pull/9825): Support for Excel sheet upload added. To enable support, install Superset with the optional dependency `excel`

* [4587](https://github.com/apache/incubator-superset/pull/4587) : a backward
incompatible database migration that requires downtime. Once the
db migration succeeds, the web server needs to be restarted with the
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,4 @@ yarl==1.4.2 # via aiohttp
zipp==3.1.0 # via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# setuptools
# setuptools
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def get_git_sha():
"dremio": ["sqlalchemy_dremio>=1.1.0"],
"cockroachdb": ["cockroachdb==0.3.3"],
"thumbnails": ["Pillow>=7.0.0, <8.0.0"],
"excel": ["xlrd>=1.2.0, <1.3"],
},
python_requires="~=3.6",
author="Apache Software Foundation",
Expand Down
45 changes: 35 additions & 10 deletions superset/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,11 @@ def init_views(self) -> None:
DashboardModelViewAsync,
)
from superset.views.database.api import DatabaseRestApi
from superset.views.database.views import DatabaseView, CsvToDatabaseView
from superset.views.database.views import (
DatabaseView,
CsvToDatabaseView,
ExcelToDatabaseView,
)
from superset.views.datasource import Datasource
from superset.views.log.api import LogRestApi
from superset.views.log.views import LogModelView
Expand Down Expand Up @@ -265,6 +269,7 @@ def init_views(self) -> None:
appbuilder.add_view_no_menu(Api)
appbuilder.add_view_no_menu(CssTemplateAsyncModelView)
appbuilder.add_view_no_menu(CsvToDatabaseView)
appbuilder.add_view_no_menu(ExcelToDatabaseView)
appbuilder.add_view_no_menu(Dashboard)
appbuilder.add_view_no_menu(DashboardModelViewAsync)
appbuilder.add_view_no_menu(Datasource)
Expand Down Expand Up @@ -324,15 +329,35 @@ def init_views(self) -> None:
category="SQL Lab",
category_label=__("SQL Lab"),
)
appbuilder.add_link(
"Upload a CSV",
label=__("Upload a CSV"),
href="/csvtodatabaseview/form",
icon="fa-upload",
category="Sources",
category_label=__("Sources"),
category_icon="fa-wrench",
)
if self.config["CSV_EXTENSIONS"].intersection(
self.config["ALLOWED_EXTENSIONS"]
):
appbuilder.add_link(
"Upload a CSV",
label=__("Upload a CSV"),
href="/csvtodatabaseview/form",
icon="fa-upload",
category="Sources",
category_label=__("Sources"),
category_icon="fa-wrench",
)
try:
import xlrd # pylint: disable=unused-import

if self.config["EXCEL_EXTENSIONS"].intersection(
self.config["ALLOWED_EXTENSIONS"]
):
appbuilder.add_link(
"Upload Excel",
label=__("Upload Excel"),
href="/exceltodatabaseview/form",
icon="fa-upload",
category="Sources",
category_label=__("Sources"),
category_icon="fa-wrench",
)
except ImportError:
pass

#
# Conditionally setup log views
Expand Down
5 changes: 3 additions & 2 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,9 @@ def _try_json_readsha( # pylint: disable=unused-argument
SUPERSET_WEBSERVER_DOMAINS = None

# Allowed format types for upload on Database view
# TODO: Add processing of other spreadsheet formats (xls, xlsx etc)
# NOTE(review): this first value is overwritten by the union assigned below.
ALLOWED_EXTENSIONS = {"csv", "tsv"}
# File extensions treated as Excel workbooks by the upload feature.
EXCEL_EXTENSIONS = {"xlsx", "xls"}
# File extensions treated as delimited text (CSV/TSV) by the upload feature.
CSV_EXTENSIONS = {"csv", "tsv"}
# Every extension accepted for upload: the union of the two sets above.
ALLOWED_EXTENSIONS = {*EXCEL_EXTENSIONS, *CSV_EXTENSIONS}

# CSV Options: key/value pairs that will be passed as argument to DataFrame.to_csv
# method.
Expand Down
36 changes: 36 additions & 0 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,20 @@ def set_or_update_query_limit(cls, sql: str, limit: int) -> str:
parsed_query = sql_parse.ParsedQuery(sql)
return parsed_query.set_or_update_query_limit(limit)

@staticmethod
def excel_to_df(**kwargs: Any) -> pd.DataFrame:
    """ Read excel into Pandas DataFrame

    Only two keys of ``kwargs`` are consumed; any other key is ignored.

    :param kwargs: must contain ``filepath_or_buffer`` (the workbook to read)
        and ``sheet_name`` (the sheet selector handed to ``read_excel``)
    :return: Pandas DataFrame containing data from excel; when several sheets
        are read they are concatenated into a single frame
    :raises KeyError: if ``filepath_or_buffer`` or ``sheet_name`` is missing
    """
    sheets = pd.io.excel.read_excel(
        io=kwargs["filepath_or_buffer"], sheet_name=kwargs["sheet_name"]
    )
    # read_excel returns a plain DataFrame for a single sheet name/index and a
    # dict of DataFrames when sheet_name is None or a list. Calling .values()
    # on a DataFrame would fail, so only the dict form is concatenated.
    if isinstance(sheets, pd.DataFrame):
        return sheets
    return pd.concat(list(sheets.values()))

@staticmethod
def csv_to_df(**kwargs: Any) -> pd.DataFrame:
""" Read csv into Pandas DataFrame
Expand Down Expand Up @@ -486,6 +500,28 @@ def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
"""
return None

@classmethod
def create_table_from_excel(  # pylint: disable=too-many-arguments
    cls,
    filename: str,
    table: Table,
    database: "Database",
    excel_to_df_kwargs: Dict[str, Any],
    df_to_sql_kwargs: Dict[str, Any],
) -> None:
    """
    Create a table from the contents of an Excel file. Note: this method does
    not create metadata for the table.

    :param filename: Excel file handed to ``excel_to_df`` as
        ``filepath_or_buffer``
    :param table: target table; its ``schema`` is forwarded when non-empty
    :param database: database whose engine receives the data
    :param excel_to_df_kwargs: extra keyword arguments for ``excel_to_df``
    :param df_to_sql_kwargs: keyword arguments for ``df_to_sql``; the caller's
        dict is left unmodified
    """
    # Copy first: the schema/method keys added below must not leak back into
    # the dict owned by the caller.
    sql_kwargs = dict(df_to_sql_kwargs)
    df = cls.excel_to_df(filepath_or_buffer=filename, **excel_to_df_kwargs)
    engine = cls.get_engine(database)
    if table.schema:
        # only add schema when it is present and non empty
        sql_kwargs["schema"] = table.schema
    if engine.dialect.supports_multivalues_insert:
        sql_kwargs["method"] = "multi"
    cls.df_to_sql(df=df, con=engine, **sql_kwargs)

@classmethod
def get_all_datasource_names(
cls, database: "Database", datasource_type: str
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
{#
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
#}
{% extends 'appbuilder/general/model/edit.html' %}

{% block tail_js %}
{{ super() }}
<script>
var db = $("#con");
var schema = $("#schema");

// The schema field starts out as a text input; keep a copy of it so it can
// be put back when the selected database has no restricted schema list.
var any_schema_is_allowed = schema.clone();

// Refresh the schema field once for the initially selected database, and
// again every time a different database is picked.
update_schemas_allowed_for_excel_upload(db.val());
db.on("change", function () {
  update_schemas_allowed_for_excel_upload(db.val());
});

// Fetch the schemas allowed for Excel upload on the database `db_id` and
// update the schema form field with the result. On failure an alert with the
// server-provided error message (or a fallback) is shown.
function update_schemas_allowed_for_excel_upload(db_id) {
  $.ajax({
    method: "GET",
    url: "/superset/schemas_access_for_excel_upload",
    data: {db_id: db_id},
    dataType: 'json',
    contentType: "application/json; charset=utf-8"
  }).done(function(data) {
    change_schema_field_in_formview(data);
  }).fail(function(error) {
    // responseJSON is only set when the error body was valid JSON; guard it
    // so a non-JSON error page or a network failure still produces an alert.
    var payload = error && error.responseJSON;
    var errorMsg = (payload && payload.error) ||
      (error && error.statusText) ||
      "unknown error";
    alert("ERROR: " + errorMsg);
  });
}

// Swap the #schema form field depending on `schemas_allowed`:
//  - non-empty array: replace it with a required <select> listing the schemas
//  - otherwise: restore the saved text input (`any_schema_is_allowed`)
function change_schema_field_in_formview(schemas_allowed){
  if (schemas_allowed && schemas_allowed.length > 0) {
    var dropdown_schema_list = $('<select id="schema" name="schema" required></select>');
    schemas_allowed.forEach(function(schema_allowed) {
      // Set value and label through the DOM API rather than string
      // concatenation so quotes or angle brackets in a schema name cannot
      // break the markup or inject HTML.
      $('<option></option>')
        .attr("value", schema_allowed)
        .text(schema_allowed)
        .appendTo(dropdown_schema_list);
    });
    $("#schema").replaceWith(dropdown_schema_list);
  } else {
    $("#schema").replaceWith(any_schema_is_allowed);
  }
}
</script>
{% endblock %}
Loading