Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: excel sheet upload is not working #10450

Merged
merged 5 commits into from
Jul 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,10 +438,7 @@ def excel_to_df(**kwargs: Any) -> pd.DataFrame:
"""
kwargs["encoding"] = "utf-8"
kwargs["iterator"] = True
chunks = pd.io.excel.read_excel(
io=kwargs["filepath_or_buffer"], sheet_name=kwargs["sheet_name"]
)
df = pd.concat(chunk for chunk in chunks.values())
pphszx marked this conversation as resolved.
Show resolved Hide resolved
df = pd.read_excel(**kwargs)
return df

@staticmethod
Expand Down Expand Up @@ -513,7 +510,7 @@ def create_table_from_excel( # pylint: disable=too-many-arguments
Create table from contents of an Excel file. Note: this method does not create
metadata for the table.
"""
df = cls.excel_to_df(filepath_or_buffer=filename, **excel_to_df_kwargs,)
df = cls.excel_to_df(io=filename, **excel_to_df_kwargs,)
engine = cls.get_engine(database)
pphszx marked this conversation as resolved.
Show resolved Hide resolved
if table.schema:
# only add schema when it is present and non-empty
Expand Down
40 changes: 21 additions & 19 deletions superset/views/database/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,20 +180,18 @@ def at_least_one_schema_is_allowed(database: Database) -> bool:
)
skip_blank_lines = BooleanField(
_("Skip Blank Lines"),
description=_(
"Skip blank lines rather than interpreting them " "as NaN values."
),
description=_("Skip blank lines rather than interpreting them as NaN values."),
)
parse_dates = CommaSeparatedListField(
_("Parse Dates"),
description=_(
"A comma separated list of columns that should be " "parsed as dates."
"A comma separated list of columns that should be parsed as dates."
),
filters=[filter_not_empty_values],
)
infer_datetime_format = BooleanField(
_("Infer Datetime Format"),
description=_("Use Pandas to interpret the datetime format " "automatically."),
description=_("Use Pandas to interpret the datetime format automatically."),
)
decimal = StringField(
_("Decimal Character"),
Expand Down Expand Up @@ -228,16 +226,16 @@ def at_least_one_schema_is_allowed(database: Database) -> bool:

class ExcelToDatabaseForm(DynamicForm):
# pylint: disable=E0211
def excel_allowed_dbs(): # type: ignore
excel_allowed_dbs = []
def excel_allowed_dbs() -> List[Database]: # type: ignore
# TODO: change allow_csv_upload to allow_file_upload
excel_enabled_dbs = (
db.session.query(Database).filter_by(allow_csv_upload=True).all()
)
for excel_enabled_db in excel_enabled_dbs:
if ExcelToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db):
excel_allowed_dbs.append(excel_enabled_db)
return excel_allowed_dbs
pphszx marked this conversation as resolved.
Show resolved Hide resolved
return [
excel_enabled_db
for excel_enabled_db in excel_enabled_dbs
if ExcelToDatabaseForm.at_least_one_schema_is_allowed(excel_enabled_db)
]

@staticmethod
def at_least_one_schema_is_allowed(database: Database) -> bool:
Expand Down Expand Up @@ -265,10 +263,7 @@ def at_least_one_schema_is_allowed(database: Database) -> bool:
b) if database supports schema
user is able to upload to schema in schemas_allowed_for_csv_upload
"""
if (
security_manager.database_access(database)
or security_manager.all_datasource_access()
):
if security_manager.can_access_database(database):
return True
pphszx marked this conversation as resolved.
Show resolved Hide resolved
schemas = database.get_schema_access_for_csv_upload()
if schemas and security_manager.schemas_accessible_by_user(
Expand Down Expand Up @@ -304,7 +299,10 @@ def at_least_one_schema_is_allowed(database: Database) -> bool:
)

sheet_name = StringField(
_("Sheet Name"), description="Sheet Name", validators=[Optional()]
_("Sheet Name"),
description=_("Strings used for sheet names (default is the first sheet)."),
validators=[Optional()],
widget=BS3TextFieldWidget(),
)

con = QuerySelectField(
Expand Down Expand Up @@ -356,9 +354,6 @@ def at_least_one_schema_is_allowed(database: Database) -> bool:
_("Mangle Duplicate Columns"),
description=_('Specify duplicate columns as "X.0, X.1".'),
)
skipinitialspace = BooleanField(
_("Skip Initial Space"), description=_("Skip spaces after delimiter.")
)
skiprows = IntegerField(
pphszx marked this conversation as resolved.
Show resolved Hide resolved
_("Skip Rows"),
description=_("Number of rows to skip at start of file."),
Expand All @@ -371,6 +366,13 @@ def at_least_one_schema_is_allowed(database: Database) -> bool:
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
parse_dates = CommaSeparatedListField(
_("Parse Dates"),
description=_(
"A comma separated list of columns that should be parsed as dates."
),
filters=[filter_not_empty_values],
)
decimal = StringField(
pphszx marked this conversation as resolved.
Show resolved Hide resolved
_("Decimal Character"),
default=".",
Expand Down
18 changes: 12 additions & 6 deletions superset/views/database/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,9 @@ class ExcelToDatabaseView(SimpleFormView):
def form_get(self, form: ExcelToDatabaseForm) -> None:
form.header.data = 0
form.mangle_dupe_cols.data = True
form.skipinitialspace.data = False
form.decimal.data = "."
form.if_exists.data = "fail"
form.sheet_name = None
form.sheet_name.data = ""

pphszx marked this conversation as resolved.
Show resolved Hide resolved
def form_post(self, form: ExcelToDatabaseForm) -> Response:
database = form.con.data
Expand Down Expand Up @@ -307,16 +306,23 @@ def form_post(self, form: ExcelToDatabaseForm) -> Response:
database = (
db.session.query(models.Database).filter_by(id=con.data.get("id")).one()
)

# some params are not supported by pandas.read_excel (e.g. chunksize).
# More can be found here:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html
excel_to_df_kwargs = {
"header": form.header.data if form.header.data else 0,
"index_col": form.index_col.data,
"mangle_dupe_cols": form.mangle_dupe_cols.data,
"skipinitialspace": form.skipinitialspace.data,
"skiprows": form.skiprows.data,
"nrows": form.nrows.data,
"sheet_name": form.sheet_name.data,
"chunksize": 1000,
"sheet_name": form.sheet_name.data if form.sheet_name.data else 0,
"parse_dates": form.parse_dates.data,
}
if form.null_values.data:
excel_to_df_kwargs["na_values"] = form.null_values.data
excel_to_df_kwargs["keep_default_na"] = False

df_to_sql_kwargs = {
"name": excel_table.table,
"if_exists": form.if_exists.data,
Expand All @@ -336,7 +342,7 @@ def form_post(self, form: ExcelToDatabaseForm) -> Response:
# E.g. if hive was used to upload an Excel file, presto will be a better option
# to explore the table.
expore_database = database
explore_database_id = database.get_extra().get("explore_database_id", None)
explore_database_id = database.explore_database_id
if explore_database_id:
pphszx marked this conversation as resolved.
Show resolved Hide resolved
expore_database = (
db.session.query(models.Database)
Expand Down