From 38d6c03f24c3ab0a7eb6fa852640f623af39709e Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Tue, 3 Oct 2017 01:06:41 +0200 Subject: [PATCH] Support additional Pandas formats if dependencies are satisfied - see #3302 --- contrib/connectors/pandas/models.py | 37 ++++++++++++++++++++--------- contrib/connectors/pandas/views.py | 8 +++---- setup.py | 2 ++ 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/contrib/connectors/pandas/models.py b/contrib/connectors/pandas/models.py index 8bab58c6949e6..816b360ba7c6f 100644 --- a/contrib/connectors/pandas/models.py +++ b/contrib/connectors/pandas/models.py @@ -34,6 +34,27 @@ from superset.utils import QueryStatus +FORMATS = [ + ('csv', 'csv'), + ('html', 'html'), + ('json', 'json'), + ('excel', 'Microsoft Excel'), + ('stata', 'Stata'), +] + +try: + import tables # NOQA + FORMATS.append(('hdf', 'HDF5')) +except ImportError: + pass + +try: + import feather # NOQA + FORMATS.append(('feather', 'Feather')) +except ImportError: + pass + + class PandasDatabase(object): """Non-ORM object for a Pandas Source""" @@ -123,13 +144,6 @@ def perm(self): class PandasDatasource(Model, BaseDatasource): """A datasource based on a Pandas DataFrame""" - FORMATS = [ - ('csv', 'csv'), - ('html', 'html'), - ('json', 'json'), - ('excel', 'Microsoft Excel'), - ] - # See http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases # NOQA GRAINS = OrderedDict([ ('5 seconds', '5S'), @@ -577,7 +591,7 @@ def process_dataframe( # If there is more than one DataFrame in the list then # concatenate them along the index if len(dfs) > 1: - df = pd.concat(dfs, axis=1) + df = pd.concat(dfs, axis=0) query_str = 'pd.concat([{}])'.format(', '.join(query_strs)) else: df = dfs[0] @@ -627,9 +641,10 @@ def process_dataframe( ascending=ascending) # Remove metrics only added for post-aggregation filtering - df = df.drop(filtered_metrics, axis=1) - query_str += '.drop({filtered_metrics}, axis=1)'.format( - filtered_metrics=filtered_metrics) + if filtered_metrics: + df = df.drop(filtered_metrics, axis=1) + query_str += '.drop({filtered_metrics}, axis=1)'.format( + filtered_metrics=filtered_metrics) elif groupby: # Group by without any metrics is equivalent to SELECT DISTINCT, diff --git a/contrib/connectors/pandas/views.py b/contrib/connectors/pandas/views.py index 3937e4f05fb2e..f7a4bb30b2670 100644 --- a/contrib/connectors/pandas/views.py +++ b/contrib/connectors/pandas/views.py @@ -20,7 +20,7 @@ get_datasource_exist_error_mgs, ) -from .models import PandasDatasource, PandasColumn, PandasMetric +from .models import FORMATS, PandasDatasource, PandasColumn, PandasMetric class ChoiceTypeSelectField(SelectField): @@ -158,8 +158,7 @@ class PandasDatasourceModelView(DatasourceModelView, DeleteMixin): # noqa 'link', 'changed_on_'] add_columns = ['name', 'source_url', 'format'] add_form_extra_fields = { - 'format': ChoiceTypeSelectField(_('Format'), - choices=PandasDatasource.FORMATS) + 'format': ChoiceTypeSelectField(_('Format'), choices=FORMATS) } edit_columns = [ 'name', 'source_url', 'format', @@ -168,8 +167,7 @@ class PandasDatasourceModelView(DatasourceModelView, DeleteMixin): # noqa 'description', 'owner', 'main_dttm_col', 'default_endpoint', 'offset', 'cache_timeout'] edit_form_extra_fields = { - 'format': ChoiceTypeSelectField(_('Format'), - choices=PandasDatasource.FORMATS) + 'format': ChoiceTypeSelectField(_('Format'), choices=FORMATS) } show_columns = edit_columns + ['perm'] related_views = [PandasColumnInlineView, PandasMetricInlineView] diff --git a/setup.py b/setup.py index 1988c10a730f7..df81a223e1302 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,7 @@ def get_git_sha(): install_requires=[ 'beautifulsoup4==4.6.0', 'boto3==1.4.4', + 'bottleneck==1.2.1', 'celery==3.1.25', 'colorama==0.3.9', 'cryptography==1.9', @@ -63,6 +64,7 @@ def get_git_sha(): 'idna==2.5', 'lxml==3.8.0', 'markdown==2.6.8', + 'numexpr==2.6.4', 'pandas==0.20.3', 'parsedatetime==2.0.0', 'pydruid==0.3.1',