diff --git a/contrib/cache/__init__.py b/contrib/cache/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/contrib/connectors/pandas/cache.py b/contrib/cache/dataframe.py similarity index 96% rename from contrib/connectors/pandas/cache.py rename to contrib/cache/dataframe.py index f013050715ed0..415e6549073f0 100644 --- a/contrib/connectors/pandas/cache.py +++ b/contrib/cache/dataframe.py @@ -182,8 +182,8 @@ def dec(self, key, delta=1): raise NotImplementedError() -dataframe_cache = DataFrameCache( - cache_dir='/tmp/pandasdatasource_cache', - threshold=200, - default_timeout=24 * 60 * 60, -) +def dataframe(app, config, args, kwargs): + """Return a DataFrameCache for use by Flask-Cache.""" + args.insert(0, config['CACHE_DIR']) + kwargs.update(dict(threshold=config['CACHE_THRESHOLD'])) + return DataFrameCache(*args, **kwargs) diff --git a/contrib/connectors/pandas/models.py b/contrib/connectors/pandas/models.py index a9fdd0be9de2d..6b899346b1571 100644 --- a/contrib/connectors/pandas/models.py +++ b/contrib/connectors/pandas/models.py @@ -28,14 +28,12 @@ from flask_appbuilder import Model from flask_babel import lazy_gettext as _ -from superset import db, utils, sm +from superset import dataframe_cache, db, utils, sm from superset.connectors.base.models import ( BaseDatasource, BaseColumn, BaseMetric) from superset.models.helpers import QueryResult, set_perm from superset.utils import QueryStatus -from .cache import dataframe_cache - FORMATS = [ ('csv', 'csv'), ('html', 'html'), @@ -311,8 +309,9 @@ def get_dataframe(self): and add any calculated columns to the DataFrame. 
""" if self.df is None: - cache_key = self.cache_key - self.df = dataframe_cache.get(cache_key) + if dataframe_cache: + cache_key = self.cache_key + self.df = dataframe_cache.get(cache_key) if not isinstance(self.df, pd.DataFrame): self.df = self.pandas_read_method(self.source_url, **self.pandas_read_parameters) @@ -324,8 +323,9 @@ def get_dataframe(self): # Our column names are always strings self.df.columns = [str(col) for col in self.df.columns] - timeout = self.cache_timeout or self.database.cache_timeout - dataframe_cache.set(cache_key, self.df, timeout) + if dataframe_cache: + timeout = self.cache_timeout or self.database.cache_timeout + dataframe_cache.set(cache_key, self.df, timeout) calculated_columns = [] for col in self.columns: diff --git a/superset/__init__.py b/superset/__init__.py index 6988e6b580fbb..69aa7236fb7b3 100644 --- a/superset/__init__.py +++ b/superset/__init__.py @@ -91,6 +91,13 @@ def get_js_manifest(): cache = utils.setup_cache(app, conf.get('CACHE_CONFIG')) tables_cache = utils.setup_cache(app, conf.get('TABLE_NAMES_CACHE_CONFIG')) +# For example: +# DATAFRAME_CACHE_CONFIG = { +# 'CACHE_TYPE': 'contrib.cache.dataframe.dataframe', +# 'CACHE_DEFAULT_TIMEOUT': 60 * 60 * 24, +# 'CACHE_DIR': '/tmp/pandasdatasource_cache', +# 'CACHE_THRESHOLD': 200} +dataframe_cache = utils.setup_cache(app, conf.get('DATAFRAME_CACHE_CONFIG')) migrate = Migrate(app, db, directory=APP_DIR + "/migrations")