Configure dataframe cache from settings - see apache#3302

kimetrica · Oct 10, 2017 · a0657a9 · a0657a9
1 parent 3610ac0
commit a0657a9
Show file tree

Hide file tree

Showing 4 changed files with 19 additions and 12 deletions.
diff --git a/contrib/cache/__init__.py b/contrib/cache/__init__.py
diff --git a/contrib/connectors/pandas/cache.py b/contrib/cache/dataframe.py
@@ -182,8 +182,8 @@ def dec(self, key, delta=1):
         raise NotImplementedError()
 
 
-dataframe_cache = DataFrameCache(
-    cache_dir='/tmp/pandasdatasource_cache',
-    threshold=200,
-    default_timeout=24 * 60 * 60,
-)
+def dataframe(app, config, args, kwargs):
+    """Return a DataFrameCache for use by Flask-Cache."""
+    args.insert(0, config['CACHE_DIR'])
+    kwargs.update(dict(threshold=config['CACHE_THRESHOLD']))
+    return DataFrameCache(*args, **kwargs)
diff --git a/contrib/connectors/pandas/models.py b/contrib/connectors/pandas/models.py
@@ -28,14 +28,12 @@
 from flask_appbuilder import Model
 from flask_babel import lazy_gettext as _
 
-from superset import db, utils, sm
+from superset import dataframe_cache, db, utils, sm
 from superset.connectors.base.models import (
     BaseDatasource, BaseColumn, BaseMetric)
 from superset.models.helpers import QueryResult, set_perm
 from superset.utils import QueryStatus
 
-from .cache import dataframe_cache
-
 FORMATS = [
     ('csv', 'csv'),
     ('html', 'html'),
@@ -311,8 +309,9 @@ def get_dataframe(self):
         and add any calculated columns to the DataFrame.
         """
         if self.df is None:
-            cache_key = self.cache_key
-            self.df = dataframe_cache.get(cache_key)
+            if dataframe_cache:
+                cache_key = self.cache_key
+                self.df = dataframe_cache.get(cache_key)
             if not isinstance(self.df, pd.DataFrame):
                 self.df = self.pandas_read_method(self.source_url, **self.pandas_read_parameters)
 
@@ -324,8 +323,9 @@ def get_dataframe(self):
                 # Our column names are always strings
                 self.df.columns = [str(col) for col in self.df.columns]
 
-                timeout = self.cache_timeout or self.database.cache_timeout
-                dataframe_cache.set(cache_key, self.df, timeout)
+                if dataframe_cache:
+                    timeout = self.cache_timeout or self.database.cache_timeout
+                    dataframe_cache.set(cache_key, self.df, timeout)
 
         calculated_columns = []
         for col in self.columns:

diff --git a/superset/__init__.py b/superset/__init__.py
@@ -91,6 +91,13 @@ def get_js_manifest():
 
 cache = utils.setup_cache(app, conf.get('CACHE_CONFIG'))
 tables_cache = utils.setup_cache(app, conf.get('TABLE_NAMES_CACHE_CONFIG'))
+# For example:
+# DATAFRAME_CACHE_CONFIG = {
+#    'CACHE_TYPE': 'contrib.cache.dataframe.dataframe',
+#    'CACHE_DEFAULT_TIMEOUT': 60 * 60 * 24,
+#    'CACHE_DIR': '/tmp/pandasdatasource_cache',
+#    'CACHE_THRESHOLD': 200}
+dataframe_cache = utils.setup_cache(app, conf.get('DATAFRAME_CACHE_CONFIG'))
 
 migrate = Migrate(app, db, directory=APP_DIR + "/migrations")