Skip to content

Commit

Permalink
Configure dataframe cache from settings - see apache#3302
Browse files Browse the repository at this point in the history
  • Loading branch information
rhunwicks committed Oct 10, 2017
1 parent 3610ac0 commit a0657a9
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 12 deletions.
Empty file added contrib/cache/__init__.py
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ def dec(self, key, delta=1):
raise NotImplementedError()


# Module-level DataFrameCache instance built with hard-coded settings:
# a fixed cache directory, threshold=200 and a default timeout of
# 24 * 60 * 60 (presumably seconds, i.e. one day -- confirm).
# NOTE(review): in this diff view these lines appear to be the removed
# side of the change, superseded by the dataframe() factory -- confirm.
dataframe_cache = DataFrameCache(
cache_dir='/tmp/pandasdatasource_cache',
threshold=200,
default_timeout=24 * 60 * 60,
)
def dataframe(app, config, args, kwargs):
    """Return a DataFrameCache for use by Flask-Cache.

    The cache directory is read from ``config['CACHE_DIR']`` and placed
    first in ``args``; ``config['CACHE_THRESHOLD']`` is stored in
    ``kwargs`` under ``threshold``. Both ``args`` and ``kwargs`` are
    modified in place before being forwarded to ``DataFrameCache``.
    ``app`` is accepted but not used here.
    """
    cache_dir = config['CACHE_DIR']
    args.insert(0, cache_dir)
    kwargs['threshold'] = config['CACHE_THRESHOLD']
    return DataFrameCache(*args, **kwargs)
14 changes: 7 additions & 7 deletions contrib/connectors/pandas/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,12 @@
from flask_appbuilder import Model
from flask_babel import lazy_gettext as _

from superset import db, utils, sm
from superset import dataframe_cache, db, utils, sm
from superset.connectors.base.models import (
BaseDatasource, BaseColumn, BaseMetric)
from superset.models.helpers import QueryResult, set_perm
from superset.utils import QueryStatus

from .cache import dataframe_cache

FORMATS = [
('csv', 'csv'),
('html', 'html'),
Expand Down Expand Up @@ -311,8 +309,9 @@ def get_dataframe(self):
and add any calculated columns to the DataFrame.
"""
if self.df is None:
cache_key = self.cache_key
self.df = dataframe_cache.get(cache_key)
if dataframe_cache:
cache_key = self.cache_key
self.df = dataframe_cache.get(cache_key)
if not isinstance(self.df, pd.DataFrame):
self.df = self.pandas_read_method(self.source_url, **self.pandas_read_parameters)

Expand All @@ -324,8 +323,9 @@ def get_dataframe(self):
# Our column names are always strings
self.df.columns = [str(col) for col in self.df.columns]

timeout = self.cache_timeout or self.database.cache_timeout
dataframe_cache.set(cache_key, self.df, timeout)
if dataframe_cache:
timeout = self.cache_timeout or self.database.cache_timeout
dataframe_cache.set(cache_key, self.df, timeout)

calculated_columns = []
for col in self.columns:
Expand Down
7 changes: 7 additions & 0 deletions superset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ def get_js_manifest():

cache = utils.setup_cache(app, conf.get('CACHE_CONFIG'))
tables_cache = utils.setup_cache(app, conf.get('TABLE_NAMES_CACHE_CONFIG'))
# Cache for the pandas connector's DataFrames, set via DATAFRAME_CACHE_CONFIG.
# For example:
# DATAFRAME_CACHE_CONFIG = {
# 'CACHE_TYPE': 'contrib.connectors.pandas.cache.dataframe',
# 'CACHE_DEFAULT_TIMEOUT': 60 * 60 * 24,
# 'CACHE_DIR': '/tmp/pandasdatasource_cache',
# 'CACHE_THRESHOLD': 200}
dataframe_cache = utils.setup_cache(app, conf.get('DATAFRAME_CACHE_CONFIG'))

migrate = Migrate(app, db, directory=APP_DIR + "/migrations")

Expand Down

0 comments on commit a0657a9

Please sign in to comment.