-
Notifications
You must be signed in to change notification settings - Fork 14.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Load async sql lab results early for Presto #4834
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -250,7 +250,7 @@ | |
MAPBOX_API_KEY = os.environ.get('MAPBOX_API_KEY', '') | ||
|
||
# Maximum number of rows returned in the SQL editor | ||
SQL_MAX_ROW = 1000000 | ||
SQL_MAX_ROW = 10000 | ||
DISPLAY_SQL_MAX_ROW = 1000 | ||
|
||
# Maximum number of tables/views displayed in the dropdown window in SQL Lab. | ||
|
@@ -294,6 +294,12 @@ class CeleryConfig(object): | |
# Timeout duration for SQL Lab synchronous queries | ||
SQLLAB_TIMEOUT = 30 | ||
|
||
# When set to true, results from asynchronous sql lab are prefetched | ||
PREFETCH_ASYNC = True | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be a db-level param? |
||
|
||
# Howmany rows to prefetch from asyncronous queries | ||
PREFETCH_ROWS = 100 | ||
|
||
# SQLLAB_DEFAULT_DBID | ||
SQLLAB_DEFAULT_DBID = None | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,11 +20,13 @@ | |
|
||
from collections import defaultdict, namedtuple | ||
import inspect | ||
import json | ||
import logging | ||
import os | ||
import re | ||
import textwrap | ||
import time | ||
import uuid | ||
|
||
import boto3 | ||
from flask import g | ||
|
@@ -40,7 +42,7 @@ | |
import unicodecsv | ||
from werkzeug.utils import secure_filename | ||
|
||
from superset import app, cache_util, conf, db, utils | ||
from superset import app, cache_util, conf, db, results_backend, utils | ||
from superset.exceptions import SupersetTemplateException | ||
from superset.utils import QueryStatus | ||
|
||
|
@@ -72,8 +74,8 @@ class BaseEngineSpec(object): | |
inner_joins = True | ||
|
||
@classmethod | ||
def fetch_data(cls, cursor, limit): | ||
if cls.limit_method == LimitMethod.FETCH_MANY: | ||
def fetch_data(cls, cursor, limit, prefetch=False): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does fetch_data need a new arg or does it just need to be called with a limit? |
||
if cls.limit_method == LimitMethod.FETCH_MANY or prefetch: | ||
return cursor.fetchmany(limit) | ||
return cursor.fetchall() | ||
|
||
|
@@ -726,7 +728,29 @@ def extra_table_metadata(cls, database, table_name, schema_name): | |
} | ||
|
||
@classmethod | ||
def handle_cursor(cls, cursor, query, session): | ||
def prefetch_results(cls, cursor, query, cache_timeout, session, limit): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It appears this method is not covered by tests |
||
data = cursor.fetchmany(limit) | ||
column_names = cls.get_normalized_column_names(cursor.description) | ||
cdf = utils.convert_results_to_df(column_names, data) | ||
payload = dict(query_id=query.id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Much of the logic here is not specific to Presto and should probably live in the base class or outside this module. Maybe something like |
||
payload.update({ | ||
'status': utils.QueryStatus.PREFETCHED, | ||
'data': cdf.data if cdf.data else [], | ||
'columns': cdf.columns if cdf.columns else [], | ||
'query': query.to_dict(), | ||
}) | ||
|
||
json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser) | ||
key = '{}'.format(uuid.uuid4()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need to format this as a string as it's already a string. |
||
prefetch_key = key | ||
results_backend.set( | ||
prefetch_key, utils.zlib_compress(json_payload), cache_timeout) | ||
query.status = utils.QueryStatus.PREFETCHED | ||
query.results_key = key | ||
session.commit() | ||
|
||
@classmethod | ||
def handle_cursor(cls, cursor, query, session, cache_timeout=0): | ||
"""Updates progress information""" | ||
logging.info('Polling the cursor for progress') | ||
polled = cursor.poll() | ||
|
@@ -737,12 +761,20 @@ def handle_cursor(cls, cursor, query, session): | |
while polled: | ||
# Update the object and wait for the kill signal. | ||
stats = polled.get('stats', {}) | ||
|
||
query = session.query(type(query)).filter_by(id=query.id).one() | ||
if query.status in [QueryStatus.STOPPED, QueryStatus.TIMED_OUT]: | ||
cursor.cancel() | ||
break | ||
|
||
if ( | ||
config.get('PREFETCH_ASYNC') and | ||
(not query.has_loaded_early) | ||
): | ||
query.has_loaded_early = True | ||
limit = config.get('PREFETCH_ROWS') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
PrestoEngineSpec.prefetch_results( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this possible that since |
||
cursor, query, cache_timeout, session, limit) | ||
|
||
if stats: | ||
completed_splits = float(stats.get('completedSplits')) | ||
total_splits = float(stats.get('totalSplits')) | ||
|
@@ -1079,6 +1111,8 @@ def handle_cursor(cls, cursor, query, session): | |
if query.status == QueryStatus.STOPPED: | ||
cursor.cancel() | ||
break | ||
if hive.ttypes.TOperationState.RUNNING_STATE == polled.operationState: | ||
BaseEngineSpec.fetch_data(cursor, 100, prefetch=True) | ||
|
||
log = cursor.fetch_logs() or '' | ||
if log: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This whole section is copy pasted from earlier in this very method. This should be refactored into its own component, or at least a
renderDataSection
method or something like that, that would receive different props/params as needed.