-
Notifications
You must be signed in to change notification settings - Fork 14.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[sqllab] force limit queries only when there is no existing limit #5023
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -104,15 +104,32 @@ def apply_limit_to_sql(cls, sql, limit, database): | |
) | ||
return database.compile_sqla_query(qry) | ||
elif LimitMethod.FORCE_LIMIT: | ||
no_limit = re.sub(r""" | ||
sql_without_limit = cls.get_query_without_limit(sql) | ||
return '{sql_without_limit} LIMIT {limit}'.format(**locals()) | ||
return sql | ||
|
||
@classmethod | ||
def get_limit_from_sql(cls, sql): | ||
limit_pattern = re.compile(r""" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I realize that this logic existed previously, but shouldn't we use something like sqlparse for this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I explored the sqlparse option, but there was no nice way to just get the limit without recursively parsing through the query. |
||
(?ix) # case insensitive, verbose | ||
\s+ # whitespace | ||
LIMIT\s+(\d+) # LIMIT $ROWS | ||
;? # optional semi-colon | ||
(\s|;)*$ # remove trailing spaces tabs or semicolons | ||
""") | ||
matches = limit_pattern.findall(sql) | ||
if matches: | ||
return int(matches[0][0]) | ||
|
||
@classmethod | ||
def get_query_without_limit(cls, sql): | ||
return re.sub(r""" | ||
(?ix) # case insensitive, verbose | ||
\s+ # whitespace | ||
LIMIT\s+\d+ # LIMIT $ROWS | ||
;? # optional semi-colon | ||
(\s|;)*$ # remove trailing spaces tabs or semicolons | ||
""", '', sql) | ||
return '{no_limit} LIMIT {limit}'.format(**locals()) | ||
return sql | ||
|
||
@staticmethod | ||
def csv_to_df(**kwargs): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -196,17 +196,43 @@ def test_run_async_query(self): | |
self.assertEqual([{'name': 'Admin'}], df.to_dict(orient='records')) | ||
self.assertEqual(QueryStatus.SUCCESS, query.status) | ||
self.assertTrue('FROM tmp_async_1' in query.select_sql) | ||
self.assertTrue('LIMIT 666' in query.select_sql) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why were these checks removed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
self.assertEqual( | ||
'CREATE TABLE tmp_async_1 AS \nSELECT name FROM ab_role ' | ||
"WHERE name='Admin'", query.executed_sql) | ||
"WHERE name='Admin' LIMIT 666", query.executed_sql) | ||
self.assertEqual(sql_where, query.sql) | ||
self.assertEqual(0, query.rows) | ||
self.assertEqual(666, query.limit) | ||
self.assertEqual(False, query.limit_used) | ||
self.assertEqual(True, query.select_as_cta) | ||
self.assertEqual(True, query.select_as_cta_used) | ||
|
||
def test_run_async_query_with_lower_limit(self): | ||
main_db = self.get_main_database(db.session) | ||
eng = main_db.get_sqla_engine() | ||
sql_where = "SELECT name FROM ab_role WHERE name='Alpha' LIMIT 1" | ||
result = self.run_sql( | ||
main_db.id, sql_where, '5', async='true', tmp_table='tmp_async_2', | ||
cta='true') | ||
assert result['query']['state'] in ( | ||
QueryStatus.PENDING, QueryStatus.RUNNING, QueryStatus.SUCCESS) | ||
|
||
time.sleep(1) | ||
|
||
query = self.get_query_by_id(result['query']['serverId']) | ||
df = pd.read_sql_query(query.select_sql, con=eng) | ||
self.assertEqual(QueryStatus.SUCCESS, query.status) | ||
self.assertEqual([{'name': 'Alpha'}], df.to_dict(orient='records')) | ||
self.assertEqual(QueryStatus.SUCCESS, query.status) | ||
self.assertTrue('FROM tmp_async_2' in query.select_sql) | ||
self.assertEqual( | ||
'CREATE TABLE tmp_async_2 AS \nSELECT name FROM ab_role ' | ||
"WHERE name='Alpha' LIMIT 1", query.executed_sql) | ||
self.assertEqual(sql_where, query.sql) | ||
self.assertEqual(0, query.rows) | ||
self.assertEqual(1, query.limit) | ||
self.assertEqual(True, query.select_as_cta) | ||
self.assertEqual(True, query.select_as_cta_used) | ||
|
||
@staticmethod | ||
def de_unicode_dict(d): | ||
def str_if_basestring(o): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This logic can generate SQL like [example query not captured in this extract], which is wrong.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which engine is this on?