Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(db engines): add support for Opendistro Elasticsearch (AWS ES) #12602

Merged
merged 7 commits into from
Feb 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def get_git_sha():
"drill": ["sqlalchemy-drill==0.1.dev"],
"druid": ["pydruid>=0.6.1,<0.7"],
"solr": ["sqlalchemy-solr >= 0.2.0"],
"elasticsearch": ["elasticsearch-dbapi>=0.1.0, <0.2.0"],
"elasticsearch": ["elasticsearch-dbapi>=0.2.0, <0.3.0"],
"exasol": ["sqlalchemy-exasol>=2.1.0, <2.2"],
"excel": ["xlrd>=1.2.0, <1.3"],
"gsheets": ["gsheetsdb>=0.1.9"],
Expand Down
31 changes: 31 additions & 0 deletions superset/db_engine_specs/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,34 @@ def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
if target_type.upper() == utils.TemporalType.DATETIME:
return f"""CAST('{dttm.isoformat(timespec="seconds")}' AS DATETIME)"""
return None


class OpenDistroEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method

time_groupby_inline = True
time_secondary_columns = True
allows_joins = False
allows_subqueries = True

_time_grain_expressions = {
None: "{col}",
"PT1S": "date_format({col}, 'yyyy-MM-dd HH:mm:ss.000')",
"PT1M": "date_format({col}, 'yyyy-MM-dd HH:mm:00.000')",
"PT1H": "date_format({col}, 'yyyy-MM-dd HH:00:00.000')",
"P1D": "date_format({col}, 'yyyy-MM-dd 00:00:00.000')",
"P1M": "date_format({col}, 'yyyy-MM-01 00:00:00.000')",
"P1Y": "date_format({col}, 'yyyy-01-01 00:00:00.000')",
}

engine = "odelasticsearch"
engine_name = "ElasticSearch"

@classmethod
def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
if target_type.upper() == utils.TemporalType.DATETIME:
return f"""'{dttm.isoformat(timespec="seconds")}'"""
return None
Comment on lines +71 to +75
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't ES/OD support other temporal types like DATE or TIMESTAMP'?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To my understanding by default Elasticsearch only supports date field type

https://opendistro.github.io/for-elasticsearch-docs/docs/sql/datatypes/#date-and-time-types

...
By default, the Elasticsearch DSL uses the date type as the only date-time related type that contains all information of an absolute time point.
...

For the SQL endpoint, we can use date functions that will return other date/time types out of them

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok thanks for clarifying!


@staticmethod
def _mutate_label(label: str) -> str:
return label.replace(".", "_")
30 changes: 29 additions & 1 deletion tests/db_engine_specs/elasticsearch_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from superset.db_engine_specs.elasticsearch import ElasticSearchEngineSpec
from sqlalchemy import column

from superset.db_engine_specs.elasticsearch import (
ElasticSearchEngineSpec,
OpenDistroEngineSpec,
)
from tests.db_engine_specs.base_tests import TestDbEngineSpec


Expand All @@ -26,3 +31,26 @@ def test_convert_dttm(self):
ElasticSearchEngineSpec.convert_dttm("DATETIME", dttm),
"CAST('2019-01-02T03:04:05' AS DATETIME)",
)

def test_opendistro_convert_dttm(self):
"""
DB Eng Specs (opendistro): Test convert_dttm
"""
dttm = self.get_dttm()

self.assertEqual(
OpenDistroEngineSpec.convert_dttm("DATETIME", dttm),
"'2019-01-02T03:04:05'",
)

def test_opendistro_sqla_column_label(self):
"""
DB Eng Specs (opendistro): Test column label
"""
test_cases = {
"Col": "Col",
"Col.keyword": "Col_keyword",
}
for original, expected in test_cases.items():
actual = OpenDistroEngineSpec.make_label_compatible(column(original).name)
self.assertEqual(actual, expected)