diff --git a/ingestion/src/metadata/ingestion/lineage/parser.py b/ingestion/src/metadata/ingestion/lineage/parser.py index 6bd4386ee0a1..5b74bfba2d70 100644 --- a/ingestion/src/metadata/ingestion/lineage/parser.py +++ b/ingestion/src/metadata/ingestion/lineage/parser.py @@ -98,24 +98,30 @@ def intermediate_tables(self) -> List[Table]: """ Get a list of intermediate tables """ - # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning - return self.retrieve_tables(self.parser.intermediate_tables) + if self.parser: + # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning + return self.retrieve_tables(self.parser.intermediate_tables) + return [] @cached_property def source_tables(self) -> List[Table]: """ Get a list of source tables """ - # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning - return self.retrieve_tables(self.parser.source_tables) + if self.parser: + # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning + return self.retrieve_tables(self.parser.source_tables) + return [] @cached_property def target_tables(self) -> List[Table]: """ Get a list of target tables """ - # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning - return self.retrieve_tables(self.parser.target_tables) + if self.parser: + # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning + return self.retrieve_tables(self.parser.target_tables) + return [] # pylint: disable=protected-access @cached_property @@ -124,6 +130,8 @@ def column_lineage(self) -> List[Tuple[Column, Column]]: Get a list of tuples of column lineage """ column_lineage = [] + if self.parser is None: + return [] try: if self.parser._dialect == SQLPARSE_DIALECT: return self.parser.get_column_lineage() @@ -331,6 +339,8 @@ def table_joins(self) -> Dict[str, List[TableColumnJoin]]: :return: for each table name, list all joins against other tables """ join_data = defaultdict(list) + if self.parser is None: + return join_data # These are @lazy_property, not properly being picked up by IDEs. Ignore the warning for statement in self.parser.statements(): self.stateful_add_joins_from_statement(join_data, sql_statement=statement) @@ -378,7 +388,11 @@ def clean_raw_query(cls, raw_query: str) -> Optional[str]: @staticmethod def _evaluate_best_parser( query: str, dialect: Dialect, timeout_seconds: int - ) -> LineageRunner: + ) -> Optional[LineageRunner]: + + if query is None: + return None + @timeout(seconds=timeout_seconds) def get_sqlfluff_lineage_runner(qry: str, dlct: str) -> LineageRunner: lr_dialect = LineageRunner(qry, dialect=dlct) diff --git a/ingestion/tests/unit/test_query_parser.py b/ingestion/tests/unit/test_query_parser.py index 4b2baa617963..12fdc806b640 100644 --- a/ingestion/tests/unit/test_query_parser.py +++ b/ingestion/tests/unit/test_query_parser.py @@ -206,7 +206,7 @@ def test_ctes_column_lineage(self): """ Validate we obtain information from Comon Table Expressions """ - query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS + query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS WITH cte_table AS ( SELECT USERS.ID, @@ -226,7 +226,6 @@ def test_ctes_column_lineage(self): ; """ - expected_tables = {"testdb.public.users"} expected_lineage = [ ( Column("testdb.public.users.id"), @@ -258,7 +257,7 @@ def test_table_with_single_comment(self): """ Validate we obtain information from Comon Table Expressions """ - query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS + query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS SELECT ID, -- A comment here @@ -295,7 +294,7 @@ def test_table_with_aliases(self): """ Validate we obtain information from Comon Table Expressions """ - query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS + query = """CREATE TABLE TESTDB.PUBLIC.TARGET AS SELECT ID AS new_identifier, NAME new_name @@ -329,3 +328,31 @@ def test_table_with_aliases(self): parser.column_lineage, expected_lineage, ) + + def test_copy_query(self): + """ + Validate Copy query is skipped appropriately without any errors + """ + query = """COPY MY_TABLE col1,col2,col3 + FROM 's3://bucket/schema/table.csv' + WITH CREDENTIALS '' + REGION 'US-east-2' + """ + expected_lineage = [] + expected_tables = set() + + parser = LineageParser(query) + tables = {str(table) for table in parser.involved_tables} + self.assertEqual(tables, expected_tables) + self.assertEqual( + parser.column_lineage, + expected_lineage, + ) + + parser = LineageParser(query, Dialect.MYSQL) + tables = {str(table) for table in parser.involved_tables} + self.assertEqual(tables, expected_tables) + self.assertEqual( + parser.column_lineage, + expected_lineage, + )