def DelRepeat(data, key):
    """Return the entries of ``data`` with duplicates (by ``key``) removed.

    ``data`` is the list of column descriptors produced by
    ``inception_engine.query_datamasking``; ``go_data_masking`` passes
    ``'index'`` as ``key`` so that the number of descriptors cannot exceed the
    number of columns in the result set. For every distinct ``entry[key]``
    value only the first entry is kept, and the input order is preserved.

    Before::

        [{'index': 0, 'field': 'phone', 'type': 'varchar(80)', 'table': 'users',
          'schema': 'db1', 'alias': 'phone'},
         {'index': 0, 'field': 'phone', 'type': 'varchar(80)', 'table': 'users',
          'schema': 'db1', 'alias': 'phone'}]

    After::

        [{'index': 0, 'field': 'phone', 'type': 'varchar(80)', 'table': 'users',
          'schema': 'db1', 'alias': 'phone'}]

    Args:
        data: Iterable of dicts, each of which contains ``key``.
        key: Dict key whose value identifies a duplicate. The values must be
            hashable because they are tracked in a set.

    Returns:
        A new list holding the same dict objects as ``data`` minus the
        duplicates; ``data`` itself is not modified.

    Raises:
        KeyError: If an entry does not contain ``key``.
    """
    # A set keeps the "already seen?" check O(1); the separate list keeps the
    # first-seen order of the surviving entries.
    seen = set()
    unique_entries = []
    for entry in data:
        marker = entry[key]
        if marker not in seen:
            seen.add(marker)
            unique_entries.append(entry)
    return unique_entries
@patch('sql.utils.go_data_masking.GoInceptionEngine')
def test_go_data_masking_union_support_keyword(self, _inception):
    """UNION and UNION ALL queries are masked instead of being rejected."""
    # Turn on the 'query_check' setting so go_data_masking runs its
    # keyword-inspection branch before asking goInception for the column tree.
    self.sys_config.set('query_check', 'true')
    self.sys_config.get_all_config()
    # The mocked engine reports a single result column backed by users.phone.
    # NOTE(review): presumably the fixture set up elsewhere in this test class
    # registers a masking rule for that column - confirm against setUp.
    _inception.return_value.query_datamasking.return_value = [
        {"index": 0, "field": "phone", "type": "varchar(80)", "table": "users",
         "schema": "archer_test", "alias": "phone"}
    ]
    sqls = ["select phone from test union select phone from activity_email_all_in_one;",
            "select phone from test union all select phone from activity_email_all_in_one;"]
    rows = (('18888888888',), ('18888888889',), ('18888888810',))
    mask_result_rows = [['188****8888', ], ['188****8889', ], ['188****8810', ]]
    for sql in sqls:
        # subTest reports which statement failed and still runs the other one.
        with self.subTest(sql=sql):
            query_result = ReviewSet(column_list=['phone'], rows=rows, full_sql=sql)
            r = go_data_masking(self.ins, 'archery', sql, query_result)
            print("test_go_data_masking_union_support_keyword",r.rows)
            self.assertEqual(r.rows, mask_result_rows)