From a165aec822e5014a99b6467ed6d3d87184e13bc4 Mon Sep 17 00:00:00 2001
From: Ville Brofeldt <33317356+villebro@users.noreply.github.com>
Date: Mon, 23 Jul 2018 20:41:38 +0300
Subject: [PATCH] Fix broken dedup and remove redundant db_spec logic (#5467)

* Fix broken dedup and remove redundant db_spec logic

* Add test case
---
 superset/dataframe.py       |  5 ++---
 superset/db_engine_specs.py | 21 ---------------------
 tests/dataframe_test.py     | 12 ++++++++++++
 3 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/superset/dataframe.py b/superset/dataframe.py
index 5fba4ffed6372..30ba4c776bfcc 100644
--- a/superset/dataframe.py
+++ b/superset/dataframe.py
@@ -70,12 +70,11 @@ def __init__(self, data, cursor_description, db_engine_spec):
         if cursor_description:
             column_names = [col[0] for col in cursor_description]
 
-        self.column_names = dedup(
-            db_engine_spec.get_normalized_column_names(cursor_description))
+        self.column_names = dedup(column_names)
 
         data = data or []
         self.df = (
-            pd.DataFrame(list(data), columns=column_names).infer_objects())
+            pd.DataFrame(list(data), columns=self.column_names).infer_objects())
 
         self._type_dict = {}
         try:
diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index 2b7454160de48..cc1345e371a88 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -321,15 +321,6 @@ def get_configuration_for_impersonation(cls, uri, impersonate_user, username):
         """
         return {}
 
-    @classmethod
-    def get_normalized_column_names(cls, cursor_description):
-        columns = cursor_description if cursor_description else []
-        return [cls.normalize_column_name(col[0]) for col in columns]
-
-    @staticmethod
-    def normalize_column_name(column_name):
-        return column_name
-
     @staticmethod
     def execute(cursor, query, async=False):
         cursor.execute(query)
@@ -402,10 +393,6 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec):
         Grain('year', _('year'), "DATE_TRUNC('YEAR', {col})", 'P1Y'),
     )
 
-    @staticmethod
-    def normalize_column_name(column_name):
-        return column_name.lower()
-
 
 class VerticaEngineSpec(PostgresBaseEngineSpec):
     engine = 'vertica'
@@ -414,10 +401,6 @@ class VerticaEngineSpec(PostgresBaseEngineSpec):
 class RedshiftEngineSpec(PostgresBaseEngineSpec):
     engine = 'redshift'
 
-    @staticmethod
-    def normalize_column_name(column_name):
-        return column_name.lower()
-
 
 class OracleEngineSpec(PostgresBaseEngineSpec):
     engine = 'oracle'
@@ -440,10 +423,6 @@ def convert_dttm(cls, target_type, dttm):
             """TO_TIMESTAMP('{}', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')"""
         ).format(dttm.isoformat())
 
-    @staticmethod
-    def normalize_column_name(column_name):
-        return column_name.lower()
-
 
 class Db2EngineSpec(BaseEngineSpec):
     engine = 'ibm_db_sa'
diff --git a/tests/dataframe_test.py b/tests/dataframe_test.py
index b56770240b919..fdba431491a1d 100644
--- a/tests/dataframe_test.py
+++ b/tests/dataframe_test.py
@@ -113,3 +113,15 @@ def test_get_columns_type_inference(self):
                 },
             ],
         )
+
+    def test_dedup_with_data(self):
+        data = [
+            ('a', 1),
+            ('a', 2),
+        ]
+        cursor_descr = (
+            ('a', 'string'),
+            ('a', 'string'),
+        )
+        cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
+        self.assertListEqual(cdf.column_names, ['a', 'a__1'])