Let initialisation from dicts use insertion order for python >= 3.6 (part II) (pandas-dev#19859)
harisbal · Feb 28, 2018 · ab0bcfc
1 parent f8a3e72
commit ab0bcfc
Show file tree

Hide file tree

Showing 11 changed files with 86 additions and 77 deletions.
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -99,9 +99,9 @@ def max_value(group):
 
         applied = df.groupby('A').apply(max_value)
         result = applied.get_dtype_counts().sort_values()
-        expected = Series({'object': 2,
-                           'float64': 2,
-                           'int64': 1}).sort_values()
+        expected = Series({'float64': 2,
+                           'int64': 1,
+                           'object': 2}).sort_values()
         assert_series_equal(result, expected)
 
     def test_groupby_return_type(self):
@@ -244,7 +244,7 @@ def func_with_no_date(batch):
             return pd.Series({'c': 2})
 
         def func_with_date(batch):
-            return pd.Series({'c': 2, 'b': datetime(2015, 1, 1)})
+            return pd.Series({'b': datetime(2015, 1, 1), 'c': 2})
 
         dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date)
         dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1])
@@ -1628,8 +1628,8 @@ def f(g):
 
     def test_apply_with_mixed_dtype(self):
         # GH3480, apply with mixed dtype on axis=1 breaks in 0.11
-        df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'],
-                        'foo2': np.random.randn(6)})
+        df = DataFrame({'foo1': np.random.randn(6),
+                        'foo2': ['one', 'two', 'two', 'three', 'one', 'two']})
         result = df.apply(lambda x: x, axis=1)
         assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts())
 
@@ -2113,10 +2113,10 @@ def test_multifunc_sum_bug(self):
 
     def test_handle_dict_return_value(self):
         def f(group):
-            return {'min': group.min(), 'max': group.max()}
+            return {'max': group.max(), 'min': group.min()}
 
         def g(group):
-            return Series({'min': group.min(), 'max': group.max()})
+            return Series({'max': group.max(), 'min': group.min()})
 
         result = self.df.groupby('A')['C'].apply(f)
         expected = self.df.groupby('A')['C'].apply(g)

diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
@@ -519,7 +519,9 @@ def test_cython_transform_frame(self, op, args, targop):
                         'timedelta': pd.timedelta_range(1, freq='s',
                                                         periods=1000),
                         'string': strings * 50,
-                        'string_missing': strings_missing * 50})
+                        'string_missing': strings_missing * 50},
+                       columns=['float', 'float_missing', 'int', 'datetime',
+                                'timedelta', 'string', 'string_missing'])
         df['cat'] = df['string'].astype('category')
 
         df2 = df.copy()
@@ -552,7 +554,9 @@ def test_cython_transform_frame(self, op, args, targop):
                 tm.assert_frame_equal(expected,
                                       gb.transform(op, *args).sort_index(
                                           axis=1))
-                tm.assert_frame_equal(expected, getattr(gb, op)(*args))
+                tm.assert_frame_equal(
+                    expected,
+                    getattr(gb, op)(*args).sort_index(axis=1))
                 # individual columns
                 for c in df:
                     if c not in ['float', 'int', 'float_missing'

diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py
@@ -53,13 +53,15 @@ def test_ix_loc_setitem_consistency(self):
 
         # GH 8607
         # ix setitem consistency
-        df = DataFrame({'timestamp': [1413840976, 1413842580, 1413760580],
-                        'delta': [1174, 904, 161],
-                        'elapsed': [7673, 9277, 1470]})
-        expected = DataFrame({'timestamp': pd.to_datetime(
-            [1413840976, 1413842580, 1413760580], unit='s'),
-            'delta': [1174, 904, 161],
-            'elapsed': [7673, 9277, 1470]})
+        df = DataFrame({'delta': [1174, 904, 161],
+                        'elapsed': [7673, 9277, 1470],
+                        'timestamp': [1413840976, 1413842580, 1413760580]})
+        expected = DataFrame({'delta': [1174, 904, 161],
+                              'elapsed': [7673, 9277, 1470],
+                              'timestamp': pd.to_datetime(
+                                  [1413840976, 1413842580, 1413760580],
+                                  unit='s')
+                              })
 
         df2 = df.copy()
         df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -539,8 +539,8 @@ def test_east_asian_unicode_frame(self):
         assert _rep(df) == expected
 
         # column name
-        df = DataFrame({u'あああああ': [1, 222, 33333, 4],
-                        'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+        df = DataFrame({'b': [u'あ', u'いいい', u'う', u'ええええええ'],
+                        u'あああああ': [1, 222, 33333, 4]},
                        index=['a', 'bb', 'c', 'ddd'])
         expected = (u"          b  あああああ\na         あ      1\n"
                     u"bb      いいい    222\nc         う  33333\n"
@@ -647,8 +647,8 @@ def test_east_asian_unicode_frame(self):
             assert _rep(df) == expected
 
             # column name
-            df = DataFrame({u'あああああ': [1, 222, 33333, 4],
-                            'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+            df = DataFrame({'b': [u'あ', u'いいい', u'う', u'ええええええ'],
+                            u'あああああ': [1, 222, 33333, 4]},
                            index=['a', 'bb', 'c', 'ddd'])
             expected = (u"                b  あああああ\n"
                         u"a              あ           1\n"
@@ -733,8 +733,8 @@ def test_east_asian_unicode_frame(self):
                 assert _rep(df) == expected
 
             # ambiguous unicode
-            df = DataFrame({u'あああああ': [1, 222, 33333, 4],
-                            'b': [u'あ', u'いいい', u'¡¡', u'ええええええ']},
+            df = DataFrame({'b': [u'あ', u'いいい', u'¡¡', u'ええええええ'],
+                            u'あああああ': [1, 222, 33333, 4]},
                            index=['a', 'bb', 'c', '¡¡¡'])
             expected = (u"                b  あああああ\n"
                         u"a              あ           1\n"

diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
@@ -115,17 +115,18 @@ def test_to_latex_empty(self):
         assert result == expected
 
     def test_to_latex_with_formatters(self):
-        df = DataFrame({'int': [1, 2, 3],
+        df = DataFrame({'datetime64': [datetime(2016, 1, 1),
+                                       datetime(2016, 2, 5),
+                                       datetime(2016, 3, 3)],
                         'float': [1.0, 2.0, 3.0],
+                        'int': [1, 2, 3],
                         'object': [(1, 2), True, False],
-                        'datetime64': [datetime(2016, 1, 1),
-                                       datetime(2016, 2, 5),
-                                       datetime(2016, 3, 3)]})
+                        })
 
-        formatters = {'int': lambda x: '0x{x:x}'.format(x=x),
+        formatters = {'datetime64': lambda x: x.strftime('%Y-%m'),
                       'float': lambda x: '[{x: 4.1f}]'.format(x=x),
+                      'int': lambda x: '0x{x:x}'.format(x=x),
                       'object': lambda x: '-{x!s}-'.format(x=x),
-                      'datetime64': lambda x: x.strftime('%Y-%m'),
                       '__index__': lambda x: 'index: {x}'.format(x=x)}
         result = df.to_latex(formatters=dict(formatters))
 
@@ -347,10 +348,10 @@ def test_to_latex_escape(self):
         a = 'a'
         b = 'b'
 
-        test_dict = {u('co^l1'): {a: "a",
-                                  b: "b"},
-                     u('co$e^x$'): {a: "a",
-                                    b: "b"}}
+        test_dict = {u('co$e^x$'): {a: "a",
+                                    b: "b"},
+                     u('co^l1'): {a: "a",
+                                  b: "b"}}
 
         unescaped_result = DataFrame(test_dict).to_latex(escape=False)
         escaped_result = DataFrame(test_dict).to_latex(

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -553,7 +553,7 @@ def __str__(self):
 
     def test_label_overflow(self):
         # GH14256: buffer length not checked when writing label
-        df = pd.DataFrame({'foo': [1337], 'bar' * 100000: [1]})
+        df = pd.DataFrame({'bar' * 100000: [1], 'foo': [1337]})
         assert df.to_json() == \
             '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format(
                 bar=('bar' * 100000))

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -588,18 +588,18 @@ def test_merge_on_datetime64tz(self):
         result = pd.merge(left, right, on='key', how='outer')
         assert_frame_equal(result, expected)
 
-        left = pd.DataFrame({'value': pd.date_range('20151010', periods=2,
-                                                    tz='US/Eastern'),
-                             'key': [1, 2]})
-        right = pd.DataFrame({'value': pd.date_range('20151011', periods=2,
-                                                     tz='US/Eastern'),
-                              'key': [2, 3]})
+        left = pd.DataFrame({'key': [1, 2],
+                             'value': pd.date_range('20151010', periods=2,
+                                                    tz='US/Eastern')})
+        right = pd.DataFrame({'key': [2, 3],
+                              'value': pd.date_range('20151011', periods=2,
+                                                     tz='US/Eastern')})
         expected = DataFrame({
+            'key': [1, 2, 3],
             'value_x': list(pd.date_range('20151010', periods=2,
                                           tz='US/Eastern')) + [pd.NaT],
             'value_y': [pd.NaT] + list(pd.date_range('20151011', periods=2,
-                                                     tz='US/Eastern')),
-            'key': [1, 2, 3]})
+                                                     tz='US/Eastern'))})
         result = pd.merge(left, right, on='key', how='outer')
         assert_frame_equal(result, expected)
         assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]'
@@ -632,31 +632,32 @@ def test_merge_on_periods(self):
         result = pd.merge(left, right, on='key', how='outer')
         assert_frame_equal(result, expected)
 
-        left = pd.DataFrame({'value': pd.period_range('20151010', periods=2,
-                                                      freq='D'),
-                             'key': [1, 2]})
-        right = pd.DataFrame({'value': pd.period_range('20151011', periods=2,
-                                                       freq='D'),
-                              'key': [2, 3]})
+        left = pd.DataFrame({'key': [1, 2],
+                             'value': pd.period_range('20151010', periods=2,
+                                                      freq='D')})
+        right = pd.DataFrame({'key': [2, 3],
+                              'value': pd.period_range('20151011', periods=2,
+                                                       freq='D')})
 
         exp_x = pd.period_range('20151010', periods=2, freq='D')
         exp_y = pd.period_range('20151011', periods=2, freq='D')
-        expected = DataFrame({'value_x': list(exp_x) + [pd.NaT],
-                              'value_y': [pd.NaT] + list(exp_y),
-                              'key': [1, 2, 3]})
+        expected = DataFrame({'key': [1, 2, 3],
+                              'value_x': list(exp_x) + [pd.NaT],
+                              'value_y': [pd.NaT] + list(exp_y)})
         result = pd.merge(left, right, on='key', how='outer')
         assert_frame_equal(result, expected)
         assert result['value_x'].dtype == 'object'
         assert result['value_y'].dtype == 'object'
 
     def test_indicator(self):
         # PR #10054. xref #7412 and closes #8790.
-        df1 = DataFrame({'col1': [0, 1], 'col_left': [
-                        'a', 'b'], 'col_conflict': [1, 2]})
+        df1 = DataFrame({'col1': [0, 1], 'col_conflict': [1, 2],
+                         'col_left': ['a', 'b']})
         df1_copy = df1.copy()
 
-        df2 = DataFrame({'col1': [1, 2, 3, 4, 5], 'col_right': [2, 2, 2, 2, 2],
-                         'col_conflict': [1, 2, 3, 4, 5]})
+        df2 = DataFrame({'col1': [1, 2, 3, 4, 5],
+                         'col_conflict': [1, 2, 3, 4, 5],
+                         'col_right': [2, 2, 2, 2, 2]})
         df2_copy = df2.copy()
 
         df_result = DataFrame({

diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py
@@ -83,9 +83,10 @@ def test_empty_sequence_concat(self):
         pd.concat([pd.DataFrame(), None])
 
     def test_doc_example(self):
-        left = DataFrame({'key': ['a', 'c', 'e', 'a', 'c', 'e'],
+        left = DataFrame({'group': list('aaabbb'),
+                          'key': ['a', 'c', 'e', 'a', 'c', 'e'],
                           'lvalue': [1, 2, 3] * 2,
-                          'group': list('aaabbb')})
+                          })
 
         right = DataFrame({'key': ['b', 'c', 'd'],
                            'rvalue': [1, 2, 3]})

diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
@@ -1542,10 +1542,10 @@ def test_concat_bug_2972(self):
     def test_concat_bug_3602(self):
 
         # GH 3602, duplicate columns
-        df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'stringvar': [
-                        'rrr', 'rrr', 'rrr', 'rrr'], 'prc': [6, 6, 6, 6]})
-        df2 = DataFrame({'misc': [1, 2, 3, 4], 'prc': [
-                        6, 6, 6, 6], 'C': [9, 10, 11, 12]})
+        df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'prc': [6, 6, 6, 6],
+                         'stringvar': ['rrr', 'rrr', 'rrr', 'rrr']})
+        df2 = DataFrame({'C': [9, 10, 11, 12], 'misc': [1, 2, 3, 4],
+                         'prc': [6, 6, 6, 6]})
         expected = DataFrame([[0, 6, 'rrr', 9, 1, 6],
                               [0, 6, 'rrr', 10, 2, 6],
                               [0, 6, 'rrr', 11, 3, 6],

diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
@@ -589,11 +589,11 @@ def test_nonnumeric_suffix(self):
 
     def test_mixed_type_suffix(self):
         df = pd.DataFrame({
-            'treatment_1': [1.0, 2.0],
-            'treatment_foo': [3.0, 4.0],
-            'result_foo': [5.0, 6.0],
+            'A': ['X1', 'X2'],
             'result_1': [0, 9],
-            'A': ['X1', 'X2']})
+            'result_foo': [5.0, 6.0],
+            'treatment_1': [1.0, 2.0],
+            'treatment_foo': [3.0, 4.0]})
         expected = pd.DataFrame({
             'A': ['X1', 'X2', 'X1', 'X2'],
             'colname': ['1', '1', 'foo', 'foo'],

diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py
@@ -100,8 +100,8 @@ def test_basic_types(self, sparse, dtype):
         expected_counts = {'int64': 1, 'object': 1}
         expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0)
 
-        expected = Series(expected_counts).sort_values()
-        tm.assert_series_equal(result.get_dtype_counts().sort_values(),
+        expected = Series(expected_counts).sort_index()
+        tm.assert_series_equal(result.get_dtype_counts().sort_index(),
                                expected)
 
     def test_just_na(self, sparse):
@@ -212,10 +212,10 @@ def test_dataframe_dummies_prefix_str(self, df, sparse):
     def test_dataframe_dummies_subset(self, df, sparse):
         result = get_dummies(df, prefix=['from_A'], columns=['A'],
                              sparse=sparse)
-        expected = DataFrame({'from_A_a': [1, 0, 1],
-                              'from_A_b': [0, 1, 0],
-                              'B': ['b', 'b', 'c'],
-                              'C': [1, 2, 3]}, dtype=np.uint8)
+        expected = DataFrame({'B': ['b', 'b', 'c'],
+                              'C': [1, 2, 3],
+                              'from_A_a': [1, 0, 1],
+                              'from_A_b': [0, 1, 0]}, dtype=np.uint8)
         expected[['C']] = df[['C']]
         assert_frame_equal(result, expected)
 
@@ -249,16 +249,16 @@ def test_dataframe_dummies_prefix_sep_bad_length(self, df, sparse):
 
     def test_dataframe_dummies_prefix_dict(self, sparse):
         prefixes = {'A': 'from_A', 'B': 'from_B'}
-        df = DataFrame({'A': ['a', 'b', 'a'],
-                        'B': ['b', 'b', 'c'],
-                        'C': [1, 2, 3]})
+        df = DataFrame({'C': [1, 2, 3],
+                        'A': ['a', 'b', 'a'],
+                        'B': ['b', 'b', 'c']})
         result = get_dummies(df, prefix=prefixes, sparse=sparse)
 
-        expected = DataFrame({'from_A_a': [1, 0, 1],
+        expected = DataFrame({'C': [1, 2, 3],
+                              'from_A_a': [1, 0, 1],
                               'from_A_b': [0, 1, 0],
                               'from_B_b': [1, 1, 0],
-                              'from_B_c': [0, 0, 1],
-                              'C': [1, 2, 3]})
+                              'from_B_c': [0, 0, 1]})
 
         columns = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
         expected[columns] = expected[columns].astype(np.uint8)