diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 78a94976e732d..50e52bd6cc494 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -3,9 +3,9 @@ from pandas.core.algorithms import take_1d try: - from cStringIO import StringIO + from cStringIO import StringIO # noqa:F401 except ImportError: - from io import StringIO + from io import StringIO # noqa:F401 try: from pandas._libs import algos @@ -19,7 +19,6 @@ except ImportError: have_real_test_parallel = False - def test_parallel(num_threads=1): def wrapper(fname): @@ -153,7 +152,6 @@ def time_groups_8(self): self._pg8_groups() - class nogil_take1d_float64(object): goal_time = 0.2 diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 13b5cd2b06032..ffe7fb51edcf7 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -1,3 +1,5 @@ +import numpy as np + from .pandas_vb_common import * from string import ascii_letters, digits from itertools import product @@ -17,7 +19,8 @@ def time_groupby_agg_builtins1(self): def time_groupby_agg_builtins2(self): self.df.groupby(['jim', 'joe']).agg([sum, min, max]) -#---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- # dict return values class groupby_apply_dict_return(object): @@ -32,7 +35,7 @@ def time_groupby_apply_dict_return(self): self.data.groupby(self.labels).apply(self.f) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # groups class Groups(object): @@ -41,7 +44,7 @@ class Groups(object): size = 2 ** 22 data = { 'int64_small': Series(np.random.randint(0, 100, size=size)), - 'int64_large' : Series(np.random.randint(0, 10000, size=size)), + 'int64_large': Series(np.random.randint(0, 10000, size=size)), 'object_small': Series(tm.makeStringIndex(100).take(np.random.randint(0, 100, size=size))), 'object_large': Series(tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=size))) } @@ -56,7 +59,7 @@ def time_groupby_groups(self, df): self.df.groupby(self.df).groups -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # First / last functions class FirstLast(object): @@ -72,7 +75,7 @@ def setup(self, dtype): if dtype == 'datetime': self.df = DataFrame( {'values': date_range('1/1/2011', periods=100000, freq='s'), - 'key': range(100000),}) + 'key': range(100000)}) elif dtype == 'object': self.df = DataFrame( {'values': (['foo'] * 100000), @@ -98,7 +101,7 @@ def time_groupby_nth_none(self, dtype): self.df.groupby('key').nth(0) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # DataFrame Apply overhead class groupby_frame_apply(object): @@ -138,7 +141,7 @@ def time_groupby_frame_apply_df_copy_overhead(self): self.df.groupby('key').apply(self.df_copy_function) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # 2d grouping, aggregate many columns class groupby_frame_cython_many_columns(object): @@ -152,7 +155,7 @@ def time_sum(self): self.df.groupby(self.labels).sum() -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # single key, long, integer key class groupby_frame_singlekey_integer(object): @@ -167,7 +170,7 @@ def time_sum(self): self.df.groupby(self.labels).sum() -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # DataFrame nth class groupby_nth(object): @@ -189,7 +192,7 @@ def time_groupby_series_nth_none(self): self.df[1].groupby(self.df[0]).nth(0) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # groupby_indices replacement, chop up Series class groupby_indices(object): @@ -226,7 +229,7 @@ def time_groupby_int64_overflow(self): self.df.groupby(list('abcde')).max() -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # count() speed class groupby_multi_count(object): @@ -269,7 +272,7 @@ def time_groupby_int_count(self): self.df.groupby(['key1', 'key2']).count() -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # nunique() speed class groupby_nunique(object): @@ -285,7 +288,7 @@ def time_groupby_nunique(self): self.df.groupby(['key1', 'key2']).nunique() -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # group with different functions per column class groupby_agg_multi(object): @@ -356,7 +359,7 @@ def time_groupby_series_simple_rank(self): self.df.groupby('key1').rank(pct=True) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # size() speed class groupby_size(object): @@ -386,8 +389,7 @@ def time_groupby_size(self): self.draws.groupby(self.cats).size() - -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # groupby with a variable value for ngroups class GroupBySuite(object): @@ -582,7 +584,8 @@ def make_grouper(self, N): return pd.date_range('1900-01-01', freq='D', periods=N, tz='US/Central') -#---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- # Series.value_counts class series_value_counts(object): @@ -607,7 +610,7 @@ def time_value_counts_strings(self): self.s.value_counts() -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # pivot_table class groupby_pivot_table(object): @@ -624,7 +627,7 @@ def time_groupby_pivot_table(self): self.df.pivot_table(index='key1', columns=['key2', 'key3']) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Sum booleans #2692 class groupby_sum_booleans(object): @@ -638,7 +641,7 @@ def time_groupby_sum_booleans(self): self.df.groupby('ii').sum() -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # multi-indexed group sum #9049 class groupby_sum_multiindex(object): @@ -652,7 +655,7 @@ def time_groupby_sum_multiindex(self): self.df.groupby(level=[0, 1]).sum() -#------------------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Transform testing class Transform(object): @@ -710,8 +713,6 @@ def time_transform_multi_key4(self): self.df4.groupby(['jim', 'joe'])['jolie'].transform('max') - - np.random.seed(0) N = 120000 N_TRANSITIONS = 1400 @@ -723,9 +724,6 @@ def time_transform_multi_key4(self): df = DataFrame({'signal': np.random.rand(N), }) - - - class groupby_transform_series(object): goal_time = 0.2 diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index f271b82c758ee..7fd8c521eb794 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -107,7 +107,7 @@ def setup(self): # duptes self.idx_dupe = (np.array(range(30)) * 99) - self.df3 = DataFrame({'A': ([0.1] * 1000), 'B': ([1] * 1000),}) + self.df3 = DataFrame({'A': ([0.1] * 1000), 'B': ([1] * 1000)}) self.df3 = concat([self.df3, (2 * self.df3), (3 * self.df3)]) self.df_big = DataFrame(dict(A=(['foo'] * 1000000))) @@ -343,7 +343,7 @@ class AssignTimeseriesIndex(object): def setup(self): N = 100000 np.random.seed(1234) - dx = date_range('1/1/2000', periods=N, freq='H') + idx = date_range('1/1/2000', periods=N, freq='H') self.df = DataFrame(np.random.randn(N, 1), columns=['A'], index=idx) def time_frame_assign_timeseries_index(self): @@ -367,5 +367,3 @@ def time_assign_with_setitem(self): np.random.seed(1234) for i in range(100): self.df[i] = np.random.randn(self.N) - - diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index dc1d6de73f8ae..6dd95636dadb3 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -88,8 +88,7 @@ class to_numeric_downcast(object): dtype='datetime64[D]'), N), 'string-float': (['1.1'] * N2) + ([2] * N2), 'int-list': ([1] * N2) + ([2] * N2), - 'int32': np.repeat(np.int32(1), N) - } + 'int32': np.repeat(np.int32(1), N)} def setup(self, dtype, downcast): self.data = self.data_dict[dtype] @@ -104,7 +103,7 @@ def setup(self): n = 1000000 arr = np.repeat([2**63], n) arr = arr + np.arange(n).astype('uint64') - arr = np.array([arr[i] if i%2 == 0 else + arr = np.array([arr[i] if i % 2 == 0 else str(arr[i]) for i in range(n)], dtype=object) diff --git a/asv_bench/benchmarks/io_bench.py b/asv_bench/benchmarks/io_bench.py index c718b13912e73..862a6c836afdc 100644 --- a/asv_bench/benchmarks/io_bench.py +++ b/asv_bench/benchmarks/io_bench.py @@ -200,7 +200,7 @@ def setup(self, compression, engine): # The Python 2 C parser can't read bz2 from open files. raise NotImplementedError try: - import s3fs + import s3fs # noqa:F401 except ImportError: # Skip these benchmarks if `boto` is not installed. raise NotImplementedError @@ -226,16 +226,16 @@ def setup(self): self.N = 100000 self.C = 5 self.df = DataFrame({('float{0}'.format(i), randn(self.N)) for i in range(self.C)}) - self.df.to_json(self.fname,orient="records",lines=True) + self.df.to_json(self.fname, orient="records", lines=True) def time_read_json_lines(self): pd.read_json(self.fname, lines=True) def time_read_json_lines_chunk(self): - pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4)) + pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N // 4)) def peakmem_read_json_lines(self): pd.read_json(self.fname, lines=True) def peakmem_read_json_lines_chunk(self): - pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4)) + pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N // 4)) diff --git a/asv_bench/benchmarks/io_sql.py b/asv_bench/benchmarks/io_sql.py index ec855e5d33525..46285f5e51872 100644 --- a/asv_bench/benchmarks/io_sql.py +++ b/asv_bench/benchmarks/io_sql.py @@ -1,10 +1,10 @@ -import sqlalchemy +import sqlalchemy # noqa:F401 from .pandas_vb_common import * import sqlite3 from sqlalchemy import create_engine -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------ # to_sql class WriteSQL(object): @@ -23,7 +23,7 @@ def time_sqlalchemy(self): self.df.to_sql('test1', self.engine, if_exists='replace') -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------ # read_sql class ReadSQL(object): @@ -47,7 +47,7 @@ def time_read_table_sqlalchemy(self): read_sql_table('test2', self.engine) -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------ # type specific write class WriteSQLTypes(object): @@ -75,7 +75,7 @@ def time_datetime_sqlalchemy(self): self.df[['datetime']].to_sql('test_datetime', self.engine, if_exists='replace') -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------ # type specific read class ReadSQLTypes(object): diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 3b0e33b72ddc1..b7fca7fc8e150 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -291,11 +291,11 @@ def setup(self): groups = tm.makeStringIndex(10).values self.left = pd.DataFrame({'group': groups.repeat(5000), - 'key' : np.tile(np.arange(0, 10000, 2), 10), + 'key': np.tile(np.arange(0, 10000, 2), 10), 'lvalue': np.random.randn(50000)}) - self.right = pd.DataFrame({'key' : np.arange(10000), - 'rvalue' : np.random.randn(10000)}) + self.right = pd.DataFrame({'key': np.arange(10000), + 'rvalue': np.random.randn(10000)}) def time_merge_ordered(self): merge_ordered(self.left, self.right, on='key', left_by='group') diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py index 849776bf9a591..81025ec7bbe22 100644 --- a/asv_bench/benchmarks/offset.py +++ b/asv_bench/benchmarks/offset.py @@ -7,7 +7,7 @@ from pandas import date_range try: - import pandas.tseries.holiday + import pandas.tseries.holiday # noqa:F401 except ImportError: pass diff --git a/asv_bench/benchmarks/packers.py b/asv_bench/benchmarks/packers.py index 758162f000e8d..4243cbb203b78 100644 --- a/asv_bench/benchmarks/packers.py +++ b/asv_bench/benchmarks/packers.py @@ -3,7 +3,7 @@ import pandas as pd from collections import OrderedDict from pandas.compat import BytesIO -import sqlite3 + import os from sqlalchemy import create_engine import numpy as np @@ -292,7 +292,7 @@ class STATA(_Packers): def setup(self): self._setup() - self.df3=self.df.copy() + self.df3 = self.df.copy() self.df3['int8_'] = [randint(np.iinfo(np.int8).min, (np.iinfo(np.int8).max - 27)) for _ in range(self.N)] self.df3['int16_'] = [randint(np.iinfo(np.int16).min, (np.iinfo(np.int16).max - 27)) for _ in range(self.N)] self.df3['int32_'] = [randint(np.iinfo(np.int32).min, (np.iinfo(np.int32).max - 27)) for _ in range(self.N)] diff --git a/asv_bench/benchmarks/parser_vb.py b/asv_bench/benchmarks/parser_vb.py index 32bf7e50d1a89..141219b90281f 100644 --- a/asv_bench/benchmarks/parser_vb.py +++ b/asv_bench/benchmarks/parser_vb.py @@ -1,6 +1,10 @@ -from .pandas_vb_common import * import os -from pandas import read_csv + +import numpy as np + +import pandas as pd +from pandas import read_csv, DataFrame + try: from cStringIO import StringIO except ImportError: diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py index 16889b2f19e89..98c8c3d992368 100644 --- a/asv_bench/benchmarks/plotting.py +++ b/asv_bench/benchmarks/plotting.py @@ -1,4 +1,8 @@ -from .pandas_vb_common import * +import numpy as np + +import pandas as pd +from pandas import DataFrame, Series, DatetimeIndex + try: from pandas import date_range except ImportError: diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 537d275e7c727..f41e48a77e471 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -1,6 +1,12 @@ -from .pandas_vb_common import * +import random from random import shuffle +import numpy as np +from pandas import (Index, MultiIndex, DatetimeIndex, + DataFrame, Series, date_range) +from pandas._libs import lib +import pandas.util.testing as tm + class Reindexing(object): goal_time = 0.2 @@ -34,7 +40,7 @@ def time_reindex_multiindex(self): self.s1.reindex(self.s2.index) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Pad / backfill @@ -79,7 +85,7 @@ def time_pad_float32(self): self.ts4.fillna(method='pad') -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # align on level @@ -105,7 +111,7 @@ def time_reindex_level(self): self.df_level.reindex(self.df.index, level=1) -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # drop_duplicates @@ -118,7 +124,7 @@ def setup(self): self.key1 = tm.makeStringIndex(self.N).values.repeat(self.K) self.key2 = tm.makeStringIndex(self.N).values.repeat(self.K) self.df = DataFrame({'key1': self.key1, 'key2': self.key2, - 'value': np.random.randn((self.N * self.K)),}) + 'value': np.random.randn((self.N * self.K))}) self.col_array_list = list(self.df.values.T) self.df2 = self.df.copy() @@ -160,7 +166,7 @@ def time_frame_drop_dups_int(self): def time_frame_drop_dups_bool(self): self.df_bool.drop_duplicates() -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # blog "pandas escaped the zoo" diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 157d5fe1e3948..d182e34851a9e 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -1,4 +1,9 @@ -from .pandas_vb_common import * +import numpy as np + +import pandas as pd +from pandas import date_range, DatetimeIndex, Series, DataFrame + +from .pandas_vb_common import DateRange, datetools class replace_fillna(object): diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index 177e3e7cb87fa..aca5f2af5ef3f 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -1,5 +1,7 @@ -from .pandas_vb_common import * -from pandas import melt, wide_to_long +import numpy as np + +import pandas as pd +from pandas import melt, wide_to_long, MultiIndex, DataFrame, date_range class melt_dataframe(object): @@ -23,16 +25,19 @@ def setup(self): self.index = MultiIndex.from_arrays([np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)]) self.df = DataFrame(np.random.randn(10000, 4), index=self.index) self.index = date_range('1/1/2000', periods=10000, freq='h') - self.df = DataFrame(randn(10000, 50), index=self.index, columns=range(50)) + self.df = DataFrame(np.random.randn(10000, 50), + index=self.index, columns=range(50)) self.pdf = self.unpivot(self.df) - self.f = (lambda : self.pdf.pivot('date', 'variable', 'value')) + self.f = (lambda: self.pdf.pivot('date', 'variable', 'value')) def time_reshape_pivot_time_series(self): self.f() def unpivot(self, frame): (N, K) = frame.shape - self.data = {'value': frame.values.ravel('F'), 'variable': np.asarray(frame.columns).repeat(N), 'date': np.tile(np.asarray(frame.index), K), } + self.data = {'value': frame.values.ravel('F'), + 'variable': np.asarray(frame.columns).repeat(N), + 'date': np.tile(np.asarray(frame.index), K), } return DataFrame(self.data, columns=['date', 'variable', 'value']) @@ -67,9 +72,9 @@ def setup(self): n = 1000 levels = np.arange(m) - index = pd.MultiIndex.from_product([levels]*2) + index = pd.MultiIndex.from_product([levels] * 2) columns = np.arange(n) - values = np.arange(m*m*n).reshape(m*m, n) + values = np.arange(m * m * n).reshape(m * m, n) self.df = pd.DataFrame(values, index, columns) self.df2 = self.df.iloc[:-1] diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 899349cd21f84..f3ec73837e331 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -1,6 +1,5 @@ -from .pandas_vb_common import * -import pandas as pd import numpy as np +import pandas as pd class DataframeRolling(object): @@ -55,7 +54,7 @@ def time_rolling_corr(self): def time_rolling_cov(self): (self.dfs.rolling(self.wins).cov()) - + def time_rolling_quantile_0_l(self): (self.df.rolling(self.winl).quantile(0.0)) @@ -147,7 +146,7 @@ def time_rolling_corr(self): def time_rolling_cov(self): (self.srs.rolling(self.wins).cov()) - + def time_rolling_quantile_0_l(self): (self.sr.rolling(self.winl).quantile(0.0)) diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 5e8cf3a0350bb..e4f98d2ef5e25 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -1,4 +1,9 @@ -from .pandas_vb_common import * +from datetime import datetime + +import numpy as np + +import pandas as pd +from pandas import Series class series_constructor_no_data_datetime_index(object): @@ -6,8 +11,8 @@ class series_constructor_no_data_datetime_index(object): def setup(self): self.dr = pd.date_range( - start=datetime(2015,10,26), - end=datetime(2016,1,1), + start=datetime(2015, 10, 26), + end=datetime(2016, 1, 1), freq='50s' ) # ~100k long diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index a46205026481e..a1668125afe94 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -1,8 +1,10 @@ import itertools -from .pandas_vb_common import * +import numpy as np import scipy.sparse -from pandas import SparseSeries, SparseDataFrame, SparseArray + +import pandas as pd +from pandas import SparseSeries, SparseDataFrame, SparseArray, date_range class sparse_series_to_frame(object): @@ -169,7 +171,6 @@ def time_sparse_division_1percent(self): self.a_1percent / self.b_1percent - class sparse_arithmetic_block(object): goal_time = 0.2 diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 1e1eb167b46bf..b4914d76dbc45 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -1,4 +1,13 @@ -from .pandas_vb_common import * +import random +import numpy as np + +import pandas as pd +from pandas import DataFrame, Series, MultiIndex + +from .pandas_vb_common import (rolling_min, rolling_mean, rolling_median, + rolling_max, rolling_sum, + rolling_var, rolling_std, + rolling_kurt, rolling_skew) def _set_use_bottleneck_False(): @@ -37,7 +46,14 @@ class stat_ops_level_frame_sum(object): goal_time = 0.2 def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) + self.index = MultiIndex(levels=[np.arange(10), + np.arange(100), + np.arange(100)], + labels=[np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), + 10), + np.tile(np.tile(np.arange(100), 100), + 10)]) random.shuffle(self.index.values) self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) @@ -50,10 +66,19 @@ class stat_ops_level_frame_sum_multiple(object): goal_time = 0.2 def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) + self.index = MultiIndex(levels=[np.arange(10), + np.arange(100), + np.arange(100)], + labels=[np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), + 10), + np.tile(np.tile(np.arange(100), + 100), 10)]) random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) + self.df = DataFrame(np.random.randn(len(self.index), 4), + index=self.index) + self.df_level = DataFrame(np.random.randn(100, 4), + index=self.index.levels[1]) def time_stat_ops_level_frame_sum_multiple(self): self.df.sum(level=[0, 1]) @@ -63,10 +88,19 @@ class stat_ops_level_series_sum(object): goal_time = 0.2 def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) + self.index = MultiIndex(levels=[np.arange(10), + np.arange(100), + np.arange(100)], + labels=[np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), + 10), + np.tile(np.tile(np.arange(100), 100), + 10)]) random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) + self.df = DataFrame(np.random.randn(len(self.index), 4), + index=self.index) + self.df_level = DataFrame(np.random.randn(100, 4), + index=self.index.levels[1]) def time_stat_ops_level_series_sum(self): self.df[1].sum(level=1) @@ -76,10 +110,19 @@ class stat_ops_level_series_sum_multiple(object): goal_time = 0.2 def setup(self): - self.index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) + self.index = MultiIndex(levels=[np.arange(10), + np.arange(100), + np.arange(100)], + labels=[np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), + 10), + np.tile(np.tile(np.arange(100), + 100), 10)]) random.shuffle(self.index.values) - self.df = DataFrame(np.random.randn(len(self.index), 4), index=self.index) - self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) + self.df = DataFrame(np.random.randn(len(self.index), 4), + index=self.index) + self.df_level = DataFrame(np.random.randn(100, 4), + index=self.index.levels[1]) def time_stat_ops_level_series_sum_multiple(self): self.df[1].sum(level=[0, 1]) @@ -130,7 +173,9 @@ class stats_rank_average(object): goal_time = 0.2 def setup(self): - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) + self.values = np.concatenate([np.arange(100000), + np.random.randn(100000), + np.arange(100000)]) self.s = Series(self.values) def time_stats_rank_average(self): @@ -152,7 +197,9 @@ class stats_rank_pct_average(object): goal_time = 0.2 def setup(self): - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) + self.values = np.concatenate([np.arange(100000), + np.random.randn(100000), + np.arange(100000)]) self.s = Series(self.values) def time_stats_rank_pct_average(self): @@ -163,7 +210,9 @@ class stats_rank_pct_average_old(object): goal_time = 0.2 def setup(self): - self.values = np.concatenate([np.arange(100000), np.random.randn(100000), np.arange(100000)]) + self.values = np.concatenate([np.arange(100000), + np.random.randn(100000), + np.arange(100000)]) self.s = Series(self.values) def time_stats_rank_pct_average_old(self): diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index 948d4b92a5a57..950615956d7f6 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -1,19 +1,29 @@ -from .pandas_vb_common import * import string -import itertools as IT +import itertools + +import numpy as np + import pandas.util.testing as testing +from pandas import Series class StringMethods(object): goal_time = 0.2 def make_series(self, letters, strlen, size): - return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))]) + return Series([str(x) for x in + np.fromiter(itertools.cycle(letters), + count=(size * strlen), + dtype='|S1').view('|S{}'.format(strlen))]) def setup(self): - self.many = self.make_series(('matchthis' + string.ascii_uppercase), strlen=19, size=10000) - self.few = self.make_series(('matchthis' + (string.ascii_uppercase * 42)), strlen=19, size=10000) - self.s = self.make_series(string.ascii_uppercase, strlen=10, size=10000).str.join('|') + upper = string.ascii_uppercase + self.many = self.make_series(('matchthis' + upper), + strlen=19, size=10000) + self.few = self.make_series(('matchthis' + (upper * 42)), + strlen=19, size=10000) + self.s = self.make_series(upper, + strlen=10, size=10000).str.join('|') def time_cat(self): self.many.str.cat(sep=',') @@ -64,7 +74,8 @@ def time_pad(self): self.many.str.pad(100, side='both') def time_repeat(self): - self.many.str.repeat(list(IT.islice(IT.cycle(range(1, 4)), len(self.many)))) + cycle = itertools.cycle(range(1, 4)) + self.many.str.repeat(list(itertools.islice(cycle, len(self.many)))) def time_replace(self): self.many.str.replace('(matchthis)', '\x01\x01') diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 2d1ff3a24f787..1316b4f2a1b96 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,5 +1,7 @@ -from .pandas_vb_common import * -from pandas import to_timedelta, Timestamp +import numpy as np + +import pandas as pd +from pandas import to_timedelta, Timestamp, Timedelta class ToTimedelta(object): @@ -69,6 +71,7 @@ def setup(self): self.series = pd.Series( pd.timedelta_range('1 days', periods=self.N, freq='h') ) + def time_dt_accessor(self): self.series.dt diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py index 9d7d6d2998a8b..f7deb53d473c9 100644 --- a/asv_bench/benchmarks/timestamp.py +++ b/asv_bench/benchmarks/timestamp.py @@ -1,4 +1,4 @@ -from pandas import to_timedelta, Timestamp +from pandas import Timestamp import pytz import datetime diff --git a/asv_bench/vbench_to_asv.py b/asv_bench/vbench_to_asv.py index b1179387e65d5..f325bf331a71b 100644 --- a/asv_bench/vbench_to_asv.py +++ b/asv_bench/vbench_to_asv.py @@ -111,7 +111,6 @@ def visit_FunctionDef(self, node): def translate_module(target_module): g_vars = {} - l_vars = {} exec('import ' + target_module) in g_vars print(target_module) diff --git a/ci/lint.sh b/ci/lint.sh index 5d9fafe6c9064..6ad6a77910d8a 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -23,6 +23,13 @@ if [ "$LINT" ]; then fi echo "Linting setup.py DONE" + echo "Linting asv_bench" + flake8 asv_bench --ignore=E501,F405,F403,F811 --exclude asv_bench/benchmarks/pandas_vb_common.py,asv_bench/benchmarks/hdfstore_bench.py + if [ $? -ne "0" ]; then + RET=1 + fi + echo "Linting asv_bench DONE" + echo "Linting *.pyx" flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403 if [ $? -ne "0" ]; then