Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add (partial) linting to asvs #18620

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions asv_bench/benchmarks/gil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from pandas.core.algorithms import take_1d

try:
from cStringIO import StringIO
from cStringIO import StringIO # noqa:F401
except ImportError:
from io import StringIO
from io import StringIO # noqa:F401

try:
from pandas._libs import algos
Expand All @@ -19,7 +19,6 @@
except ImportError:
have_real_test_parallel = False


def test_parallel(num_threads=1):

def wrapper(fname):
Expand Down Expand Up @@ -153,7 +152,6 @@ def time_groups_8(self):
self._pg8_groups()



class nogil_take1d_float64(object):
goal_time = 0.2

Expand Down
50 changes: 24 additions & 26 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import numpy as np

from .pandas_vb_common import *
from string import ascii_letters, digits
from itertools import product
Expand All @@ -17,7 +19,8 @@ def time_groupby_agg_builtins1(self):
def time_groupby_agg_builtins2(self):
self.df.groupby(['jim', 'joe']).agg([sum, min, max])

#----------------------------------------------------------------------

# ----------------------------------------------------------------------
# dict return values

class groupby_apply_dict_return(object):
Expand All @@ -32,7 +35,7 @@ def time_groupby_apply_dict_return(self):
self.data.groupby(self.labels).apply(self.f)


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# groups

class Groups(object):
Expand All @@ -41,7 +44,7 @@ class Groups(object):
size = 2 ** 22
data = {
'int64_small': Series(np.random.randint(0, 100, size=size)),
'int64_large' : Series(np.random.randint(0, 10000, size=size)),
'int64_large': Series(np.random.randint(0, 10000, size=size)),
'object_small': Series(tm.makeStringIndex(100).take(np.random.randint(0, 100, size=size))),
'object_large': Series(tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=size)))
}
Expand All @@ -56,7 +59,7 @@ def time_groupby_groups(self, df):
self.df.groupby(self.df).groups


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# First / last functions

class FirstLast(object):
Expand All @@ -72,7 +75,7 @@ def setup(self, dtype):
if dtype == 'datetime':
self.df = DataFrame(
{'values': date_range('1/1/2011', periods=100000, freq='s'),
'key': range(100000),})
'key': range(100000)})
elif dtype == 'object':
self.df = DataFrame(
{'values': (['foo'] * 100000),
Expand All @@ -98,7 +101,7 @@ def time_groupby_nth_none(self, dtype):
self.df.groupby('key').nth(0)


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# DataFrame Apply overhead

class groupby_frame_apply(object):
Expand Down Expand Up @@ -138,7 +141,7 @@ def time_groupby_frame_apply_df_copy_overhead(self):
self.df.groupby('key').apply(self.df_copy_function)


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# 2d grouping, aggregate many columns

class groupby_frame_cython_many_columns(object):
Expand All @@ -152,7 +155,7 @@ def time_sum(self):
self.df.groupby(self.labels).sum()


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# single key, long, integer key

class groupby_frame_singlekey_integer(object):
Expand All @@ -167,7 +170,7 @@ def time_sum(self):
self.df.groupby(self.labels).sum()


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# DataFrame nth

class groupby_nth(object):
Expand All @@ -189,7 +192,7 @@ def time_groupby_series_nth_none(self):
self.df[1].groupby(self.df[0]).nth(0)


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# groupby_indices replacement, chop up Series

class groupby_indices(object):
Expand Down Expand Up @@ -226,7 +229,7 @@ def time_groupby_int64_overflow(self):
self.df.groupby(list('abcde')).max()


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# count() speed

class groupby_multi_count(object):
Expand Down Expand Up @@ -269,7 +272,7 @@ def time_groupby_int_count(self):
self.df.groupby(['key1', 'key2']).count()


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# nunique() speed

class groupby_nunique(object):
Expand All @@ -285,7 +288,7 @@ def time_groupby_nunique(self):
self.df.groupby(['key1', 'key2']).nunique()


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# group with different functions per column

class groupby_agg_multi(object):
Expand Down Expand Up @@ -356,7 +359,7 @@ def time_groupby_series_simple_rank(self):
self.df.groupby('key1').rank(pct=True)


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# size() speed

class groupby_size(object):
Expand Down Expand Up @@ -386,8 +389,7 @@ def time_groupby_size(self):
self.draws.groupby(self.cats).size()



#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# groupby with a variable value for ngroups

class GroupBySuite(object):
Expand Down Expand Up @@ -582,7 +584,8 @@ def make_grouper(self, N):
return pd.date_range('1900-01-01', freq='D', periods=N,
tz='US/Central')

#----------------------------------------------------------------------

# ----------------------------------------------------------------------
# Series.value_counts

class series_value_counts(object):
Expand All @@ -607,7 +610,7 @@ def time_value_counts_strings(self):
self.s.value_counts()


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# pivot_table

class groupby_pivot_table(object):
Expand All @@ -624,7 +627,7 @@ def time_groupby_pivot_table(self):
self.df.pivot_table(index='key1', columns=['key2', 'key3'])


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# Sum booleans #2692

class groupby_sum_booleans(object):
Expand All @@ -638,7 +641,7 @@ def time_groupby_sum_booleans(self):
self.df.groupby('ii').sum()


#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# multi-indexed group sum #9049

class groupby_sum_multiindex(object):
Expand All @@ -652,7 +655,7 @@ def time_groupby_sum_multiindex(self):
self.df.groupby(level=[0, 1]).sum()


#-------------------------------------------------------------------------------
# ----------------------------------------------------------------------
# Transform testing

class Transform(object):
Expand Down Expand Up @@ -710,8 +713,6 @@ def time_transform_multi_key4(self):
self.df4.groupby(['jim', 'joe'])['jolie'].transform('max')




np.random.seed(0)
N = 120000
N_TRANSITIONS = 1400
Expand All @@ -723,9 +724,6 @@ def time_transform_multi_key4(self):
df = DataFrame({'signal': np.random.rand(N), })





class groupby_transform_series(object):
goal_time = 0.2

Expand Down
6 changes: 2 additions & 4 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def setup(self):

# dupes
self.idx_dupe = (np.array(range(30)) * 99)
self.df3 = DataFrame({'A': ([0.1] * 1000), 'B': ([1] * 1000),})
self.df3 = DataFrame({'A': ([0.1] * 1000), 'B': ([1] * 1000)})
self.df3 = concat([self.df3, (2 * self.df3), (3 * self.df3)])

self.df_big = DataFrame(dict(A=(['foo'] * 1000000)))
Expand Down Expand Up @@ -343,7 +343,7 @@ class AssignTimeseriesIndex(object):
def setup(self):
N = 100000
np.random.seed(1234)
dx = date_range('1/1/2000', periods=N, freq='H')
idx = date_range('1/1/2000', periods=N, freq='H')
self.df = DataFrame(np.random.randn(N, 1), columns=['A'], index=idx)

def time_frame_assign_timeseries_index(self):
Expand All @@ -367,5 +367,3 @@ def time_assign_with_setitem(self):
np.random.seed(1234)
for i in range(100):
self.df[i] = np.random.randn(self.N)


5 changes: 2 additions & 3 deletions asv_bench/benchmarks/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ class to_numeric_downcast(object):
dtype='datetime64[D]'), N),
'string-float': (['1.1'] * N2) + ([2] * N2),
'int-list': ([1] * N2) + ([2] * N2),
'int32': np.repeat(np.int32(1), N)
}
'int32': np.repeat(np.int32(1), N)}

def setup(self, dtype, downcast):
self.data = self.data_dict[dtype]
Expand All @@ -104,7 +103,7 @@ def setup(self):
n = 1000000
arr = np.repeat([2**63], n)
arr = arr + np.arange(n).astype('uint64')
arr = np.array([arr[i] if i%2 == 0 else
arr = np.array([arr[i] if i % 2 == 0 else
str(arr[i]) for i in range(n)],
dtype=object)

Expand Down
8 changes: 4 additions & 4 deletions asv_bench/benchmarks/io_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def setup(self, compression, engine):
# The Python 2 C parser can't read bz2 from open files.
raise NotImplementedError
try:
import s3fs
import s3fs # noqa:F401
except ImportError:
# Skip these benchmarks if `s3fs` is not installed.
raise NotImplementedError
Expand All @@ -226,16 +226,16 @@ def setup(self):
self.N = 100000
self.C = 5
self.df = DataFrame({('float{0}'.format(i), randn(self.N)) for i in range(self.C)})
self.df.to_json(self.fname,orient="records",lines=True)
self.df.to_json(self.fname, orient="records", lines=True)

def time_read_json_lines(self):
pd.read_json(self.fname, lines=True)

def time_read_json_lines_chunk(self):
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4))
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N // 4))

def peakmem_read_json_lines(self):
pd.read_json(self.fname, lines=True)

def peakmem_read_json_lines_chunk(self):
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4))
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N // 4))
10 changes: 5 additions & 5 deletions asv_bench/benchmarks/io_sql.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import sqlalchemy
import sqlalchemy # noqa:F401
from .pandas_vb_common import *
import sqlite3
from sqlalchemy import create_engine


#-------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# to_sql

class WriteSQL(object):
Expand All @@ -23,7 +23,7 @@ def time_sqlalchemy(self):
self.df.to_sql('test1', self.engine, if_exists='replace')


#-------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# read_sql

class ReadSQL(object):
Expand All @@ -47,7 +47,7 @@ def time_read_table_sqlalchemy(self):
read_sql_table('test2', self.engine)


#-------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# type specific write

class WriteSQLTypes(object):
Expand Down Expand Up @@ -75,7 +75,7 @@ def time_datetime_sqlalchemy(self):
self.df[['datetime']].to_sql('test_datetime', self.engine, if_exists='replace')


#-------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# type specific read

class ReadSQLTypes(object):
Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/join_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,11 +291,11 @@ def setup(self):
groups = tm.makeStringIndex(10).values

self.left = pd.DataFrame({'group': groups.repeat(5000),
'key' : np.tile(np.arange(0, 10000, 2), 10),
'key': np.tile(np.arange(0, 10000, 2), 10),
'lvalue': np.random.randn(50000)})

self.right = pd.DataFrame({'key' : np.arange(10000),
'rvalue' : np.random.randn(10000)})
self.right = pd.DataFrame({'key': np.arange(10000),
'rvalue': np.random.randn(10000)})

def time_merge_ordered(self):
merge_ordered(self.left, self.right, on='key', left_by='group')
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/offset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pandas import date_range

try:
import pandas.tseries.holiday
import pandas.tseries.holiday # noqa:F401
except ImportError:
pass

Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandas as pd
from collections import OrderedDict
from pandas.compat import BytesIO
import sqlite3

import os
from sqlalchemy import create_engine
import numpy as np
Expand Down Expand Up @@ -292,7 +292,7 @@ class STATA(_Packers):
def setup(self):
self._setup()

self.df3=self.df.copy()
self.df3 = self.df.copy()
self.df3['int8_'] = [randint(np.iinfo(np.int8).min, (np.iinfo(np.int8).max - 27)) for _ in range(self.N)]
self.df3['int16_'] = [randint(np.iinfo(np.int16).min, (np.iinfo(np.int16).max - 27)) for _ in range(self.N)]
self.df3['int32_'] = [randint(np.iinfo(np.int32).min, (np.iinfo(np.int32).max - 27)) for _ in range(self.N)]
Expand Down
Loading