Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: revamp null count suppression for large frames in df.info() #5974

Merged
1 commit merged into from Jan 16, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ Improvements to existing features
- perf improvements in Series datetime/timedelta binary operations (:issue:`5801`)
- `option_context` context manager now available as top-level API (:issue:`5752`)
- df.info() view now displays dtype info per column (:issue:`5682`)
- df.info() now honors option max_info_rows, to disable null counts for large frames (:issue:`5974`)
- perf improvements in DataFrame ``count/dropna`` for ``axis=1``
- Series.str.contains now has a `regex=False` keyword which can be faster for plain (non-regex) string patterns. (:issue:`5879`)
- support ``dtypes`` on ``Panel``
Expand Down
11 changes: 3 additions & 8 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,9 @@

pc_max_info_rows_doc = """
: int or None
Deprecated.
"""

pc_max_info_rows_deprecation_warning = """\
max_info_rows has been deprecated, as reprs no longer use the info view.
df.info() will usually show null-counts for each column.
For large frames this can be quite slow. max_info_rows and max_info_cols
limit this null check only to frames with smaller dimensions than specified.
"""

pc_large_repr_doc = """
Expand Down Expand Up @@ -266,9 +264,6 @@ def mpl_style_cb(key):
msg=pc_height_deprecation_warning,
rkey='display.max_rows')

cf.deprecate_option('display.max_info_rows',
msg=pc_max_info_rows_deprecation_warning)

tc_sim_interactive_doc = """
: boolean
Whether to simulate interactive mode for purposes of testing
Expand Down
29 changes: 22 additions & 7 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,20 +1419,35 @@ def info(self, verbose=True, buf=None, max_cols=None):
max_cols = get_option(
'display.max_info_columns', len(self.columns) + 1)

if verbose and len(self.columns) <= max_cols:
max_rows = get_option('display.max_info_rows', len(self) + 1)

show_counts = ((len(self.columns) <= max_cols) and
(len(self) < max_rows))
if verbose:
lines.append('Data columns (total %d columns):' %
len(self.columns))
space = max([len(com.pprint_thing(k)) for k in self.columns]) + 4
counts = self.count()
if len(cols) != len(counts): # pragma: no cover
raise AssertionError('Columns must equal counts (%d != %d)' %
(len(cols), len(counts)))
counts = None

tmpl = "%s%s"
if show_counts:
counts = self.count()
if len(cols) != len(counts): # pragma: no cover
raise AssertionError('Columns must equal counts (%d != %d)' %
(len(cols), len(counts)))
tmpl = "%s non-null %s"

dtypes = self.dtypes
for col, count in compat.iteritems(counts):
for i, col in enumerate(self.columns):
dtype = dtypes[col]
col = com.pprint_thing(col)

count= ""
if show_counts:
count = counts[i]

lines.append(_put_str(col, space) +
'%d non-null %s' % (count, dtype))
tmpl % (count, dtype))
else:
lines.append(self.columns.summary(name='Columns'))

Expand Down