Skip to content

Commit

Permalink
Merge pull request #5974 from y-p/PR_info_max_info_rows
Browse files Browse the repository at this point in the history
ENH: revamp null count suppression for large frames in df.info()
  • Loading branch information
y-p committed Jan 16, 2014
2 parents c323daf + a7d8227 commit 63ca307
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 15 deletions.
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ Improvements to existing features
- perf improvements in Series datetime/timedelta binary operations (:issue:`5801`)
- `option_context` context manager now available as top-level API (:issue:`5752`)
- df.info() view now displays dtype info per column (:issue:`5682`)
- df.info() now honors option max_info_rows, disabling null counts for large frames (:issue:`5974`)
- perf improvements in DataFrame ``count/dropna`` for ``axis=1``
- Series.str.contains now has a `regex=False` keyword which can be faster for plain (non-regex) string patterns. (:issue:`5879`)
- support ``dtypes`` on ``Panel``
Expand Down
11 changes: 3 additions & 8 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,9 @@

pc_max_info_rows_doc = """
: int or None
Deprecated.
"""

pc_max_info_rows_deprecation_warning = """\
max_info_rows has been deprecated, as reprs no longer use the info view.
df.info() will usually show null-counts for each column.
For large frames this can be quite slow. max_info_rows and max_info_cols
limit this null check only to frames with smaller dimensions than specified.
"""

pc_large_repr_doc = """
Expand Down Expand Up @@ -266,9 +264,6 @@ def mpl_style_cb(key):
msg=pc_height_deprecation_warning,
rkey='display.max_rows')

cf.deprecate_option('display.max_info_rows',
msg=pc_max_info_rows_deprecation_warning)

tc_sim_interactive_doc = """
: boolean
Whether to simulate interactive mode for purposes of testing
Expand Down
29 changes: 22 additions & 7 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,20 +1419,35 @@ def info(self, verbose=True, buf=None, max_cols=None):
max_cols = get_option(
'display.max_info_columns', len(self.columns) + 1)

if verbose and len(self.columns) <= max_cols:
max_rows = get_option('display.max_info_rows', len(self) + 1)

show_counts = ((len(self.columns) <= max_cols) and
(len(self) < max_rows))
if verbose:
lines.append('Data columns (total %d columns):' %
len(self.columns))
space = max([len(com.pprint_thing(k)) for k in self.columns]) + 4
counts = self.count()
if len(cols) != len(counts): # pragma: no cover
raise AssertionError('Columns must equal counts (%d != %d)' %
(len(cols), len(counts)))
counts = None

tmpl = "%s%s"
if show_counts:
counts = self.count()
if len(cols) != len(counts): # pragma: no cover
raise AssertionError('Columns must equal counts (%d != %d)' %
(len(cols), len(counts)))
tmpl = "%s non-null %s"

dtypes = self.dtypes
for col, count in compat.iteritems(counts):
for i, col in enumerate(self.columns):
dtype = dtypes[col]
col = com.pprint_thing(col)

count= ""
if show_counts:
count = counts[i]

lines.append(_put_str(col, space) +
'%d non-null %s' % (count, dtype))
tmpl % (count, dtype))
else:
lines.append(self.columns.summary(name='Columns'))

Expand Down

0 comments on commit 63ca307

Please sign in to comment.