Skip to content

Commit

Permalink
Use right=False in pandas.cut.
Browse files: browse the repository at this point in the history
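Histograms by default use half-open bins that include the left endpoint of each interval (only the last bin also includes its right endpoint), so we need to pass right=False when calling pandas.cut. With right=False, a value equal to the last bin edge falls outside every interval and comes back as NaN, so those NaNs are now filled with the last bin index (num_bins-1) instead of 0.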
  • Loading branch information
troy46 authored Oct 1, 2021
1 parent a86686a commit c027b2a
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions sweetviz/graph_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ def __init__(self, which_graph: str, to_process: FeatureToProcess):
# TODO: possible 1-off bug in counts from cut in lower bin
source_bins_series = pd.cut(to_process.source,
bins=bin_limits,
labels=False)
source_bins_series = source_bins_series.fillna(0)
labels=False,
right=False)
source_bins_series = source_bins_series.fillna(num_bins-1)
# Create empty bin_averages, then fill in with values
bin_averages = [None] * num_bins
for b in range(0, num_bins):
Expand All @@ -111,8 +112,9 @@ def __init__(self, which_graph: str, to_process: FeatureToProcess):
# TARGET NUMERIC: with compare TARGET
compare_bins_series = pd.cut(to_process.compare,
bins=bin_limits,
labels=False)
compare_bins_series = compare_bins_series.fillna(0)
labels=False,
right=False)
source_bins_series = source_bins_series.fillna(num_bins-1)
bin_averages = [None] * num_bins
for b in range(0, num_bins):
bin_averages[b] = \
Expand All @@ -124,8 +126,9 @@ def __init__(self, which_graph: str, to_process: FeatureToProcess):
source_true = to_process.source[to_process.source_target == 1]
source_bins_series = pd.cut(source_true,
bins=bin_limits,
labels=False)
source_bins_series = source_bins_series.fillna(0)
labels=False,
right=False)
source_bins_series = source_bins_series.fillna(num_bins-1)
total_counts_source = bin_counts[0] if to_process.compare is not None else bin_counts
total_counts_source = total_counts_source * len(cleaned_source)
bin_true_counts_source = [None] * num_bins
Expand Down Expand Up @@ -156,8 +159,9 @@ def __init__(self, which_graph: str, to_process: FeatureToProcess):
# TODO: possible 1-off bug in counts from cut in lower bin
compare_bins_series = pd.cut(compare_true,
bins=bin_limits,
labels=False)
compare_bins_series = compare_bins_series.fillna(0)
labels=False,
right=False)
source_bins_series = source_bins_series.fillna(num_bins-1)
total_counts_compare = bin_counts[1] * len(cleaned_compare)
bin_true_counts_compare = [None] * num_bins
for b in range(0, num_bins):
Expand Down

0 comments on commit c027b2a

Please sign in to comment.