Skip to content

Commit

Permalink
Merge pull request #99 from troy46/histogram-bug-fix
Browse files Browse the repository at this point in the history
Fix the bug where the first bin in the histogram always has a target of zero
  • Loading branch information
fbdesignpro authored Nov 14, 2023
2 parents 36af881 + c027b2a commit 2ec0848
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions sweetviz/graph_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ def __init__(self, which_graph: str, to_process: FeatureToProcess):
# TODO: possible 1-off bug in counts from cut in lower bin
# Map each source value to the integer index of the bin it falls into
# (labels=False returns bin indices rather than interval labels).
# right=False makes the bins left-closed, so each interval excludes its
# right edge.
source_bins_series = pd.cut(to_process.source,
bins=bin_limits,
labels=False,
right=False)
# Entries that pd.cut left as NaN (not placed in any bin) are assigned
# to the last bin index.
source_bins_series = source_bins_series.fillna(num_bins-1)
# Create empty bin_averages, then fill in with values
bin_averages = [None] * num_bins
for b in range(0, num_bins):
Expand All @@ -111,7 +113,9 @@ def __init__(self, which_graph: str, to_process: FeatureToProcess):
# TARGET NUMERIC: with compare TARGET
# Map each compare value to the integer index of the bin it falls into
# (labels=False returns bin indices rather than interval labels).
# right=False makes the bins left-closed, so each interval excludes its
# right edge and a value sitting on the top edge comes back as NaN.
compare_bins_series = pd.cut(to_process.compare,
                             bins=bin_limits,
                             labels=False,
                             right=False)
# Assign the NaN entries to the last bin index. The fill must target the
# compare series computed just above: filling source_bins_series here
# left the compare NaNs unassigned to any bin.
compare_bins_series = compare_bins_series.fillna(num_bins-1)
bin_averages = [None] * num_bins
for b in range(0, num_bins):
bin_averages[b] = \
Expand All @@ -123,7 +127,9 @@ def __init__(self, which_graph: str, to_process: FeatureToProcess):
# Keep only the source values whose target equals 1.
source_true = to_process.source[to_process.source_target == 1]
# Map each of those values to the integer index of its bin; right=False
# makes the bins left-closed, so each interval excludes its right edge.
source_bins_series = pd.cut(source_true,
bins=bin_limits,
labels=False,
right=False)
# Entries that pd.cut left as NaN (not placed in any bin) are assigned
# to the last bin index.
source_bins_series = source_bins_series.fillna(num_bins-1)
total_counts_source = bin_counts[0] if to_process.compare is not None else bin_counts
total_counts_source = total_counts_source * len(cleaned_source)
bin_true_counts_source = [None] * num_bins
Expand Down Expand Up @@ -154,7 +160,9 @@ def __init__(self, which_graph: str, to_process: FeatureToProcess):
# TODO: possible 1-off bug in counts from cut in lower bin
# Map each value of compare_true to the integer index of the bin it falls
# into (labels=False returns bin indices rather than interval labels).
# right=False makes the bins left-closed, so each interval excludes its
# right edge and a value sitting on the top edge comes back as NaN.
compare_bins_series = pd.cut(compare_true,
                             bins=bin_limits,
                             labels=False,
                             right=False)
# Assign the NaN entries to the last bin index. The fill must target the
# compare series computed just above: filling source_bins_series here
# left the compare NaNs unassigned to any bin.
compare_bins_series = compare_bins_series.fillna(num_bins-1)
total_counts_compare = bin_counts[1] * len(cleaned_compare)
bin_true_counts_compare = [None] * num_bins
for b in range(0, num_bins):
Expand Down

0 comments on commit 2ec0848

Please sign in to comment.