From 86c2d7dd04a0c305f6a986380ba50bd6d70b1907 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 08:09:22 -0700 Subject: [PATCH 01/11] document group and bin --- README.md | 2 +- src/transforms/bin.d.ts | 212 +++++++++++++++++++++++++++++++++++ src/transforms/group.d.ts | 229 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 442 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bb1176c0d2..5bf49ec073 100644 --- a/README.md +++ b/README.md @@ -2287,7 +2287,7 @@ Most aggregation methods require binding the output channel to an input channel; Plot.groupX({y: "sum"}, {x: "species", y: "body_mass_g"}) ``` -You can control whether a channel is computed before or after grouping. If a channel is declared only in *options* (and it is not a special group-eligible channel such as *x*, *y*, *z*, *fill*, or stroke), it will be computed after grouping and be passed the grouped data: each datum is the array of input data corresponding to the current group. +You can control whether a channel is computed before or after grouping. If a channel is declared only in *options* (and it is not a special group-eligible channel such as *x*, *y*, *z*, *fill*, or *stroke*), it will be computed after grouping and be passed the grouped data: each datum is the array of input data corresponding to the current group. 
```js Plot.groupX({y: "count"}, {x: "species", title: group => group.map(d => d.body_mass_g).join("\n")}) diff --git a/src/transforms/bin.d.ts b/src/transforms/bin.d.ts index bde83fc51f..235a64a98f 100644 --- a/src/transforms/bin.d.ts +++ b/src/transforms/bin.d.ts @@ -10,9 +10,40 @@ export type ThresholdsFunction = (values: any[], min: any, max: any) => any[]; export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval; export interface BinOptions { + /** + * Whether the distribution is cumulative (use -1 for the [complementary + * cumulative](https://en.wikipedia.org/wiki/Cumulative_distribution_function#Complementary_cumulative_distribution_function_.28tail_distribution.29)) + */ cumulative?: boolean | number; + /** + * The domain, or a function that receives the values and returns the + * domain. Values outside the domain will be omitted. + */ domain?: ((values: any[]) => [min: any, max: any]) | [min: any, max: any]; + /** + * The **thresholds** value may be specified as: + * + * * *auto* (default) - Scott’s rule, capped at 200 + * * *freedman-diaconis* - the [Freedman–Diaconis + * rule](https://en.wikipedia.org/wiki/Freedman–Diaconis_rule) + * * *scott* - [Scott’s normal reference + * rule](https://en.wikipedia.org/wiki/Histogram#Scott.27s_normal_reference_rule) + * * *sturges* - [Sturges’ + * formula](https://en.wikipedia.org/wiki/Histogram#Sturges.27_formula) + * * a count (hint) representing the desired number of bins + * * an array of *n* threshold values for *n* - 1 bins + * * an interval or time interval (see also **interval**) + * * a function that returns an array, count, or time interval + */ thresholds?: Thresholds; + /** + * An alternative way of specifying the bins thresholds. It may be either an + * interval (object with a floor method), a time interval such as *day*, or a + * number. If a number *n*, threshold values are consecutive multiples of *n* + * that span the domain. 
When the thresholds are specified as an interval, and + * the default **domain** is used, the domain will automatically be extended + * to start and end to align with the interval. + */ interval?: RangeInterval; } @@ -35,17 +66,198 @@ export interface BinReducerImplementation { } export interface BinOutputOptions extends BinOptions { + /** + * The data reducer; defaults to the array of values that belong to the bin in + * input order. + */ data?: BinReducer | null; + /** + * The filter reducer, defaults to a check on empty bins. Use null to return + * all bins, for example to impute sum=0 for a line chart. + */ filter?: BinReducer | null; + /** + * The order in which the bins are generated, specified as an aggregation + * method (defaults to ascending). + */ sort?: BinReducer | null; + /** + * Reverse the order in which the bins are generated. + */ reverse?: boolean; } /** How to reduce binned channel values. */ export type BinOutputs = ChannelReducers & BinOutputOptions; +/** + * Aggregates continuous data—quantitative or temporal values such as + * temperatures or times—into discrete bins and then computes summary statistics + * for each bin such as a count or sum. The binX transform is often used in + * conjunction with the rectY mark, to make histograms. 
+ * + * ``` + * Plot.rectY(penguins, Plot.binX({y: "count"}, {x: "culmen_length_mm"})) + * ``` + * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *distinct* - the number of distinct values + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted + * frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s + * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * *x* - the middle of the bin’s *x* extent (when binning on *x*) + * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) + * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) + * * *y* - the middle of the bin’s *y* extent (when binning on *y*) + * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) + * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) + * * a function to be passed the array of values for each bin and the extent of + * the bin + * * an object with a *reduce* method, and optionally a *scope* + * + * Most aggregation methods require binding the output channel to an input + * channel; for example, if you want the **y** output channel to be a *sum* (not + * merely a count), there should be a corresponding **y** input channel + * specifying which values to sum. 
If there is not, *sum* will be equivalent to + * *count*. + * + * To control how *x* is divided into bins, the following options are supported: + * + * * **thresholds** - the threshold values; see below + * * **interval** - an alternative method of specifying thresholds + * * **domain** - values outside the domain will be omitted + * * **cumulative** - if positive, each bin will contain all lesser bins + */ export function binX(outputs?: BinOutputs, options?: T & BinOptions): Transformed; +/** + * Aggregates continuous data—quantitative or temporal values such as + * temperatures or times—into discrete bins and then computes summary statistics + * for each bin such as a count or sum. The binY transform is often used in + * conjunction with the rectX mark, to make vertical histograms. + * + * ``` + * Plot.rectX(penguins, Plot.binY({x: "count"}, {y: "culmen_length_mm"})) + * ``` + * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *distinct* - the number of distinct values + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted + * frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s + * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * *x* - the middle of the bin’s *x* extent (when binning on *x*) + * * *x1* - the 
lower bound of the bin’s *x* extent (when binning on *x*) + * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) + * * *y* - the middle of the bin’s *y* extent (when binning on *y*) + * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) + * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) + * * a function to be passed the array of values for each bin and the extent of + * the bin + * * an object with a *reduce* method, and optionally a *scope* + * + * Most aggregation methods require binding the output channel to an input + * channel; for example, if you want the **y** output channel to be a *sum* (not + * merely a count), there should be a corresponding **y** input channel + * specifying which values to sum. If there is not, *sum* will be equivalent to + * *count*. + * + * To control how *y* is divided into bins, the following options are supported: + * + * * **thresholds** - the threshold values; see below + * * **interval** - an alternative method of specifying thresholds + * * **domain** - values outside the domain will be omitted + * * **cumulative** - if positive, each bin will contain all lesser bins + */ export function binY(outputs?: BinOutputs, options?: T & BinOptions): Transformed; +/** + * Aggregates continuous data—quantitative or temporal values such as + * temperatures or times—into discrete *x* and *y* bins and then computes + * summary statistics for each bin such as a count or sum. The bin transform is + * often used in conjunction with the rect mark, to make heatmaps. 
+ * + * ``` + * Plot.rect(penguins, Plot.bin({fill: "count"}, {x: "culmen_depth_mm", y: "culmen_length_mm"})) + * ``` + * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *distinct* - the number of distinct values + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted + * frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s + * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * *x* - the middle of the bin’s *x* extent (when binning on *x*) + * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) + * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) + * * *y* - the middle of the bin’s *y* extent (when binning on *y*) + * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) + * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) + * * a function to be passed the array of values for each bin and the extent of + * the bin + * * an object with a *reduce* method, and optionally a *scope* + * + * Most aggregation methods require binding the output channel to an input + * channel; for example, if you want the **fill** output channel to be a *sum* + * (not merely a count), there should be a corresponding **fill** input channel + * specifying which 
values to sum. If there is not, *sum* will be equivalent to + * *count*. + * + * To control how *x* and *y* are divided into bins, the following options are supported: + * + * * **thresholds** - the threshold values; see below + * * **interval** - an alternative method of specifying thresholds + * * **domain** - values outside the domain will be omitted + * * **cumulative** - if positive, each bin will contain all lesser bins + * + * To pass separate binning options for *x* and *y*, the **x** and **y** input + * channels can be specified as an object with the options above and a **value** + * option to specify the input channel values. (🌶 NOT TYPED.) + */ export function bin(outputs?: BinOutputs, options?: T & BinOptions): Transformed; diff --git a/src/transforms/group.d.ts b/src/transforms/group.d.ts index d88d99fa3a..aabe2582a7 100644 --- a/src/transforms/group.d.ts +++ b/src/transforms/group.d.ts @@ -3,18 +3,247 @@ import type {Reducer} from "../reducer.js"; import type {Transformed} from "./basic.js"; export interface GroupOutputOptions { + /** + * The data reducer; defaults to the subset of data corresponding to the + * group in input order. + */ data?: Reducer | null; + /** + * The filter reducer, defaults to a check on empty groups. Use null to return + * all groups, for example to impute sum=0 for a line chart. + */ filter?: Reducer | null; + /** + * The order in which the groups are generated. + */ sort?: Reducer | null; + /** + * Reverse the order in which the groups are generated. + */ reverse?: boolean; } export type GroupOutputs = ChannelReducers & GroupOutputOptions; +/** + * Aggregates ordinal or categorical data—such as names—into groups and then + * computes summary statistics for each group such as a count or sum. Groups are + * computed on the first channel of *z*, *fill*, or *stroke*, if any. If none of + * *z*, *fill*, or *stroke* are channels, then all data (within each facet) is + * placed into a single group. 
+ * + * ```js + * Plot.groupZ({x: "proportion"}, {fill: "species"}) + * ``` + * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted + * frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s + * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * a function - passed the array of values for each group + * * an object with a *reduce* method, and optionally a *scope* + * + * Most aggregation methods require binding the output channel to an input + * channel; for example, if you want the **r** output channel to be a *sum* (not + * merely a count), there should be a corresponding **r** input channel + * specifying which values to sum. + * + * You can control whether a channel is computed before or after grouping. If a + * channel is declared only in *options* (and it is not a special group-eligible + * channel such as *x*, *y*, *z*, *fill*, or *stroke*), it will be computed + * after grouping and be passed the grouped data: each datum is the array of + * input data corresponding to the current group. + * + * The default reducer for the **title** channel returns a summary list of the + * top 5 values with the corresponding number of occurrences. 
+ * + * See also **groupX**, **groupY**, and **group** if you need to group by *x*, + * *y*, or both. + */ export function groupZ(outputs?: GroupOutputs, options?: T): Transformed; +/** + * Aggregates ordinal or categorical data—such as names—into groups and then + * computes summary statistics for each group such as a count or sum. Typically + * used with the **barY** mark for a categorical histogram. Groups are computed + * on *x* and the first channel of *z*, *fill*, or *stroke*, if any. + * + * ```js + * Plot.groupX({y: "sum"}, {x: "species", y: "body_mass_g"}) + * ``` + * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted + * frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s + * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * a function - passed the array of values for each group + * * an object with a *reduce* method, and optionally a *scope* + * + * Most aggregation methods require binding the output channel to an input + * channel; for example, if you want the **r** output channel to be a *sum* (not + * merely a count), there should be a corresponding **r** input channel + * specifying which values to sum. 
+ * + * You can control whether a channel is computed before or after grouping. If a + * channel is declared only in *options* (and it is not a special group-eligible + * channel such as *x*, *y*, *z*, *fill*, or *stroke*), it will be computed + * after grouping and be passed the grouped data: each datum is the array of + * input data corresponding to the current group. + * + * The default reducer for the **title** channel returns a summary list of the + * top 5 values with the corresponding number of occurrences. + * + * The outputs may also include *filter* and *sort* options (with *reverse*) to + * specify which groups are generated. Use filter: null to generate empty + * groups, for example to impute sum=0 for a line chart. The *sort* option can + * also target the domain of an associated scale such as *x*, *fx* or *fy*. + * + * See also **groupZ**, **groupY**, and **group** if you need to group by + * series, *y*, or both *x* and *y*. + */ export function groupX(outputs?: GroupOutputs, options?: T): Transformed; +/** + * Aggregates ordinal or categorical data—such as names—into groups and then + * computes summary statistics for each group such as a count or sum. Typically + * used with the **barX** mark for a categorical histogram. Groups are computed + * on *y* and the first channel of *z*, *fill*, or *stroke*, if any. 
+ * + * ```js + * Plot.groupY({x: "sum"}, {y: "species", x: "body_mass_g"}) + * ``` + * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted + * frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s + * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * a function - passed the array of values for each group + * * an object with a *reduce* method, and optionally a *scope* + * + * Most aggregation methods require binding the output channel to an input + * channel; for example, if you want the **r** output channel to be a *sum* (not + * merely a count), there should be a corresponding **r** input channel + * specifying which values to sum. + * + * You can control whether a channel is computed before or after grouping. If a + * channel is declared only in *options* (and it is not a special group-eligible + * channel such as *x*, *y*, *z*, *fill*, or *stroke*), it will be computed + * after grouping and be passed the grouped data: each datum is the array of + * input data corresponding to the current group. + * + * The default reducer for the **title** channel returns a summary list of the + * top 5 values with the corresponding number of occurrences. 
+ * + * The outputs may also include *filter* and *sort* options (with *reverse*) to + * specify which groups are generated. Use filter: null to generate empty + * groups, for example to impute sum=0 for a line chart. The *sort* option can + * also target the domain of an associated scale such as *y*, *fx* or *fy*. + * + * See also **groupZ**, **groupX**, and **group** if you need to group by + * series, *x*, or both *x* and *y*. + */ export function groupY(outputs?: GroupOutputs, options?: T): Transformed; +/** + * Aggregates ordinal or categorical data—such as names—into groups and then + * computes summary statistics for each group such as a count or sum. Typically + * used with the **cell** mark for a categorical heatmap. Groups are computed on + * *x* and *y*, and the first channel of *z*, *fill*, or *stroke*, if any. + * + * ```js + * Plot.group({fill: "count"}, {x: "island", y: "species"}) + * ``` + * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted + * frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s + * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * a function - passed the array of values for each group + * * an object with a *reduce* method, and
optionally a *scope* + * + * Most aggregation methods require binding the output channel to an input + * channel; for example, if you want the **r** output channel to be a *sum* (not + * merely a count), there should be a corresponding **r** input channel + * specifying which values to sum. + * + * You can control whether a channel is computed before or after grouping. If a + * channel is declared only in *options* (and it is not a special group-eligible + * channel such as *x*, *y*, *z*, *fill*, or *stroke*), it will be computed + * after grouping and be passed the grouped data: each datum is the array of + * input data corresponding to the current group. + * + * The default reducer for the **title** channel returns a summary list of the + * top 5 values with the corresponding number of occurrences. + * + * The outputs may also include *filter* and *sort* options (with *reverse*) to + * specify which groups are generated. Use filter: null to generate empty + * groups, for example to impute sum=0 for empty cells. + * + * See also **groupZ**, **groupX**, and **groupY** if you need to group by + * series, *x*, or *y*. 
+ */ export function group(outputs?: GroupOutputs, options?: T): Transformed; From d0b04fc50044ebbdd49efec399ccd51229d77c15 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 08:16:56 -0700 Subject: [PATCH 02/11] edits --- src/transforms/bin.d.ts | 48 +++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/src/transforms/bin.d.ts b/src/transforms/bin.d.ts index 235a64a98f..50cd2e6fae 100644 --- a/src/transforms/bin.d.ts +++ b/src/transforms/bin.d.ts @@ -11,31 +11,33 @@ export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval; export interface BinOptions { /** - * Whether the distribution is cumulative (use -1 for the [complementary - * cumulative](https://en.wikipedia.org/wiki/Cumulative_distribution_function#Complementary_cumulative_distribution_function_.28tail_distribution.29)) + * If true or a positive number, produce a cumulative distribution; if a + * negative number, produce a [complementary cumulative](https://en.wikipedia.org/wiki/Cumulative_distribution_function#Complementary_cumulative_distribution_function_.28tail_distribution.29) + * distribution; if false or zero (the default), produce a probability + * distribution. */ cumulative?: boolean | number; + /** * The domain, or a function that receives the values and returns the * domain. Values outside the domain will be omitted. 
*/ domain?: ((values: any[]) => [min: any, max: any]) | [min: any, max: any]; + /** * The **thresholds** value may be specified as: * * * *auto* (default) - Scott’s rule, capped at 200 - * * *freedman-diaconis* - the [Freedman–Diaconis - * rule](https://en.wikipedia.org/wiki/Freedman–Diaconis_rule) - * * *scott* - [Scott’s normal reference - * rule](https://en.wikipedia.org/wiki/Histogram#Scott.27s_normal_reference_rule) - * * *sturges* - [Sturges’ - * formula](https://en.wikipedia.org/wiki/Histogram#Sturges.27_formula) + * * *freedman-diaconis* - the [Freedman–Diaconis rule](https://en.wikipedia.org/wiki/Freedman–Diaconis_rule) + * * *scott* - [Scott’s normal reference rule](https://en.wikipedia.org/wiki/Histogram#Scott.27s_normal_reference_rule) + * * *sturges* - [Sturges’ formula](https://en.wikipedia.org/wiki/Histogram#Sturges.27_formula) * * a count (hint) representing the desired number of bins * * an array of *n* threshold values for *n* - 1 bins * * an interval or time interval (see also **interval**) * * a function that returns an array, count, or time interval */ thresholds?: Thresholds; + /** * An alternative way of specifying the bins thresholds. It may be either an * interval (object with a floor method), a time interval such as *day*, or a @@ -71,16 +73,19 @@ export interface BinOutputOptions extends BinOptions { * input order. */ data?: BinReducer | null; + /** * The filter reducer, defaults to a check on empty bins. Use null to return * all bins, for example to impute sum=0 for a line chart. */ filter?: BinReducer | null; + /** * The order in which the bins are generated, specified as an aggregation * method (defaults to ascending). */ sort?: BinReducer | null; + /** * Reverse the order in which the bins are generated. 
*/ @@ -107,8 +112,7 @@ export type BinOutputs = ChannelReducers & BinOutputOptions; * * *count* - the number of elements (frequency) * * *distinct* - the number of distinct values * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted - * frequency) + * * *proportion* - the sum proportional to the overall total (weighted frequency) * * *proportion-facet* - the sum proportional to the facet total * * *min* - the minimum value * * *min-index* - the zero-based index of the minimum value @@ -119,16 +123,14 @@ export type BinOutputs = ChannelReducers & BinOutputOptions; * * *mode* - the value with the most occurrences * * *pXX* - the percentile value, where XX is a number in [00,99] * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s - * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) * * *x* - the middle of the bin’s *x* extent (when binning on *x*) * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) * * *y* - the middle of the bin’s *y* extent (when binning on *y*) * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) - * * a function to be passed the array of values for each bin and the extent of - * the bin + * * a function to be passed the array of values for each bin and the extent of the bin * * an object with a *reduce* method, and optionally a *scope* * * Most aggregation methods require binding the output channel to an input @@ -163,8 +165,7 @@ export function binX(outputs?: BinOutputs, options?: T & BinOptions): Transfo * * *count* - the number of elements (frequency) * * *distinct* - the 
number of distinct values * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted - * frequency) + * * *proportion* - the sum proportional to the overall total (weighted frequency) * * *proportion-facet* - the sum proportional to the facet total * * *min* - the minimum value * * *min-index* - the zero-based index of the minimum value @@ -175,16 +176,14 @@ export function binX(outputs?: BinOutputs, options?: T & BinOptions): Transfo * * *mode* - the value with the most occurrences * * *pXX* - the percentile value, where XX is a number in [00,99] * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s - * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) * * *x* - the middle of the bin’s *x* extent (when binning on *x*) * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) * * *y* - the middle of the bin’s *y* extent (when binning on *y*) * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) - * * a function to be passed the array of values for each bin and the extent of - * the bin + * * a function to be passed the array of values for each bin and the extent of the bin * * an object with a *reduce* method, and optionally a *scope* * * Most aggregation methods require binding the output channel to an input @@ -219,8 +218,7 @@ export function binY(outputs?: BinOutputs, options?: T & BinOptions): Transfo * * *count* - the number of elements (frequency) * * *distinct* - the number of distinct values * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted - * 
frequency) + * * *proportion* - the sum proportional to the overall total (weighted frequency) * * *proportion-facet* - the sum proportional to the facet total * * *min* - the minimum value * * *min-index* - the zero-based index of the minimum value @@ -231,16 +229,14 @@ export function binY(outputs?: BinOutputs, options?: T & BinOptions): Transfo * * *mode* - the value with the most occurrences * * *pXX* - the percentile value, where XX is a number in [00,99] * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s - * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) * * *x* - the middle of the bin’s *x* extent (when binning on *x*) * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) * * *y* - the middle of the bin’s *y* extent (when binning on *y*) * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) - * * a function to be passed the array of values for each bin and the extent of - * the bin + * * a function to be passed the array of values for each bin and the extent of the bin * * an object with a *reduce* method, and optionally a *scope* * * Most aggregation methods require binding the output channel to an input From 7009c0e7e7b7427dcc4195df1e55a5c41a6e1952 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 09:37:39 -0700 Subject: [PATCH 03/11] allow iterable thresholds --- src/transforms/bin.d.ts | 2 +- test/plots/function-contour.ts | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/transforms/bin.d.ts b/src/transforms/bin.d.ts index 50cd2e6fae..8fafac04dc 100644 --- a/src/transforms/bin.d.ts +++ 
b/src/transforms/bin.d.ts @@ -7,7 +7,7 @@ export type ThresholdsName = "freedman-diaconis" | "scott" | "sturges" | "auto"; export type ThresholdsFunction = (values: any[], min: any, max: any) => any[]; -export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval; +export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval | Iterable; export interface BinOptions { /** diff --git a/test/plots/function-contour.ts b/test/plots/function-contour.ts index 99b8ad53e5..9206e2eb37 100644 --- a/test/plots/function-contour.ts +++ b/test/plots/function-contour.ts @@ -1,4 +1,5 @@ import * as Plot from "@observablehq/plot"; +import * as d3 from "d3"; export async function functionContour() { return Plot.plot({ @@ -12,7 +13,8 @@ export async function functionContour() { x1: 0, y1: 0, x2: 4 * Math.PI, - y2: 4 * Math.PI * (350 / 580) + y2: 4 * Math.PI * (350 / 580), + thresholds: d3.ticks(-80, 50, 10) // testing explicit thresholds }) ] }); From 6bb56615b5ef46dd0cab0581c11189505f344555 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 10:21:45 -0700 Subject: [PATCH 04/11] edits --- src/reducer.d.ts | 1 + src/transforms/bin.d.ts | 76 +++++++++++++++++++++++++++-------------- 2 files changed, 51 insertions(+), 26 deletions(-) diff --git a/src/reducer.d.ts b/src/reducer.d.ts index 3ccc311069..da816bde4f 100644 --- a/src/reducer.d.ts +++ b/src/reducer.d.ts @@ -32,4 +32,5 @@ export interface ReducerImplementation { reduceIndex(index: number[], values: any[]): any; } +/** How to reduce aggregated values. 
*/ export type Reducer = ReducerName | ReducerFunction | ReducerImplementation; diff --git a/src/transforms/bin.d.ts b/src/transforms/bin.d.ts index 8fafac04dc..015fddd9f3 100644 --- a/src/transforms/bin.d.ts +++ b/src/transforms/bin.d.ts @@ -3,52 +3,60 @@ import type {RangeInterval} from "../interval.js"; import type {Reducer} from "../reducer.js"; import type {Transformed} from "./basic.js"; +/** The built-in thresholds implementations. */ export type ThresholdsName = "freedman-diaconis" | "scott" | "sturges" | "auto"; -export type ThresholdsFunction = (values: any[], min: any, max: any) => any[]; +/** How to subdivide a continuous domain into discrete bins (based on data). */ +export type ThresholdsFunction = (values: any[], min: any, max: any) => any[] | RangeInterval | number; -export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval | Iterable; +/** How to subdivide a continuous domain into discrete bins. */ +export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval | any[]; +/** Options for the bin transform. */ export interface BinOptions { /** * If true or a positive number, produce a cumulative distribution; if a * negative number, produce a [complementary cumulative](https://en.wikipedia.org/wiki/Cumulative_distribution_function#Complementary_cumulative_distribution_function_.28tail_distribution.29) - * distribution; if false or zero (the default), produce a probability + * distribution; if false or zero (the default), produce a frequency * distribution. */ cumulative?: boolean | number; /** - * The domain, or a function that receives the values and returns the - * domain. Values outside the domain will be omitted. + * The domain of allowed values; if specified, values outside the domain will + * be ignored; otherwise defaults to the extent [*min*, *max*] of input + * values. If a function, it is passed the input values and must return the + * domain. 
When **thresholds** are specified as an interval and the default + * domain is used, the start and end of the domain will be extended to align + * with the interval. */ domain?: ((values: any[]) => [min: any, max: any]) | [min: any, max: any]; /** - * The **thresholds** value may be specified as: + * How to subdivide the domain into bins. May be one of: * - * * *auto* (default) - Scott’s rule, capped at 200 + * * *auto* (default) - Scott’s rule, capped at 200 bins * * *freedman-diaconis* - the [Freedman–Diaconis rule](https://en.wikipedia.org/wiki/Freedman–Diaconis_rule) * * *scott* - [Scott’s normal reference rule](https://en.wikipedia.org/wiki/Histogram#Scott.27s_normal_reference_rule) * * *sturges* - [Sturges’ formula](https://en.wikipedia.org/wiki/Histogram#Sturges.27_formula) - * * a count (hint) representing the desired number of bins + * * a count representing the desired number of bins (a hint; not guaranteed) * * an array of *n* threshold values for *n* - 1 bins - * * an interval or time interval (see also **interval**) - * * a function that returns an array, count, or time interval + * * an interval; see **interval** + * * a function that returns an array, count, or interval */ thresholds?: Thresholds; /** - * An alternative way of specifying the bins thresholds. It may be either an - * interval (object with a floor method), a time interval such as *day*, or a - * number. If a number *n*, threshold values are consecutive multiples of *n* - * that span the domain. When the thresholds are specified as an interval, and - * the default **domain** is used, the domain will automatically be extended - * to start and end to align with the interval. + * How to subdivide the domain into bins; an alternative to **thresholds**. + * May be either: an interval object that implements *floor*, *offset*, and + * *range* methods; a named time interval such as *day*; or a number. 
If a + * number *n*, threshold values are consecutive multiples of *n* that span the + * domain. */ interval?: RangeInterval; } +/** How to reduce binned values. */ export type BinReducer = | Reducer | BinReducerFunction @@ -60,35 +68,51 @@ export type BinReducer = | "y1" | "y2"; +/** A functional bin reducer implementation. */ export type BinReducerFunction = (values: any[], extent: {x1: any; y1: any; x2: any; y2: any}) => any; -// TODO scope, label +/** A bin reducer implementation. */ export interface BinReducerImplementation { + /** + * Given an *index* representing the contents of the current bin, the array of + * input channel *values*, and the current bin’s *extent*, returns the + * corresponding reduced value to output. + */ reduceIndex(index: number[], values: any[], extent: {x1: any; y1: any; x2: any; y2: any}): any; + // TODO scope + // TODO label } +/** Options for outputs of the bin transform. */ export interface BinOutputOptions extends BinOptions { /** - * The data reducer; defaults to the array of values that belong to the bin in - * input order. + * How to reduce data; defaults to the identity reducer, outputting the array + * of data for each bin in input order. */ data?: BinReducer | null; /** - * The filter reducer, defaults to a check on empty bins. Use null to return - * all bins, for example to impute sum=0 for a line chart. + * How to filter bins: if the reducer emits a falsey value, the bin will be + * dropped; by default, empty bins are dropped. Use null to disable filtering + * and return all bins, for example to impute missing zeroes when summing + * values for a line chart. */ filter?: BinReducer | null; /** - * The order in which the bins are generated, specified as an aggregation - * method (defaults to ascending). + * How to order bins. By default, bins are returned in ascending natural order + * along *x*, *y*, and *z* (or *fill* or *stroke*). 
Bin order affects draw + * order of overlapping marks, and may be useful in conjunction with the stack + * transform which defaults to input order. For example to place the smallest + * bin within each stack on the baseline: + * + * ```js + * Plot.binX({y: "count", sort: "count"}, {fill: "sex", x: "weight"}) + * ``` */ sort?: BinReducer | null; - /** - * Reverse the order in which the bins are generated. - */ + /** If true, reverse the order of generated bins; defaults to false. */ reverse?: boolean; } From bd58774ad6138958c4ab565391ed61febcd7bb17 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 10:42:42 -0700 Subject: [PATCH 05/11] more type specificity --- src/interval.d.ts | 6 +++--- src/reducer.d.ts | 9 +++++---- src/transforms/bin.d.ts | 12 ++++++------ 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/interval.d.ts b/src/interval.d.ts index faf82793ce..360ab91d2e 100644 --- a/src/interval.d.ts +++ b/src/interval.d.ts @@ -82,10 +82,10 @@ export interface NiceIntervalImplementation extends RangeIntervalImplementati type LiteralInterval = T extends Date ? TimeIntervalName : T extends number ? number : never; /** How to partition a continuous range into discrete intervals. */ -export type Interval = LiteralInterval | IntervalImplementation; +export type Interval = LiteralInterval | IntervalImplementation; /** An interval that supports the range method, say for thresholds or ticks. */ -export type RangeInterval = LiteralInterval | RangeIntervalImplementation; +export type RangeInterval = LiteralInterval | RangeIntervalImplementation; /** An interval that can be used to nice a scale domain. 
*/ -export type NiceInterval = LiteralInterval | NiceIntervalImplementation; +export type NiceInterval = LiteralInterval | NiceIntervalImplementation; diff --git a/src/reducer.d.ts b/src/reducer.d.ts index da816bde4f..d166da0716 100644 --- a/src/reducer.d.ts +++ b/src/reducer.d.ts @@ -25,11 +25,12 @@ export type ReducerName = | "mode" | ReducerPercentile; -export type ReducerFunction = (values: any[]) => any; +export type ReducerFunction = (values: S[]) => T; -// TODO scope, label -export interface ReducerImplementation { - reduceIndex(index: number[], values: any[]): any; +export interface ReducerImplementation { + reduceIndex(index: number[], values: S[]): T; + // TODO scope + // TODO label } /** How to reduce aggregated values. */ diff --git a/src/transforms/bin.d.ts b/src/transforms/bin.d.ts index 015fddd9f3..ad839de83a 100644 --- a/src/transforms/bin.d.ts +++ b/src/transforms/bin.d.ts @@ -7,13 +7,13 @@ import type {Transformed} from "./basic.js"; export type ThresholdsName = "freedman-diaconis" | "scott" | "sturges" | "auto"; /** How to subdivide a continuous domain into discrete bins (based on data). */ -export type ThresholdsFunction = (values: any[], min: any, max: any) => any[] | RangeInterval | number; +export type ThresholdsFunction = (values: T[], min: T, max: T) => T[] | RangeInterval | number; /** How to subdivide a continuous domain into discrete bins. */ -export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval | any[]; +export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval | T[] | number; /** Options for the bin transform. 
*/ -export interface BinOptions { +export interface BinOptions { /** * If true or a positive number, produce a cumulative distribution; if a * negative number, produce a [complementary cumulative](https://en.wikipedia.org/wiki/Cumulative_distribution_function#Complementary_cumulative_distribution_function_.28tail_distribution.29) @@ -30,7 +30,7 @@ export interface BinOptions { * domain is used, the start and end of the domain will be extended to align * with the interval. */ - domain?: ((values: any[]) => [min: any, max: any]) | [min: any, max: any]; + domain?: ((values: T[]) => [min: T, max: T]) | [min: T, max: T]; /** * How to subdivide the domain into bins. May be one of: @@ -44,7 +44,7 @@ export interface BinOptions { * * an interval; see **interval** * * a function that returns an array, count, or interval */ - thresholds?: Thresholds; + thresholds?: Thresholds; /** * How to subdivide the domain into bins; an alternative to **thresholds**. @@ -53,7 +53,7 @@ export interface BinOptions { * number *n*, threshold values are consecutive multiples of *n* that span the * domain. */ - interval?: RangeInterval; + interval?: RangeInterval; } /** How to reduce binned values. */ From 29e40b0e4c2991ea61aa5abf92f6172d0c2f9fb8 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 10:46:50 -0700 Subject: [PATCH 06/11] edits --- src/transforms/group.d.ts | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/transforms/group.d.ts b/src/transforms/group.d.ts index aabe2582a7..acc9ed4144 100644 --- a/src/transforms/group.d.ts +++ b/src/transforms/group.d.ts @@ -2,27 +2,40 @@ import type {ChannelReducers} from "../channel.js"; import type {Reducer} from "../reducer.js"; import type {Transformed} from "./basic.js"; +/** Options for outputs of the group transform. */ export interface GroupOutputOptions { /** - * The data reducer; defaults to the subset of data corresponding to the - * group in input order. 
+ * How to reduce data; defaults to the identity reducer, outputting the array + * of data for each group in input order. */ data?: Reducer | null; + /** - * The filter reducer, defaults to a check on empty groups. Use null to return - * all groups, for example to impute sum=0 for a line chart. + * How to filter groups: if the reducer emits a falsey value, the group will + * be dropped; by default, empty groups are dropped. Use null to disable + * filtering and return all groups, for example to impute missing zeroes when + * summing values for a line chart. */ filter?: Reducer | null; + /** - * The order in which the groups are generated. + * How to order groups. By default, groups are returned in ascending natural + * order along *x*, *y*, and *z* (or *fill* or *stroke*). Group order affects + * draw order of overlapping marks, and may be useful in conjunction with the + * stack transform which defaults to input order. For example to place the + * smallest group within each stack on the baseline: + * + * ```js + * Plot.groupX({y: "count", sort: "count"}, {fill: "sex", x: "sport"}) + * ``` */ sort?: Reducer | null; - /** - * Reverse the order in which the groups are generated. - */ + + /** If true, reverse the order of generated groups; defaults to false. */ reverse?: boolean; } +/** How to reduce grouped channel values. 
*/ export type GroupOutputs = ChannelReducers & GroupOutputOptions; /** From 590f89c11b2d092e91869a27f1afa681ff2327c1 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 16:25:33 -0700 Subject: [PATCH 07/11] checkpoint edits --- src/channel.d.ts | 50 ++---- src/interval.d.ts | 54 +++++-- src/marks/vector.d.ts | 7 +- src/reducer.d.ts | 198 +++++++++++++++++++++++- src/transforms/bin.d.ts | 272 ++++++++++++++++---------------- src/transforms/group.d.ts | 284 +++++++++++----------------------- src/transforms/map.d.ts | 105 +++++++------ src/transforms/normalize.d.ts | 74 ++++----- src/transforms/normalize.js | 4 +- src/transforms/stack.d.ts | 106 ++++++++----- src/transforms/window.d.ts | 124 ++++++++------- 11 files changed, 705 insertions(+), 573 deletions(-) diff --git a/src/channel.d.ts b/src/channel.d.ts index 3dd6731a34..e4e261e0d7 100644 --- a/src/channel.d.ts +++ b/src/channel.d.ts @@ -118,12 +118,12 @@ export interface Channel { /** * A channel’s values may be expressed as: * - * * a function that returns the corresponding value for each datum - * * a field name, to extract the corresponding value for each datum - * * an iterable of values, typically of the same length as the data - * * a channel transform that returns an iterable of values given the data - * * a constant date, number, or boolean - * * null to represent no value + * - a function that returns the corresponding value for each datum + * - a field name, to extract the corresponding value for each datum + * - an iterable of values, typically of the same length as the data + * - a channel transform that returns an iterable of values given the data + * - a constant date, number, or boolean + * - null to represent no value */ export type ChannelValue = | Iterable // column of values @@ -152,10 +152,10 @@ export type ChannelValueIntervalSpec = ChannelValueSpec | {value: ChannelValue; * The available inputs for imputing scale domains. 
In addition to a named * channel, an input may be specified as: * - * * *data* - impute from mark data - * * *width* - impute from |*x2* - *x1*| - * * *height* - impute from |*y2* - *y1*| - * * null - impute from input order + * - *data* - impute from mark data + * - *width* - impute from |*x2* - *x1*| + * - *height* - impute from |*y2* - *y1*| + * - null - impute from input order */ export type ChannelDomainValue = ChannelName | "data" | "width" | "height" | null; @@ -163,31 +163,15 @@ export type ChannelDomainValue = ChannelName | "data" | "width" | "height" | nul export interface ChannelDomainOptions { /** * How to produce a singular value (for subsequent sorting) from aggregated - * channel values. Defaults to *max*. A reducer may be specified as: + * channel values; one of: * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *distinct* - the number of distinct values - * * *sum* - the sum of values - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * a function to be passed the array of values - * * an object with a *reduce* method - * - * In the last case, the *reduce* method is repeatedly passed an index (an - * array of integers) and the channel’s array of values; it must then return - * the corresponding aggregate value for the bin. 
+ * - true (default) - alias for *max* + * - false or null - disabled; don’t impute the scale domain + * - a named reducer implementation such as *count* or *sum* + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method */ - reduce?: Reducer | true; + reduce?: Reducer | boolean | null; /** If true, use descending instead of ascending order. */ reverse?: boolean; diff --git a/src/interval.d.ts b/src/interval.d.ts index 360ab91d2e..6e86bcd6f1 100644 --- a/src/interval.d.ts +++ b/src/interval.d.ts @@ -1,4 +1,9 @@ -/** The built-in time intervals; UTC or local time, depending on context. */ +/** + * The built-in time intervals; UTC or local time, depending on context. The + * *week* interval is an alias for *sunday*. The *quarter* interval is every + * three months, and the *half* interval is every six months, aligned at the + * start of the year. + */ export type TimeIntervalName = | "second" | "minute" @@ -65,15 +70,15 @@ export interface RangeIntervalImplementation extends IntervalImplementation extends RangeIntervalImplementation { /** - * Returns a new date representing the earliest interval boundary date after - * or equal to date. For example, d3.timeDay.ceil(date) typically returns - * 12:00 AM local time on the date following the given date. + * Returns the value representing the least interval boundary value greater + * than or equal to the specified *value*. For example, day.ceil(*date*) + * typically returns 12:00 AM on the date following the given date. * * This method is idempotent: if the specified date is already ceilinged to - * the current interval, a new date with an identical time is returned. - * Furthermore, the returned date is the maximum expressible value of the - * associated interval, such that interval.ceil(interval.ceil(date) + 1) - * returns the following interval boundary date. + * the current interval, the same value is returned. 
Furthermore, the returned + * value is the maximum expressible value of the associated interval, such + * that ceil(ceil(*value*) + *epsilon*) returns the following interval + * boundary value. */ ceil(value: T): T; } @@ -81,11 +86,32 @@ export interface NiceIntervalImplementation extends RangeIntervalImplementati /** A literal that can be automatically promoted to an interval. */ type LiteralInterval = T extends Date ? TimeIntervalName : T extends number ? number : never; -/** How to partition a continuous range into discrete intervals. */ -export type Interval = LiteralInterval | IntervalImplementation; +/** + * How to partition a continuous range into discrete intervals; one of: + * + * - an object that implements *floor* and *offset* methods + * - a named time interval such as *day* (for date intervals) + * - a number (for number intervals), defining intervals at integer multiples of *n* + */ +export type Interval = LiteralInterval | IntervalImplementation; -/** An interval that supports the range method, say for thresholds or ticks. */ -export type RangeInterval = LiteralInterval | RangeIntervalImplementation; +/** + * An interval that also supports the *range* method, used to subdivide a + * continuous range into discrete partitions, say for thresholds or ticks; one + * of: + * + * - an object that implements *floor*, *offset*, and *range* methods + * - a named time interval such as *day* (for date intervals) + * - a number (for number intervals), defining intervals at integer multiples of *n* + */ +export type RangeInterval = LiteralInterval | RangeIntervalImplementation; -/** An interval that can be used to nice a scale domain. 
*/ -export type NiceInterval = LiteralInterval | NiceIntervalImplementation; +/** + * A range interval that also supports the *ceil* method, used to nice a scale + * domain; one of: + * + * - an object that implements *floor*, *ceil*, *offset*, and *range* methods + * - a named time interval such as *day* (for date intervals) + * - a number (for number intervals), defining intervals at integer multiples of *n* + */ +export type NiceInterval = LiteralInterval | NiceIntervalImplementation; diff --git a/src/marks/vector.d.ts b/src/marks/vector.d.ts index ea044277f9..fef4ba5dbf 100644 --- a/src/marks/vector.d.ts +++ b/src/marks/vector.d.ts @@ -6,6 +6,7 @@ export type VectorShapeName = "arrow" | "spike"; /** A vector shape implementation. */ export interface VectorShapeImplementation { + /** Draws a shape of the given *length* and *radius* to the given *context*. */ draw(context: CanvasPath, length: number, radius: number): void; } @@ -53,9 +54,9 @@ export interface VectorOptions extends MarkOptions { * The vector’s position along its orientation relative to its anchor point; a * constant. Assuming a default **rotate** angle of 0°, one of: * - * * *start* - from [*x*, *y*] to [*x*, *y* - *l*] - * * *middle* (default) - from [*x*, *y* + *l* / 2] to [*x*, *y* - *l* / 2] - * * *end* - from [*x*, *y* + *l*] to [*x*, *y*] + * - *start* - from [*x*, *y*] to [*x*, *y* - *l*] + * - *middle* (default) - from [*x*, *y* + *l* / 2] to [*x*, *y* - *l* / 2] + * - *end* - from [*x*, *y* + *l*] to [*x*, *y*] * * where [*x*, *y*] is the vector’s anchor point and *l* is the vector’s * (possibly scaled) length in pixels. diff --git a/src/reducer.d.ts b/src/reducer.d.ts index d166da0716..c8556fdaad 100644 --- a/src/reducer.d.ts +++ b/src/reducer.d.ts @@ -1,11 +1,182 @@ type Digit = 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9; +// For internal use. 
export type ReducerPercentile = | (`p${Digit}${Digit}` & Record) // see https://github.com/microsoft/TypeScript/issues/29729 | "p25" | "p50" | "p75"; +/** + * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *distinct* - the number of distinct values + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * a function to be passed the array of values for each bin and the extent of the bin + * * an object with a *reduce* method, and optionally a *scope* + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *distinct* - the number of distinct values + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the 
most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * *x* - the middle of the bin’s *x* extent (when binning on *x*) + * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) + * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) + * * *y* - the middle of the bin’s *y* extent (when binning on *y*) + * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) + * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) + * * a function to be passed the array of values for each bin and the extent of the bin + * * an object with a *reduce* method, and optionally a *scope* + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *distinct* - the number of distinct values + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * *x* - the middle of the bin’s *x* extent (when binning on *x*) + * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) + * * *x2* - the upper 
bound of the bin’s *x* extent (when binning on *x*) + * * *y* - the middle of the bin’s *y* extent (when binning on *y*) + * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) + * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) + * * a function to be passed the array of values for each bin and the extent of the bin + * * an object with a *reduce* method, and optionally a *scope* + * + * * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted + * frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s + * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * a function - passed the array of values for each group + * * an object with a *reduce* method, and optionally a *scope* + * + * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted + * frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index
of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s + * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * a function - passed the array of values for each group + * * an object with a *reduce* method, and optionally a *scope* + * + * + * The following aggregation methods are supported: + * + * * *first* - the first value, in input order + * * *last* - the last value, in input order + * * *count* - the number of elements (frequency) + * * *sum* - the sum of values + * * *proportion* - the sum proportional to the overall total (weighted + * frequency) + * * *proportion-facet* - the sum proportional to the facet total + * * *min* - the minimum value + * * *min-index* - the zero-based index of the minimum value + * * *max* - the maximum value + * * *max-index* - the zero-based index of the maximum value + * * *mean* - the mean value (average) + * * *median* - the median value + * * *mode* - the value with the most occurrences + * * *pXX* - the percentile value, where XX is a number in [00,99] + * * *deviation* - the standard deviation + * * *variance* - the variance per [Welford’s + * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * * a function - passed the array of values for each group + * * an object with a *reduce* method, and optionally a *scope* + * + * + * The following aggregation methods are supported: + * + * - *first* - the first value, in input order + * - *last* - the last value, in input order + * - *count* - the number of elements (frequency) + * - *distinct* - the number of distinct values + * -
*sum* - the sum of values + * - *proportion* - the sum proportional to the overall total (weighted frequency) + * - *proportion-facet* - the sum proportional to the facet total + * - *min* - the minimum value + * - *min-index* - the zero-based index of the minimum value + * - *max* - the maximum value + * - *max-index* - the zero-based index of the maximum value + * - *mean* - the mean value (average) + * - *median* - the median value + * - *mode* - the value with the most occurrences + * - *pXX* - the percentile value, where XX is a number in [00,99] + * - *deviation* - the standard deviation + * - *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * + */ export type ReducerName = | "first" | "last" @@ -25,13 +196,38 @@ export type ReducerName = | "mode" | ReducerPercentile; +/** + * A shorthand functional reducer implementation (from source S to target T): + * given an array of input channel *values*, returns the corresponding reduced + * output value. + */ export type ReducerFunction = (values: S[]) => T; +/** A reducer implementation (from source S to target T). */ export interface ReducerImplementation { + /** + * Given an *index* representing the contents of the current group, the array + * of input channel *values*, returns the corresponding reduced output value. + * TODO If no input channel is provided (e.g., *count*) then *values* may be + * undefined. + */ reduceIndex(index: number[], values: S[]): T; // TODO scope // TODO label } -/** How to reduce aggregated values. */ +/** + * How to reduce aggregated (binned or grouped) values; one of: + * + * - a named reducer implementation such as *count* or *sum* + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method + * + * TODO Some reducers require an input channel (e.g., *mean*) while others do + * not (e.g., *count*). 
+ * + * TODO When reducing the *x1*, we may read from the *x* channel if *x1* does + * not exist, and likewise for *x2*. When reducing the *y1*, we may read from + * the *y* channel if *y1* does not exist, and likewise for *y2*. + */ export type Reducer = ReducerName | ReducerFunction | ReducerImplementation; diff --git a/src/transforms/bin.d.ts b/src/transforms/bin.d.ts index ad839de83a..507200e982 100644 --- a/src/transforms/bin.d.ts +++ b/src/transforms/bin.d.ts @@ -3,60 +3,115 @@ import type {RangeInterval} from "../interval.js"; import type {Reducer} from "../reducer.js"; import type {Transformed} from "./basic.js"; -/** The built-in thresholds implementations. */ +/** + * The built-in thresholds implementations; one of: + * + * - *auto* (default) - like *scott*, but capped at 200 bins + * - *freedman-diaconis* - the [Freedman–Diaconis rule](https://en.wikipedia.org/wiki/Freedman–Diaconis_rule) + * - *scott* - [Scott’s normal reference rule](https://en.wikipedia.org/wiki/Histogram#Scott.27s_normal_reference_rule) + * - *sturges* - [Sturges’ formula](https://en.wikipedia.org/wiki/Histogram#Sturges.27_formula) + */ export type ThresholdsName = "freedman-diaconis" | "scott" | "sturges" | "auto"; -/** How to subdivide a continuous domain into discrete bins (based on data). */ -export type ThresholdsFunction = (values: T[], min: T, max: T) => T[] | RangeInterval | number; +/** + * A functional shorthand thresholds implementation; given an array of observed + * *values* from the domain, and the *min* and *max* representing the extent of + * the domain, returns the corresponding desired thresholds as one of: + * + * - a range interval + * - an array of *n* threshold values for *n* - 1 bins + * - a count representing the desired number of bins (a hint; not guaranteed) + */ +export type ThresholdsFunction = (values: T[], min: T, max: T) => RangeInterval | T[] | number; -/** How to subdivide a continuous domain into discrete bins. 
*/ -export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval | T[] | number; +/** + * How to subdivide a continuous domain into discrete bins; one of: + * + * - a named threshold implementation such as *auto* (default) or *sturges* + * - a function that returns an array, count, or range interval + * - a range interval + * - an array of *n* threshold values for *n* - 1 bins + * - a count representing the desired number of bins (a hint; not guaranteed) + * + * When thresholds are specified as a desired number of bins, or with the + * built-in thresholds implementations, + * [d3.ticks](https://github.com/d3/d3-array/blob/main/README.md#ticks) is used + * for numeric domains and + * [d3.utcTicks](https://github.com/d3/d3-time/blob/main/README.md#utcTicks) is + * used for temporal domains. + */ +export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval | T[] | number; -/** Options for the bin transform. */ -export interface BinOptions { +/** Options for the bin transform, with a domain of type T. */ +export interface BinOptions { /** - * If true or a positive number, produce a cumulative distribution; if a - * negative number, produce a [complementary cumulative](https://en.wikipedia.org/wiki/Cumulative_distribution_function#Complementary_cumulative_distribution_function_.28tail_distribution.29) - * distribution; if false or zero (the default), produce a frequency + * If false or zero (default), produce a frequency distribution; if true or a + * positive number, produce a cumulative distribution; if a negative number, + * produce a [complementary cumulative](https://en.wikipedia.org/wiki/Cumulative_distribution_function#Complementary_cumulative_distribution_function_.28tail_distribution.29) * distribution. */ cumulative?: boolean | number; /** - * The domain of allowed values; if specified, values outside the domain will - * be ignored; otherwise defaults to the extent [*min*, *max*] of input - * values. 
If a function, it is passed the input values and must return the - * domain. When **thresholds** are specified as an interval and the default - * domain is used, the start and end of the domain will be extended to align - * with the interval. + * The domain of allowed values; optional. If specified as [*min*, *max*], + * values outside this extent will be ignored. If a function, it is passed the + * observed input values and must return the domain [*min*, *max*]. When + * **thresholds** are specified as an interval and no domain is specified, the + * effective domain will be extended to align with the interval. */ domain?: ((values: T[]) => [min: T, max: T]) | [min: T, max: T]; /** - * How to subdivide the domain into bins. May be one of: + * How to subdivide the domain into discrete bins; defaults to *auto*; one of: + * + * - a named threshold implementation such as *auto* (default) or *sturges* + * - a function that returns an array, count, or range interval + * - a range interval + * - an array of *n* threshold values for *n* - 1 bins + * - a count representing the desired number of bins (a hint; not guaranteed) + * + * For example, for about ten bins: * - * * *auto* (default) - Scott’s rule, capped at 200 bins - * * *freedman-diaconis* - the [Freedman–Diaconis rule](https://en.wikipedia.org/wiki/Freedman–Diaconis_rule) - * * *scott* - [Scott’s normal reference rule](https://en.wikipedia.org/wiki/Histogram#Scott.27s_normal_reference_rule) - * * *sturges* - [Sturges’ formula](https://en.wikipedia.org/wiki/Histogram#Sturges.27_formula) - * * a count representing the desired number of bins (a hint; not guaranteed) - * * an array of *n* threshold values for *n* - 1 bins - * * an interval; see **interval** - * * a function that returns an array, count, or interval + * ```js + * Plot.rectY(numbers, Plot.binX({y: "count"}, {thresholds: 10})) + * ``` */ thresholds?: Thresholds; /** - * How to subdivide the domain into bins; an alternative to **thresholds**. 
- * May be either: an interval object that implements *floor*, *offset*, and - * *range* methods; a named time interval such as *day*; or a number. If a - * number *n*, threshold values are consecutive multiples of *n* that span the - * domain. + * How to subdivide the domain into discrete bins; a stricter alternative to + * the **thresholds** option allowing the use of shorthand numeric intervals; + * one of: + * + * - an object that implements *floor*, *offset*, and *range* methods + * - a named time interval such as *day* (for date intervals) + * - a number (for number intervals), defining intervals at integer multiples of *n* + * + * For example, for integer bins: + * + * ```js + * Plot.rectY(numbers, Plot.binX({y: "count"}, {interval: 1})) + * ``` */ interval?: RangeInterval; } -/** How to reduce binned values. */ +/** + * How to reduce binned values; one of: + * + * - a standard reducer name, such as *count* or *first* + * - *x* - the middle of the bin’s *x* extent (when binning on *x*) + * - *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) + * - *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) + * - *y* - the middle of the bin’s *y* extent (when binning on *y*) + * - *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) + * - *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method + * + * When a reducer function or implementation is used with the bin transform, it + * is passed the bin extent {x1, x2, y1, y2} as an additional argument. + */ export type BinReducer = | Reducer | BinReducerFunction @@ -69,16 +124,16 @@ export type BinReducer = | "y2"; /** A functional bin reducer implementation. 
*/ -export type BinReducerFunction = (values: any[], extent: {x1: any; y1: any; x2: any; y2: any}) => any; +export type BinReducerFunction = (values: S[], extent: {x1: any; y1: any; x2: any; y2: any}) => T; /** A bin reducer implementation. */ -export interface BinReducerImplementation { +export interface BinReducerImplementation { /** * Given an *index* representing the contents of the current bin, the array of * input channel *values*, and the current bin’s *extent*, returns the - * corresponding reduced value to output. + * corresponding reduced output value. */ - reduceIndex(index: number[], values: any[], extent: {x1: any; y1: any; x2: any; y2: any}): any; + reduceIndex(index: number[], values: S[], extent: {x1: any; y1: any; x2: any; y2: any}): T; // TODO scope // TODO label } @@ -89,13 +144,17 @@ export interface BinOutputOptions extends BinOptions { * How to reduce data; defaults to the identity reducer, outputting the array * of data for each bin in input order. */ - data?: BinReducer | null; + data?: BinReducer; /** * How to filter bins: if the reducer emits a falsey value, the bin will be * dropped; by default, empty bins are dropped. Use null to disable filtering * and return all bins, for example to impute missing zeroes when summing * values for a line chart. + * + * ```js + * Plot.binX({y: "count", filter: null}, {x: "weight"}) + * ``` */ filter?: BinReducer | null; @@ -116,7 +175,11 @@ export interface BinOutputOptions extends BinOptions { reverse?: boolean; } -/** How to reduce binned channel values. */ +/** + * How to reduce binned channel values. 
+ * + * TODO default **title** and **href** reducers + */ export type BinOutputs = ChannelReducers & BinOutputOptions; /** @@ -129,46 +192,23 @@ export type BinOutputs = ChannelReducers & BinOutputOptions; * Plot.rectY(penguins, Plot.binX({y: "count"}, {x: "culmen_length_mm"})) * ``` * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *distinct* - the number of distinct values - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * *x* - the middle of the bin’s *x* extent (when binning on *x*) - * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) - * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) - * * *y* - the middle of the bin’s *y* extent (when binning on *y*) - * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) - * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) - * * a function to be passed the array of values for each bin and the extent of the bin - * * an object with a *reduce* method, and optionally a *scope* - * * Most aggregation methods require binding the output channel to an input * channel; for example, if you 
want the **y** output channel to be a *sum* (not * merely a count), there should be a corresponding **y** input channel * specifying which values to sum. If there is not, *sum* will be equivalent to * *count*. * - * To control how *x* is divided into bins, the following options are supported: + * TODO Group on {z, fill, stroke}, then optionally on y, then bin x. Will not + * group on y if generating explicit y, y1, or y2 output channel. Otherwise + * generates implicit y output channel. + * + * TODO If no explicit x output channel, generates x1 and x2 output channels + * representing the extent of each bin, and x output channels representing the + * midpoint, say for labels. * - * * **thresholds** - the threshold values; see below - * * **interval** - an alternative method of specifying thresholds - * * **domain** - values outside the domain will be omitted - * * **cumulative** - if positive, each bin will contain all lesser bins + * TODO x defaults to identity + * + * TODO default insetLeft and insetRight */ export function binX(outputs?: BinOutputs, options?: T & BinOptions): Transformed; @@ -182,46 +222,23 @@ export function binX(outputs?: BinOutputs, options?: T & BinOptions): Transfo * Plot.rectX(penguins, Plot.binY({x: "count"}, {y: "culmen_length_mm"})) * ``` * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *distinct* - the number of distinct values - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * *
*mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * *x* - the middle of the bin’s *x* extent (when binning on *x*) - * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) - * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) - * * *y* - the middle of the bin’s *y* extent (when binning on *y*) - * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) - * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) - * * a function to be passed the array of values for each bin and the extent of the bin - * * an object with a *reduce* method, and optionally a *scope* - * * Most aggregation methods require binding the output channel to an input * channel; for example, if you want the **y** output channel to be a *sum* (not * merely a count), there should be a corresponding **y** input channel * specifying which values to sum. If there is not, *sum* will be equivalent to * *count*. * - * To control how *y* is divided into bins, the following options are supported: + * TODO Group on {z, fill, stroke}, then optionally on x, then bin y. Will not + * group on x if generating explicit x, x1, or x2 output channel. Otherwise + * generates implicit x output channel. * - * * **thresholds** - the threshold values; see below - * * **interval** - an alternative method of specifying thresholds - * * **domain** - values outside the domain will be omitted - * * **cumulative** - if positive, each bin will contain all lesser bins + * If no explicit y output channel, generates y1 and y2 output channels + * representing the extent of each bin, and y output channels representing the + * midpoint, say for labels.
+ * + * TODO y defaults to identity + * + * TODO default insetTop and insetBottom */ export function binY(outputs?: BinOutputs, options?: T & BinOptions): Transformed; @@ -235,49 +252,28 @@ export function binY(outputs?: BinOutputs, options?: T & BinOptions): Transfo * Plot.rect(penguins, Plot.bin({fill: "count"}, {x: "culmen_depth_mm", y: "culmen_length_mm"})) * ``` * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *distinct* - the number of distinct values - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * *x* - the middle of the bin’s *x* extent (when binning on *x*) - * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) - * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) - * * *y* - the middle of the bin’s *y* extent (when binning on *y*) - * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) - * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) - * * a function to be passed the array of values for each bin and the extent of the bin - * * an object with a *reduce* method, and optionally a *scope* - * * Most aggregation 
methods require binding the output channel to an input * channel; for example, if you want the **fill** output channel to be a *sum* * (not merely a count), there should be a corresponding **fill** input channel * specifying which values to sum. If there is not, *sum* will be equivalent to * *count*. * - * To control how *x* and *y* are divided into bins, the following options are supported: - * - * * **thresholds** - the threshold values; see below - * * **interval** - an alternative method of specifying thresholds - * * **domain** - values outside the domain will be omitted - * * **cumulative** - if positive, each bin will contain all lesser bins - * * To pass separate binning options for *x* and *y*, the **x** and **y** input * channels can be specified as an object with the options above and a **value** * option to specify the input channel values. (🌶 NOT TYPED.) + * + * If no explicit x output channel, generates x1 and x2 output channels + * representing the extent of each bin, and x output channels representing the + * midpoint, say for labels. + * + * Likewise if no explicit y output channel, generates y1 and y2 output channels + * representing the extent of each bin, and y output channels representing the + * midpoint, say for labels. + * + * TODO Group on {z, fill, stroke}, then bin on x and y. + * + * TODO tuple defaults + * + * TODO default insetTop, insetRight, insetBottom, insetLeft */ export function bin(outputs?: BinOutputs, options?: T & BinOptions): Transformed; diff --git a/src/transforms/group.d.ts b/src/transforms/group.d.ts index acc9ed4144..a431fc9694 100644 --- a/src/transforms/group.d.ts +++ b/src/transforms/group.d.ts @@ -8,7 +8,7 @@ export interface GroupOutputOptions { * How to reduce data; defaults to the identity reducer, outputting the array * of data for each group in input order.
*/ - data?: Reducer | null; + data?: Reducer; /** * How to filter groups: if the reducer emits a falsey value, the group will @@ -19,11 +19,11 @@ export interface GroupOutputOptions { filter?: Reducer | null; /** - * How to order groups. By default, groups are returned in ascending natural - * order along *x*, *y*, and *z* (or *fill* or *stroke*). Group order affects - * draw order of overlapping marks, and may be useful in conjunction with the - * stack transform which defaults to input order. For example to place the - * smallest group within each stack on the baseline: + * How to order groups; if null (default), groups are returned in ascending + * natural order along *x*, *y*, and *z* (or *fill* or *stroke*). Group order + * affects draw order of overlapping marks, and may be useful in conjunction + * with the stack transform which defaults to input order. For example to + * place the smallest group within each stack on the baseline: * * ```js * Plot.groupX({y: "count", sort: "count"}, {fill: "sex", x: "sport"}) @@ -39,224 +39,120 @@ export interface GroupOutputOptions { export type GroupOutputs = ChannelReducers & GroupOutputOptions; /** - * Aggregates ordinal or categorical data—such as names—into groups and then - * computes summary statistics for each group such as a count or sum. Groups are - * computed on the first channel of *z*, *fill*, or *stroke*, if any. If none of - * *z*, *fill*, or *stroke* are channels, then all data (within each facet) is - * placed into a single group. + * Groups on the first channel of *z*, *fill*, or *stroke*, if any, and then for + * each channel in the specified *outputs*, applies the corresponding *reduce* + * method to produce new channel values from the grouped input channel values. 
+ * Each *reduce* method may be one of: * - * ```js - * Plot.groupZ({x: "proportion"}, {fill: "species"}) - * ``` - * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted - * frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s - * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * a function - passed the array of values for each group - * * an object with a *reduce* method, an optionally a *scope* + * - a named reducer implementation such as *count* or *sum* + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method * - * Most aggregation methods require binding the output channel to an input - * channel; for example, if you want the **r** output channel to be a *sum* (not - * merely a count), there should be a corresponding **r** input channel - * specifying which values to sum. + * For example, for a horizontal stacked bar chart: * - * You can control whether a channel is computed before or after grouping. 
If a - * channel is declared only in *options* (and it is not a special group-eligible - * channel such as *x*, *y*, *z*, *fill*, or *stroke*), it will be computed - * after grouping and be passed the grouped data: each datum is the array of - * input data corresponding to the current group. + * ```js + * Plot.barX(penguins, Plot.groupZ({x: "proportion"}, {fill: "species"})) + * ``` * - * The default reducer for the **title** channel returns a summary list of the - * top 5 values with the corresponding number of occurrences. + * If **title** is not in *outputs* but is in *options*, it defaults to + * summarizing the most common values. If **href** is not in *outputs* but is in + * *options*, it defaults to *first*. * - * See also **groupX**, **groupY**, and **group** if you need to group by *x*, - * *y*, or both. + * Non-grouping channels declared in *options* but not *outputs* are computed on + * reduced data after grouping, which defaults to the array of data for the + * current group. */ export function groupZ(outputs?: GroupOutputs, options?: T): Transformed; /** - * Aggregates ordinal or categorical data—such as names—into groups and then - * computes summary statistics for each group such as a count or sum. Typically - * used with the **barY** mark for a categorical histogram. Groups are computed - * on *x* and the first channel of *z*, *fill*, or *stroke*, if any.
- * - * ```js - * Plot.groupX({y: "sum"}, {x: "species", y: "body_mass_g"}) - * ``` - * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted - * frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s - * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * a function - passed the array of values for each group - * * an object with a *reduce* method, an optionally a *scope* + * Groups on the *x* input channel; then subdivides groups on the first channel + * of *z*, *fill*, or *stroke*, if any; and then for each channel in the + * specified *outputs*, applies the corresponding *reduce* method to produce new + * channel values from the grouped input channel values. Each *reduce* method + * may be one of: * - * Most aggregation methods require binding the output channel to an input - * channel; for example, if you want the **r** output channel to be a *sum* (not - * merely a count), there should be a corresponding **r** input channel - * specifying which values to sum. 
+ * - a named reducer implementation such as *count* or *sum* + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method * - * You can control whether a channel is computed before or after grouping. If a - * channel is declared only in *options* (and it is not a special group-eligible - * channel such as *x*, *y*, *z*, *fill*, or *stroke*), it will be computed - * after grouping and be passed the grouped data: each datum is the array of - * input data corresponding to the current group. + * For example, for a vertical bar chart of species by total mass: * - * The default reducer for the **title** channel returns a summary list of the - * top 5 values with the corresponding number of occurrences. + * ```js + * Plot.barY(penguins, Plot.groupX({y: "sum"}, {x: "species", y: "body_mass_g"})) + * ``` * - * The outputs may also include *filter* and *sort* options (with *reverse*) to - * specify which groups are generated. Use filter: null to generate empty - * groups, for example to impute sum=0 for a line chart. The *sort* option can - * also target the domain of an associated scale such as *x*, *fx* or *fy*. + * If **x** is not in *options*, it defaults to identity, assuming that the data + * is ordinal. If **x** is not in *outputs*, it defaults to *first*; the *x1* + * and *x2* channels, if any, will also be dropped from the returned *options*. + * If **title** is not in *outputs* but is in *options*, it defaults to + * summarizing the most common values. If **href** is not in *outputs* but is in + * *options*, it defaults to *first*. * - * See also **groupZ**, **groupY**, and **group** if you need to group by - * series, *y*, or both *x* and *y*. + * Non-grouping channels declared in *options* but not *outputs* are computed on + * reduced data after grouping, which defaults to the array of data for the + * current group.
*/ export function groupX(outputs?: GroupOutputs, options?: T): Transformed; /** - * Aggregates ordinal or categorical data—such as names—into groups and then - * computes summary statistics for each group such as a count or sum. Typically - * used with the **barX** mark for a categorical histogram. Groups are computed - * on *y* and the first channel of *z*, *fill*, or *stroke*, if any. - * - * ```js - * Plot.groupY({x: "sum"}, {y: "species", x: "body_mass_g"}) - * ``` - * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted - * frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s - * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * a function - passed the array of values for each group - * * an object with a *reduce* method, an optionally a *scope* + * Groups on the *y* channel; then subdivides groups on the first channel of + * *z*, *fill*, or *stroke*, if any; and then for each channel in the specified + * *outputs*, applies the corresponding *reduce* method to produce new channel + * values from the grouped input channel values. 
Each *reduce* method may be one + * of: * - * Most aggregation methods require binding the output channel to an input - * channel; for example, if you want the **r** output channel to be a *sum* (not - * merely a count), there should be a corresponding **r** input channel - * specifying which values to sum. + * - a named reducer implementation such as *count* or *sum* + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method * - * You can control whether a channel is computed before or after grouping. If a - * channel is declared only in *options* (and it is not a special group-eligible - * channel such as *x*, *y*, *z*, *fill*, or *stroke*), it will be computed - * after grouping and be passed the grouped data: each datum is the array of - * input data corresponding to the current group. + * For example, for a horizontal bar chart of species by total mass: * - * The default reducer for the **title** channel returns a summary list of the - * top 5 values with the corresponding number of occurrences. + * ```js + * Plot.barX(penguins, Plot.groupY({x: "sum"}, {y: "species", x: "body_mass_g"})) + * ``` * - * The outputs may also include *filter* and *sort* options (with *reverse*) to - * specify which groups are generated. Use filter: null to generate empty - * groups, for example to impute sum=0 for a line chart. The *sort* option can - * also target the domain of an associated scale such as *y*, *fx* or *fy*. + * If **y** is not in *options*, it defaults to identity, assuming that the data + * is ordinal. If **y** is not in *outputs*, it defaults to *first*; the *y1* + * and *y2* channels, if any, will also be dropped from the returned *options*. + * If **title** is not in *outputs* but is in *options*, it defaults to + * summarizing the most common values. If **href** is not in *outputs* but is in + * *options*, it defaults to *first*.
* - * See also **groupZ**, **groupX**, and **group** if you need to group by - * series, *x*, or both *x* and *y*. + * Non-grouping channels declared in *options* but not *outputs* are computed on + * reduced data after grouping, which defaults to the array of data for the + * current group. */ export function groupY(outputs?: GroupOutputs, options?: T): Transformed; /** - * Aggregates ordinal or categorical data—such as names—into groups and then - * computes summary statistics for each group such as a count or sum. Typically - * used with the **cell** mark for a categorical heatmap. Groups are computed on - * *x* and *y*, and the first channel of *z*, *fill*, or *stroke*, if any. - * - * ```js - * Plot.group({fill: "count"}, {x: "island", y: "species"}) - * ``` - * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted - * frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s - * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * a function - passed the array of values for each group - * * an object with a *reduce* method, an optionally a *scope* + * Groups on the *x* and *y* channels; then subdivides groups on the first + * channel of *z*, *fill*, or *stroke*, if any; 
and then for each channel in the + * specified *outputs*, applies the corresponding *reduce* method to produce new + * channel values from the grouped input channel values. Each *reduce* method + * may be one of: * - * Most aggregation methods require binding the output channel to an input - * channel; for example, if you want the **r** output channel to be a *sum* (not - * merely a count), there should be a corresponding **r** input channel - * specifying which values to sum. + * - a named reducer implementation such as *count* or *sum* + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method * - * You can control whether a channel is computed before or after grouping. If a - * channel is declared only in *options* (and it is not a special group-eligible - * channel such as *x*, *y*, *z*, *fill*, or *stroke*), it will be computed - * after grouping and be passed the grouped data: each datum is the array of - * input data corresponding to the current group. + * For example, for a heatmap of penguins by species and island: * - * The default reducer for the **title** channel returns a summary list of the - * top 5 values with the corresponding number of occurrences. - * - * The outputs may also include *filter* and *sort* options (with *reverse*) to - * specify which groups are generated. Use filter: null to generate empty - * groups, for example to impute sum=0 for empty cells. + * ```js + * Plot.cell(penguins, Plot.group({fill: "count"}, {x: "island", y: "species"})) + * ``` * - * See also **groupZ**, **groupX**, and **groupY** if you need to group by - * series, *x*, or *y*. + * If neither **x** nor **y** are in *options*, then **x** and **y** default to + * accessors assuming that *data* contains tuples [[*x₀*, *y₀*], [*x₁*, *y₁*], + * [*x₂*, *y₂*], …]. 
If **x** is not in *outputs*, it defaults to *first*; the + * *x1* and *x2* channels, if any, will also be dropped from the returned + * *options*. Likewise if **y** is not in *outputs*, it defaults to *first*; the + * *y1* and *y2* channels, if any, will also be dropped from the returned + * *options*. If **title** is not in *outputs* but is in *options*, it defaults + * to summarizing the most common values. If **href** is not in *outputs* but is + * in *options*, it defaults to *first*. + * + * Non-grouping channels declared in *options* but not *outputs* are computed on + * reduced data after grouping, which defaults to the array of data for the + * current group. */ export function group(outputs?: GroupOutputs, options?: T): Transformed; diff --git a/src/transforms/map.d.ts b/src/transforms/map.d.ts index 95c4f04b30..322de5e728 100644 --- a/src/transforms/map.d.ts +++ b/src/transforms/map.d.ts @@ -1,24 +1,41 @@ import type {ChannelName, ChannelValue} from "../channel.js"; import type {Transformed} from "./basic.js"; -/** A shorthand functional map implementation (from source S to target T). */ +/** + * A shorthand functional map implementation: given an array of input channel + * *values*, returns the corresponding array of mapped output channel values. + * The returned array must have the same length as the given input. + */ export type MapFunction = (values: S[]) => T[]; -/** The built-in map implementations. */ +/** + * The built-in map implementations; one of: + * + * - *cumsum* - a cumulative sum + * - *rank* - the rank of each value in the sorted array + * - *quantile* - the rank, normalized between 0 and 1 + */ export type MapName = "cumsum" | "rank" | "quantile"; -/** A map implementation (from source S to target T). */ +/** A map implementation. 
*/ export interface MapImplementation { /** - * This method is repeatedly passed the index for each series (an array of - * integers), the corresponding input channel’s array of values, and the - * output channel’s array of values; it must populate the slots specified by - * the index in the output array. + * Given the *index* for each series (an array of integers), the input + * channel’s array of *source* values, and the output channel’s array of + * *target* values, populates the slots in *target* specified by *index* with + * the desired mapped output values. This method is invoked separately for + * each series. */ mapIndex(index: number[], source: S[], target: T[]): void; } -/** How to produce new channel values for each series. */ +/** + * How to produce new channel values for each series; one of: + * + * - a named map implementation such as *cumsum* or *rank* + * - a function to be passed an array of values, returning new values + * - an object that implements the *mapIndex* method + */ export type Map = MapImplementation | MapFunction | MapName; /** Outputs for the map transform. */ @@ -36,63 +53,59 @@ export interface MapOptions { /** * Groups on the first channel of *z*, *fill*, or *stroke*, if any, and then * applies the specified *map* method to each of the *x*, *x1*, and *x2* - * channels declared in the *options*. The *map* may be specified as: + * channels in the specified *options* to produce new channel values for each + * series. 
The *map* method may be one of: + * + * - a named map implementation such as *cumsum* or *rank* + * - a function to be passed an array of values, returning new values + * - an object that implements the *mapIndex* method * - * * *cumsum* - a cumulative sum - * * *rank* - the rank of each value in the sorted array - * * *quantile* - the rank, normalized between 0 and 1 - * * a function to be passed an array of values, returning new values - * * an object that implements the *mapIndex* method + * For example, to produce a cumulative sum of random numbers on the *x* + * channel: * - * If a function is used, it must return an array of the same length as the - * given input. If a *mapIndex* method is used, it is repeatedly passed the - * index for each series (an array of integers), the corresponding input - * channel’s array of values, and the output channel’s array of values; it must - * populate the slots specified by the index in the output array. + * ```js + * Plot.mapX("cumsum", {x: d3.randomNormal()}) + * ``` */ export function mapX(map: Map, options?: T & MapOptions): Transformed; /** * Groups on the first channel of *z*, *fill*, or *stroke*, if any, and then * applies the specified map method to each of the *y*, *y1*, and *y2* channels - * declared in the *options*. The *map* may be specified as: + * in the specified *options* to produce new channel values for each series. 
The + * *map* method may be one of: + * + * - a named map implementation such as *cumsum* or *rank* + * - a function to be passed an array of values, returning new values + * - an object that implements the *mapIndex* method * - * * *cumsum* - a cumulative sum - * * *rank* - the rank of each value in the sorted array - * * *quantile* - the rank, normalized between 0 and 1 - * * a function to be passed an array of values, returning new values - * * an object that implements the *mapIndex* method + * For example, to produce a cumulative sum of random numbers on the *y* + * channel: * - * If a function is used, it must return an array of the same length as the - * given input. If a *mapIndex* method is used, it is repeatedly passed the - * index for each series (an array of integers), the corresponding input - * channel’s array of values, and the output channel’s array of values; it must - * populate the slots specified by the index in the output array. + * ```js + * Plot.mapY("cumsum", {y: d3.randomNormal()}) + * ``` */ export function mapY(map: Map, options?: T & MapOptions): Transformed; /** * Groups on the first channel of *z*, *fill*, or *stroke*, if any, and then for - * each channel declared in the specified *outputs*, applies the corresponding - * *map* method. Each channel in *outputs* must have a corresponding input - * channel in *options*. + * each channel in the specified *outputs*, applies the corresponding *map* + * method to produce new channel values for each series. 
Each *map* method may + * be one of: + * + * - a named map implementation such as *cumsum* or *rank* + * - a function to be passed an array of values, returning new values + * - an object that implements the *mapIndex* method + * + * For example, to produce a cumulative sum of random numbers on the *y* + * channel: * * ```js * Plot.map({y: "cumsum"}, {y: d3.randomNormal()}) * ``` * - * Each *map* in *outputs* may be specified as: - * - * * *cumsum* - a cumulative sum - * * *rank* - the rank of each value in the sorted array - * * *quantile* - the rank, normalized between 0 and 1 - * * a function to be passed an array of values, returning new values - * * an object that implements the *mapIndex* method - * - * If a function is used, it must return an array of the same length as the - * given input. If a *mapIndex* method is used, it is repeatedly passed the - * index for each series (an array of integers), the corresponding input - * channel’s array of values, and the output channel’s array of values; it must - * populate the slots specified by the index in the output array. + * Each declared channel in *outputs* must have a corresponding input channel in + * *options*. 
*/ export function map(outputs?: MapOutputs, options?: T & MapOptions): Transformed; diff --git a/src/transforms/normalize.d.ts b/src/transforms/normalize.d.ts index a5219e22f5..2b9a723232 100644 --- a/src/transforms/normalize.d.ts +++ b/src/transforms/normalize.d.ts @@ -2,6 +2,20 @@ import type {ReducerPercentile} from "../reducer.js"; import type {Transformed} from "./basic.js"; import type {Map} from "./map.js"; +/** + * The built-in normalize basis implementations; one of: + * + * - *first* - the first value, as in an index chart + * - *last* - the last value + * - *min* - the minimum value + * - *max* - the maximum value + * - *mean* - the mean value (average) + * - *median* - the median value + * - *pXX* - the percentile value, where XX is a number in [00,99] + * - *sum* - the sum of values + * - *extent* - the minimum is mapped to zero, and the maximum to one + * - *deviation* - subtract the mean, then divide by the standard deviation + */ export type NormalizeBasisName = | "deviation" | "first" @@ -14,13 +28,23 @@ export type NormalizeBasisName = | "extent" | ReducerPercentile; -export type NormalizeBasisFunction = (index: number[], values: any[]) => number; +/** + * A functional basis implementation: given an array of input channel *values* + * for the current series, returns the corresponding basis number (divisor). + */ +export type NormalizeBasisFunction = (values: T[]) => number; -/** How to normalize series values. */ +/** + * How to normalize series values; one of: + * + * - a named basis method such as *first* or *median* + * - a function that takes an array of series values and returns a basis number + */ export type NormalizeBasis = NormalizeBasisName | NormalizeBasisFunction; /** Options for the normalize transform. */ export interface NormalizeOptions { + /** How to normalize series values; defaults to *first*. 
*/ basis?: NormalizeBasis; } @@ -31,23 +55,9 @@ export interface NormalizeOptions { * if the series values are [*x₀*, *x₁*, *x₂*, …] and the *first* basis is used, * the derived series values would be [*x₀* / *x₀*, *x₁* / *x₀*, *x₂* / *x₀*, …] * as in an index chart. - * - * The **basis** option specifies how to normalize series values. It can be: - * - * * *first* (default) - the first value, as in an index chart - * * *last* - the last value - * * *min* - the minimum value - * * *max* - the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *sum* - the sum of values - * * *extent* - the minimum is mapped to zero, and the maximum to one - * * *deviation* - subtract the mean, then divide by the standard deviation - * * a function to be passed an array of series values, returning a number */ -export function normalizeX(options?: T & NormalizeOptions): Transformed; export function normalizeX(basis?: NormalizeBasis, options?: T): Transformed; +export function normalizeX(options?: T & NormalizeOptions): Transformed; /** * Groups data into series using the first channel of *z*, *fill*, or *stroke* @@ -56,23 +66,9 @@ export function normalizeX(basis?: NormalizeBasis, options?: T): Transformed< * if the series values are [*y₀*, *y₁*, *y₂*, …] and the *first* basis is used, * the derived series values would be [*y₀* / *y₀*, *y₁* / *y₀*, *y₂* / *y₀*, …] * as in an index chart. - * - * The **basis** option specifies how to normalize series values. 
It can be: - * - * * *first* (default) - the first value, as in an index chart - * * *last* - the last value - * * *min* - the minimum value - * * *max* - the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *sum* - the sum of values - * * *extent* - the minimum is mapped to zero, and the maximum to one - * * *deviation* - subtract the mean, then divide by the standard deviation - * * a function to be passed an array of series values, returning a number */ -export function normalizeY(options?: T & NormalizeOptions): Transformed; export function normalizeY(basis?: NormalizeBasis, options?: T): Transformed; +export function normalizeY(options?: T & NormalizeOptions): Transformed; /** * Given a normalize *basis*, returns a corresponding map implementation for use @@ -82,19 +78,5 @@ export function normalizeY(basis?: NormalizeBasis, options?: T): Transformed< * ```js * Plot.map({title: Plot.normalize("first")}, {x: "Date", title: "Close", stroke: "Symbol"}) * ``` - * - * The **basis** option specifies how to normalize series values. 
It can be: - * - * * *first* (default) - the first value, as in an index chart - * * *last* - the last value - * * *min* - the minimum value - * * *max* - the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *sum* - the sum of values - * * *extent* - the minimum is mapped to zero, and the maximum to one - * * *deviation* - subtract the mean, then divide by the standard deviation - * * a function to be passed an array of series values, returning a number */ export function normalize(basis: NormalizeBasis): Map; diff --git a/src/transforms/normalize.js b/src/transforms/normalize.js index 0967312325..d5429e9af6 100644 --- a/src/transforms/normalize.js +++ b/src/transforms/normalize.js @@ -57,8 +57,8 @@ function normalizeAccessor(f) { const normalizeExtent = { mapIndex(I, S, T) { - const [s1, s2] = extent(I, (i) => S[i]), - d = s2 - s1; + const [s1, s2] = extent(I, (i) => S[i]); + const d = s2 - s1; for (const i of I) { T[i] = S[i] === null ? NaN : (S[i] - s1) / d; } diff --git a/src/transforms/stack.d.ts b/src/transforms/stack.d.ts index 45b7413e8d..8b0e932852 100644 --- a/src/transforms/stack.d.ts +++ b/src/transforms/stack.d.ts @@ -1,6 +1,19 @@ import type {ChannelValue} from "../channel.js"; import type {Transformed} from "./basic.js"; +/** + * A built-in stack offset method; one of: + * + * - *normalize* - rescale each stack to fill [0, 1] + * - *center* - align the centers of all stacks + * - *wiggle* - translate stacks to minimize apparent movement + * + * If a given stack has zero total value, the *normalize* offset will not adjust + * the stack’s position. Both the *center* and *wiggle* offsets ensure that the + * lowest element across stacks starts at zero for better default axes. The + * *wiggle* offset is recommended for streamgraphs in conjunction with the + * *inside-out* order. 
For more, see [Byron & Wattenberg](http://leebyron.com/streamgraph/). + */ export type StackOffsetName = | "center" | "normalize" @@ -8,56 +21,69 @@ export type StackOffsetName = | ("expand" & Record) // deprecated; use normalize | ("silhouette" & Record); // deprecated; use center +/** + * A stack offset implementation: given an *index* grouped by facet and *x*, the + * output channel values *y1* and *y2*, and the channel values *z*, mutates the + * values in *y1* and *y2* given by the *index* to translate and scale stacks as + * desired. For the stackX transform, substitute *y* for *x*, and *x1* & *x2* + * for *y1* & *y2*. + */ export type StackOffsetFunction = (index: number[][][], y1: number[], y2: number[], z: any[]) => void; -/** How the baseline of stacked layers may be offset. */ +/** + * How the baseline of stacked layers may be offset; one of: + * + * - a named stack offset method such as *wiggle* or *center* + * - a function to be passed an *index*, *y1*, *y2*, and *z* values + */ export type StackOffset = StackOffsetName | StackOffsetFunction; +/** + * The built-in stack order methods; one of: + * + * - *x* - alias of *value*; for stackX only + * - *y* - alias of *value*; for stackY only + * - *value* - ascending value (or descending with **reverse**) + * - *sum* - total value per series + * - *appearance* - position of maximum value per series + * - *inside-out* (default with *wiggle*) - order the earliest-appearing series on the inside + * + * The *inside-out* order is recommended for streamgraphs in conjunction with + * the *wiggle* offset. For more, see [Byron & Wattenberg](http://leebyron.com/streamgraph/). + */ export type StackOrderName = "value" | "x" | "y" | "z" | "sum" | "appearance" | "inside-out"; -/** How to order layers prior to stacking. 
*/ -export type StackOrder = - | StackOrderName - | (string & Record) // field name; see also https://github.com/microsoft/TypeScript/issues/29729 - | ((d: any, i: number) => any) // function of data - | any[]; // explicit ordinal values +/** + * How to order layers prior to stacking; one of: + * + * - a named stack order method such as *inside-out* or *sum* + * - a field name, for natural order of the corresponding values + * - a function of data, for natural order of the corresponding values + * - an array of explicit *z* values in the desired order + */ +export type StackOrder = StackOrderName | (string & Record) | ((d: any, i: number) => any) | any[]; /** Options for the stack transform. */ export interface StackOptions { /** - * After all values have been stacked from zero, an optional **offset** can be - * applied to translate or scale the stacks: - * - * - null (default) - a zero baseline - * - *normalize* - rescale each stack to fill [0, 1] - * - *center* - align the centers of all stacks - * - *wiggle* - translate stacks to minimize apparent movement - * - a function to be passed a nested index, and start, end, and *z* values - * - * If a given stack has zero total value, the *expand* offset will not adjust - * the stack’s position. Both the *center* and *wiggle* offsets ensure that - * the lowest element across stacks starts at zero for better default axes. - * The *wiggle* offset is recommended for streamgraphs, and if used, changes - * the default **order** to *inside-out*. - * - * For details on the *wiggle* offset, see [Byron & Wattenberg](http://leebyron.com/streamgraph/). + * After stacking, an optional **offset** can be applied to translate and + * scale stacks, say to produce a streamgraph; defaults to null for a zero + * baseline (*y* = 0 for stackY, and *x* = 0 for stackX). If the *wiggle* + * offset is used, the default **order** changes to *inside-out*. 
*/ offset?: StackOffset | null; /** - * The order in which stacks are layered: + * The order in which stacks are layered; one of: * - * - null (default) - input order - * - *value* - ascending value (or descending with **reverse**) - * - *x* - alias of *value*; for stackX only - * - *y* - alias of *value*; for stackY only - * - *sum* - total value per series - * - *appearance* - position of maximum value per series - * - *inside-out* (default with *wiggle*) - order the earliest-appearing series on the inside - * - a named field or function of data - natural order - * - an array enumerating all the *z* values in the desired order + * - null (default) for input order + * - a named stack order method such as *inside-out* or *sum* + * - a field name, for natural order of the corresponding values + * - a function of data, for natural order of the corresponding values + * - an array of explicit *z* values in the desired order * - * For details on the *inside-out* order, see [Byron & Wattenberg](http://leebyron.com/streamgraph/). + * If the *wiggle* **offset** is used, as for a streamgraph, the default + * changes to *inside-out*. */ order?: StackOrder | null; @@ -81,24 +107,24 @@ export interface StackOptions { * the midpoint between *x1* and *x2*, for example to place a label. If not * specified, the input channel *x* defaults to the constant one. */ -export function stackX(options?: T & StackOptions): Transformed; export function stackX(stackOptions?: StackOptions, options?: T): Transformed; +export function stackX(options?: T & StackOptions): Transformed; /** * Like **stackX**, but returns the starting position *x1* as the *x* channel, * for example to position a dot on the left-hand side of each element of a * stack. 
*/ -export function stackX1(options?: T & StackOptions): Transformed; export function stackX1(stackOptions?: StackOptions, options?: T): Transformed; +export function stackX1(options?: T & StackOptions): Transformed; /** * Like **stackX**, but returns the starting position *x2* as the *x* channel, * for example to position a dot on the right-hand side of each element of a * stack. */ -export function stackX2(options?: T & StackOptions): Transformed; export function stackX2(stackOptions?: StackOptions, options?: T): Transformed; +export function stackX2(options?: T & StackOptions): Transformed; /** * Transforms a length channel *y* into starting and ending position channels @@ -109,19 +135,19 @@ export function stackX2(stackOptions?: StackOptions, options?: T): Transforme * *y1* and *y2*, for example to place a label. If not specified, the input * channel *y* defaults to the constant one. */ -export function stackY(options?: T & StackOptions): Transformed; export function stackY(stackOptions?: StackOptions, options?: T): Transformed; +export function stackY(options?: T & StackOptions): Transformed; /** * Like **stackY**, but returns the starting position *y1* as the *y* channel, * for example to position a dot at the bottom of each element of a stack. */ -export function stackY1(options?: T & StackOptions): Transformed; export function stackY1(stackOptions?: StackOptions, options?: T): Transformed; +export function stackY1(options?: T & StackOptions): Transformed; /** * Like **stackY**, but returns the ending position *y2* as the *y* channel, * for example to position a dot at the top of each element of a stack. 
*/ -export function stackY2(options?: T & StackOptions): Transformed; export function stackY2(stackOptions?: StackOptions, options?: T): Transformed; +export function stackY2(options?: T & StackOptions): Transformed; diff --git a/src/transforms/window.d.ts b/src/transforms/window.d.ts index 4cf18610d0..b265a71b1f 100644 --- a/src/transforms/window.d.ts +++ b/src/transforms/window.d.ts @@ -1,60 +1,70 @@ -import type {ReducerPercentile} from "../reducer.js"; +import type {ReducerFunction, ReducerPercentile} from "../reducer.js"; import type {Transformed} from "./basic.js"; import type {Map} from "./map.js"; +/** + * The built-in window reducer implementations; one of: + * + * - *difference* - the difference between the last and first window value + * - *ratio* - the ratio of the last and first window value + * - *first* - the first value + * - *last* - the last value + * - *deviation* - the standard deviation + * - *sum* - the sum of values + * - *min* - the minimum value + * - *max* - the maximum value + * - *mean* - the mean (average) value + * - *median* - the median value + * - *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + * - *mode* - the mode (most common occurrence) + * - *pXX* - the percentile value, where XX is a number in [00,99] + */ export type WindowReducerName = + | "difference" // specific to window + | "ratio" // specific to window + | "first" + | "last" | "deviation" + | "sum" + | "min" | "max" | "mean" | "median" - | "min" - | "mode" - | "sum" | "variance" - | "difference" - | "ratio" - | "first" - | "last" + | "mode" | ReducerPercentile; -export type WindowReducerFunction = (values: any[]) => any; - -export type WindowReducer = WindowReducerName | WindowReducerFunction; +/** + * How to reduce aggregated (windowed) values; one of: + * + * - a named window reducer implementation such as *mean* or *difference* + * - a function that takes an array of 
values and returns the reduced value + */ +export type WindowReducer = WindowReducerName | ReducerFunction; +/** Options for the window transform. */ export interface WindowOptions { /** - * The size (number of consecutive values) in the window; includes the current - * value. + * The required size (number of consecutive values) in the window; includes + * the current value. */ k: number; /** * How to produce a summary statistic from the **k** values in the current - * window. The reducer may be specified as: + * window; one of: * - * * *min* - the minimum - * * *max* - the maximum - * * *mean* (default) - the mean (average) - * * *median* - the median - * * *mode* - the mode (most common occurrence) - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *sum* - the sum of values - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * *difference* - the difference between the last and first window value - * * *ratio* - the ratio of the last and first window value - * * *first* - the first value - * * *last* - the last value - * * a function to be passed an array of **k** values + * - a named window reducer implementation such as *mean* or *difference* + * - a function that takes an array of values and returns the reduced value */ reduce?: WindowReducer; /** * How to align the rolling window, placing the current value: * - * * *start* - as the first element in the window - * * *middle* (default) - in the middle of the window, rounding down if **k** is even - * * *end* - as the last element in the window + * - *start* - as the first element in the window + * - *middle* (default) - in the middle of the window, rounding down if **k** is even + * - *end* - as the last element in the window */ anchor?: "start" | "middle" | "end"; @@ -62,12 +72,6 @@ export interface WindowOptions { shift?: "leading" | 
"centered" | "trailing"; /** - * If true, the output start values or end values or both (depending on the - * **anchor**) of each series may be undefined since there are not enough - * elements to create a window of size **k**; output values may also be - * undefined if some of the input values in the corresponding window are - * undefined. - * * If false (the default), the window will be automatically truncated as * needed, and undefined input values are ignored. For example, if **k** is 24 * and **anchor** is *middle*, then the initial 11 values have effective @@ -75,48 +79,56 @@ export interface WindowOptions { * effective window sizes of 23, 22, 21, … 12. Values computed with a * truncated window may be noisy; if you would prefer to not show this data, * set the **strict** option to true. + * + * If true, the output start values or end values or both (depending on the + * **anchor**) of each series may be undefined since there are not enough + * elements to create a window of size **k**; output values may also be + * undefined if some of the input values in the corresponding window are + * undefined. */ strict?: boolean; } /** - * Computes a moving window of *x*, *x1*, and *x2* channel values and then - * derives a summary statistic from values in the current window, say to compute - * a rolling average. The window options can be specified as the first argument, - * or grouped with the *options*. For example, the following are equivalent: + * Groups data into series using the first channel of *z*, *fill*, or *stroke* + * (if any), then derives new *x*, *x1*, and *x2* channels by computing a moving + * window of channel values and deriving reduced values from the window. 
For + * example, to compute a rolling average in *x*: * * ```js * Plot.windowX(24, {x: "Anomaly", y: "Date"}); - * Plot.windowX({k: 24, reduce: "mean", x: "Anomaly", y: "Date"}); - * Plot.windowX({k: 24, reduce: "mean"}, {x: "Anomaly", y: "Date"}); * ``` + * + * If *windowOptions* is a number, it is shorthand for the window size **k**. */ -export function windowX(options?: T & WindowOptions): Transformed; export function windowX(windowOptions?: WindowOptions | number, options?: T): Transformed; +export function windowX(options?: T & WindowOptions): Transformed; /** - * Computes a moving window of *y*, *y1*, and *y2* channel values around and - * then derives a summary statistic from values in the current window, say to - * compute a rolling average. The window options can be specified as the first - * argument, or grouped with the *options*. For example, the following are - * equivalent: + * Groups data into series using the first channel of *z*, *fill*, or *stroke* + * (if any), then derives new *y*, *y1*, and *y2* channels by computing a moving + * window of channel values and deriving reduced values from the window. For + * example, to compute a rolling average in *y*: * * ```js * Plot.windowY(24, {x: "Date", y: "Anomaly"}); - * Plot.windowY({k: 24, reduce: "mean", x: "Date", y: "Anomaly"}); - * Plot.windowY({k: 24, reduce: "mean"}, {x: "Date", y: "Anomaly"}); * ``` + * + * If *windowOptions* is a number, it is shorthand for the window size **k**. */ -export function windowY(options?: T & WindowOptions): Transformed; export function windowY(windowOptions?: WindowOptions | number, options?: T): Transformed; +export function windowY(options?: T & WindowOptions): Transformed; /** - * Returns a window map method suitable for use with Plot.map. The options are - * the window size *k*, or an object with properties *k*, *anchor*, *reduce*, or - * *strict*. 
+ * Given the specified window *options*, returns a corresponding map + * implementation for use with the map transform, allowing the window transform + * to be applied to arbitrary channels instead of only *x* and *y*. For example, + * to compute a rolling average for the *title* channel: * * ```js - * Plot.map({y: Plot.window(24)}, {x: "Date", y: "Close", stroke: "Symbol"}) + * Plot.map({title: Plot.window(24)}, {x: "Date", title: "Anomaly"}) * ``` + * + * If *options* is a number, it is shorthand for the window size **k**. */ export function window(options?: WindowOptions | number): Map; From 0ea70d72bd18867c082cb54cbd13825437927c7d Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 17:44:23 -0700 Subject: [PATCH 08/11] checkpoint edits --- src/channel.d.ts | 15 +++- src/reducer.d.ts | 177 +++----------------------------------- src/transforms/bin.d.ts | 19 ++-- src/transforms/group.d.ts | 32 +------ src/transforms/map.d.ts | 17 ++-- 5 files changed, 47 insertions(+), 213 deletions(-) diff --git a/src/channel.d.ts b/src/channel.d.ts index e4e261e0d7..48cfe0f1e5 100644 --- a/src/channel.d.ts +++ b/src/channel.d.ts @@ -193,7 +193,20 @@ export type ChannelDomainValueSpec = ChannelDomainValue | ({value: ChannelDomain /** How to impute scale domains from channel values. */ export type ChannelDomainSort = {[key in ScaleName]?: ChannelDomainValueSpec} & ChannelDomainOptions; -/** How to reduce channel values, e.g. when binning or grouping. */ +/** + * Output channels for aggregating transforms, such as bin and group. Each + * declared output channel has an associated reducer, and typically a + * corresponding input channel in *options*. Non-grouping channels declared in + * *options* but not *outputs* are computed on reduced data after grouping, + * which defaults to the array of data for the current group. + * + * If **title** is in *options* but not *outputs*, the reducer defaults to + * summarizing the most common values. 
If **href** is in *options* but not + * *outputs*, the reducer defaults to *first*. When **x1** or **x2** is in + * *outputs*, reads the input channel **x** if **x1** or **x2** is not in + * *options*; likewise for **y1** or **y2**, reads the input channel **y** if + * **y1** or **y2** is not in *options*. + */ export type ChannelReducers = {[key in ChannelName]?: T | {reduce: T; scale?: Channel["scale"]} | null}; /** Abstract (unscaled) values, and associated scale, per channel. */ diff --git a/src/reducer.d.ts b/src/reducer.d.ts index c8556fdaad..8602e355b2 100644 --- a/src/reducer.d.ts +++ b/src/reducer.d.ts @@ -8,155 +8,7 @@ export type ReducerPercentile = | "p75"; /** - * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *distinct* - the number of distinct values - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * a function to be passed the array of values for each bin and the extent of the bin - * * an object with a *reduce* method, and optionally a *scope* - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) 
- * * *distinct* - the number of distinct values - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * *x* - the middle of the bin’s *x* extent (when binning on *x*) - * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) - * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) - * * *y* - the middle of the bin’s *y* extent (when binning on *y*) - * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) - * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) - * * a function to be passed the array of values for each bin and the extent of the bin - * * an object with a *reduce* method, and optionally a *scope* - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *distinct* - the number of distinct values - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value 
(average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * *x* - the middle of the bin’s *x* extent (when binning on *x*) - * * *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) - * * *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) - * * *y* - the middle of the bin’s *y* extent (when binning on *y*) - * * *y1* - the lower bound of the bin’s *y* extent (when binning on *y*) - * * *y2* - the upper bound of the bin’s *y* extent (when binning on *y*) - * * a function to be passed the array of values for each bin and the extent of the bin - * * an object with a *reduce* method, and optionally a *scope* - * - * * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted - * frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s - * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * a function - passed the array of values for each group - * * an 
object with a *reduce* method, an optionally a *scope* - * - * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted - * frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s - * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * a function - passed the array of values for each group - * * an object with a *reduce* method, an optionally a *scope* - * - * - * The following aggregation methods are supported: - * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted - * frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *mode* - the value with the most occurrences - * * *pXX* - the percentile value, where XX is a number in [00,99] - * * *deviation* - the standard deviation - * * *variance* - 
the variance per [Welford’s - * algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * a function - passed the array of values for each group - * * an object with a *reduce* method, an optionally a *scope* - * - * - * The following aggregation methods are supported: + * The built-in reducer implementations; one of: * * - *first* - the first value, in input order * - *last* - the last value, in input order @@ -165,17 +17,16 @@ export type ReducerPercentile = * - *sum* - the sum of values * - *proportion* - the sum proportional to the overall total (weighted frequency) * - *proportion-facet* - the sum proportional to the facet total + * - *deviation* - the standard deviation * - *min* - the minimum value * - *min-index* - the zero-based index of the minimum value * - *max* - the maximum value * - *max-index* - the zero-based index of the maximum value * - *mean* - the mean value (average) * - *median* - the median value + * - *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) * - *mode* - the value with the most occurrences * - *pXX* - the percentile value, where XX is a number in [00,99] - * - *deviation* - the standard deviation - * - *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * */ export type ReducerName = | "first" @@ -197,19 +48,18 @@ export type ReducerName = | ReducerPercentile; /** - * A shorthand functional reducer implementation (from source S to target T): - * given an array of input channel *values*, returns the corresponding reduced - * output value. + * A shorthand functional reducer implementation: given an array of input + * channel *values*, returns the corresponding reduced output value. */ export type ReducerFunction = (values: S[]) => T; -/** A reducer implementation (from source S to target T). 
*/ +/** A reducer implementation. */ export interface ReducerImplementation { /** - * Given an *index* representing the contents of the current group, the array - * of input channel *values*, returns the corresponding reduced output value. - * TODO If no input channel is provided (e.g., *count*) then *values* may be - * undefined. + * Given an *index* representing the contents of the current group and the + * input channel’s array of *values*, returns the corresponding reduced output + * value. If no input channel is supplied (e.g., as with the *count* reducer) + * then *values* may be undefined. */ reduceIndex(index: number[], values: S[]): T; // TODO scope @@ -222,12 +72,5 @@ export interface ReducerImplementation { * - a named reducer implementation such as *count* or *sum* * - a function that takes an array of values and returns the reduced value * - an object that implements the *reduceIndex* method - * - * TODO Some reducers require an input channel (e.g., *mean*) while others do - * not (e.g., *count*). - * - * TODO When reducing the *x1*, we may read from the *x* channel if *x1* does - * not exist, and likewise for *x2*. When reducing the *y1*, we may read from - * the *y* channel if *y1* does not exist, and likewise for *y2*. 
*/ export type Reducer = ReducerName | ReducerFunction | ReducerImplementation; diff --git a/src/transforms/bin.d.ts b/src/transforms/bin.d.ts index 507200e982..72ae9bf54b 100644 --- a/src/transforms/bin.d.ts +++ b/src/transforms/bin.d.ts @@ -99,7 +99,7 @@ export interface BinOptions { /** * How to reduce binned values; one of: * - * - a standard reducer name, such as *count* or *first* + * - a generic reducer name, such as *count* or *first* * - *x* - the middle of the bin’s *x* extent (when binning on *x*) * - *x1* - the lower bound of the bin’s *x* extent (when binning on *x*) * - *x2* - the upper bound of the bin’s *x* extent (when binning on *x*) @@ -123,15 +123,20 @@ export type BinReducer = | "y1" | "y2"; -/** A functional bin reducer implementation. */ +/** + * A shorthand functional bin reducer implementation: given an array of input + * channel *values*, and the current bin’s *extent*, returns the corresponding + * reduced output value. + */ export type BinReducerFunction = (values: S[], extent: {x1: any; y1: any; x2: any; y2: any}) => T; /** A bin reducer implementation. */ export interface BinReducerImplementation { /** - * Given an *index* representing the contents of the current bin, the array of - * input channel *values*, and the current bin’s *extent*, returns the - * corresponding reduced output value. + * Given an *index* representing the contents of the current bin, the input + * channel’s array of *values*, and the current bin’s *extent*, returns the + * corresponding reduced output value. If no input channel is supplied (e.g., + * as with the *count* reducer) then *values* may be undefined. */ reduceIndex(index: number[], values: S[], extent: {x1: any; y1: any; x2: any; y2: any}): T; // TODO scope @@ -176,9 +181,7 @@ export interface BinOutputOptions extends BinOptions { } /** - * How to reduce binned channel values. - * - * TODO default **title** and **href** reducers + * Output channels (and options) for the bin transform. 
*/ export type BinOutputs = ChannelReducers & BinOutputOptions; diff --git a/src/transforms/group.d.ts b/src/transforms/group.d.ts index a431fc9694..4fc74e390f 100644 --- a/src/transforms/group.d.ts +++ b/src/transforms/group.d.ts @@ -35,7 +35,7 @@ export interface GroupOutputOptions { reverse?: boolean; } -/** How to reduce grouped channel values. */ +/** Output channels (and options) for the group transform. */ export type GroupOutputs = ChannelReducers & GroupOutputOptions; /** @@ -53,14 +53,6 @@ export type GroupOutputs = ChannelReducers & GroupOutputOptions; * ```js * Plot.barX(penguins, Plot.groupZ({x: "proportion"}, {fill: "species"})) * ``` - * - * If **title** is not in *outputs* but is in *options*, it defaults to - * summarizing the most common values. If **href** is not in *output* but is in - * *options*, it defaults to *first*. - * - * Non-grouping channels declared in *options* but not *outputs* are computed on - * reduced data after grouping, which defaults to the array of data for the - * current group. */ export function groupZ(outputs?: GroupOutputs, options?: T): Transformed; @@ -84,13 +76,6 @@ export function groupZ(outputs?: GroupOutputs, options?: T): Transformed; * If **x** is not in *options*, it defaults to identity, assuming that the data * is ordinal. If **x** is not in *outputs*, it defaults to *first*; the *x1* * and *x2* channels, if any, will also be dropped from the returned *options*. - * If **title** is not in *outputs* but is in *options*, it defaults to - * summarizing the most common values. If **href** is not in *output* but is in - * *options*, it defaults to *first*. - * - * Non-grouping channels declared in *options* but not *outputs* are computed on - * reduced data after grouping, which defaults to the array of data for the - * current group. 
*/ export function groupX(outputs?: GroupOutputs, options?: T): Transformed; @@ -114,13 +99,6 @@ export function groupX(outputs?: GroupOutputs, options?: T): Transformed; * If **y** is not in *options*, it defaults to identity, assuming that the data * is ordinal. If **y** is not in *outputs*, it defaults to *first*; the *y1* * and *y2* channels, if any, will also be dropped from the returned *options*. - * If **title** is not in *outputs* but is in *options*, it defaults to - * summarizing the most common values. If **href** is not in *output* but is in - * *options*, it defaults to *first*. - * - * Non-grouping channels declared in *options* but not *outputs* are computed on - * reduced data after grouping, which defaults to the array of data for the - * current group. */ export function groupY(outputs?: GroupOutputs, options?: T): Transformed; @@ -147,12 +125,6 @@ export function groupY(outputs?: GroupOutputs, options?: T): Transformed; * *x1* and *x2* channels, if any, will also be dropped from the returned * *options*. Likewise if **y** is not in *outputs*, it defaults to *first*; the * *y1* and *y2* channels, if any, will also be dropped from the returned - * *options*. If **title** is not in *outputs* but is in *options*, it defaults - * to summarizing the most common values. If **href** is not in *output* but is - * in *options*, it defaults to *first*. - * - * Non-grouping channels declared in *options* but not *outputs* are computed on - * reduced data after grouping, which defaults to the array of data for the - * current group. + * *options*. */ export function group(outputs?: GroupOutputs, options?: T): Transformed; diff --git a/src/transforms/map.d.ts b/src/transforms/map.d.ts index 322de5e728..2fa3774baa 100644 --- a/src/transforms/map.d.ts +++ b/src/transforms/map.d.ts @@ -20,11 +20,10 @@ export type MapName = "cumsum" | "rank" | "quantile"; /** A map implementation. 
*/ export interface MapImplementation { /** - * Given the *index* for each series (an array of integers), the input + * Given an *index* representing the contents of the current series, the input * channel’s array of *source* values, and the output channel’s array of * *target* values, populates the slots in *target* specified by *index* with - * the desired mapped output values. This method is invoked separately for - * each series. + * the desired mapped output values. */ mapIndex(index: number[], source: S[], target: T[]): void; } @@ -38,7 +37,14 @@ export interface MapImplementation { */ export type Map = MapImplementation | MapFunction | MapName; -/** Outputs for the map transform. */ +/** + * Outputs for the map transform. Each declared output channel must have a + * corresponding input channel in *options*. + * + * When **x1** or **x2** is in *outputs*, reads the input channel **x** if + * **x1** or **x2** is not in *options*; likewise for **y1** or **y2**, reads + * the input channel **y** if **y1** or **y2** is not in *options*. + */ export type MapOutputs = {[key in ChannelName]?: Map}; /** Options for the map transform. */ @@ -104,8 +110,5 @@ export function mapY(map: Map, options?: T & MapOptions): Transformed; * ```js * Plot.map({y: "cumsum"}, {y: d3.randomNormal()}) * ``` - * - * Each declared channel in *outputs* must have a corresponding input channel in - * *options*. 
*/ export function map(outputs?: MapOutputs, options?: T & MapOptions): Transformed; From bf08595cd2ab2f7b6ac76fba48098cd3466a983e Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 19:51:40 -0700 Subject: [PATCH 09/11] edits --- src/transforms/bin.d.ts | 152 +++++++++++++++++++------------------- src/transforms/group.d.ts | 74 +++++++++++-------- 2 files changed, 121 insertions(+), 105 deletions(-) diff --git a/src/transforms/bin.d.ts b/src/transforms/bin.d.ts index 72ae9bf54b..96190a858f 100644 --- a/src/transforms/bin.d.ts +++ b/src/transforms/bin.d.ts @@ -166,9 +166,9 @@ export interface BinOutputOptions extends BinOptions { /** * How to order bins. By default, bins are returned in ascending natural order * along *x*, *y*, and *z* (or *fill* or *stroke*). Bin order affects draw - * order of overlapping marks, and may be useful in conjunction with the stack - * transform which defaults to input order. For example to place the smallest - * bin within each stack on the baseline: + * order of overlapping marks, and may be useful with the stack transform + * which defaults to input order. For example to place the smallest bin within + * each stack on the baseline: * * ```js * Plot.binX({y: "count", sort: "count"}, {fill: "sex", x: "weight"}) @@ -180,103 +180,107 @@ export interface BinOutputOptions extends BinOptions { reverse?: boolean; } -/** - * Output channels (and options) for the bin transform. - */ +/** Output channels (and options) for the bin transform. */ export type BinOutputs = ChannelReducers & BinOutputOptions; /** - * Aggregates continuous data—quantitative or temporal values such as - * temperatures or times—into discrete bins and then computes summary statistics - * for each bin such as a count or sum. The binX transform is often used in - * conjunction with the rectY mark, to make histograms. 
+ * Bins on the **x** channel; then subdivides bins on the first channel of + * **z**, **fill**, or **stroke**, if any; then further subdivides bins on the + * **y** channel, if any and if none of **y**, **y1**, and **y2** are in + * *outputs*; and lastly for each channel in the specified *outputs*, applies + * the corresponding *reduce* method to produce new channel values from the + * binned input channel values. Each *reduce* method may be one of: * - * ``` - * Plot.rectY(penguins, Plot.binX({y: "count"}, {x: "culmen_length_mm"})) - * ``` - * - * Most aggregation methods require binding the output channel to an input - * channel; for example, if you want the **y** output channel to be a *sum* (not - * merely a count), there should be a corresponding **y** input channel - * specifying which values to sum. If there is not, *sum* will be equivalent to - * *count*. + * - a named reducer implementation such as *count* or *sum* + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method * - * TODO Group on {z, fill, stroke}, then optionally on y, then bin x. Will not - * group on y if generating explicit y, y1, or y2 output channel. Otherwise - * generates implicit y output channel. + * For example, for a histogram of observed culmen lengths: * - * TODO If no explicit x output channel, generates x1 and x2 output channels - * representing the extent of each bin, and x output channels representing the - * midpoint, say for for labels. + * ```js + * Plot.rectY(penguins, Plot.binX({y: "count"}, {x: "culmen_length_mm"})) + * ``` * - * TODO x defaults to identity + * The binX transform is often used with the rectY mark to make histograms; it + * is intended for aggregating continuous quantitative or temporal data, such as + * temperatures or times, into discrete bins. See the groupX transform for + * ordinal or categorical data. 
* - * TODO default insetLeft and insetRight + * If **x** is not in *options*, it defaults to identity. If **x** is not in + * *outputs*, by default produces **x1** and **x2** output channels representing + * the extent of each bin and an **x** output channel representing the bin + * midpoint, say for for labels. If **y** is not in outputs, **y1** and **y2** + * will be dropped from the returned *options*. The **insetLeft** and + * **insetRight** options default to 0.5. */ export function binX(outputs?: BinOutputs, options?: T & BinOptions): Transformed; /** - * Aggregates continuous data—quantitative or temporal values such as - * temperatures or times—into discrete bins and then computes summary statistics - * for each bin such as a count or sum. The binY transform is often used in - * conjunction with the rectX mark, to make vertical histograms. - * - * ``` - * Plot.rectX(penguins, Plot.binY({x: "count"}, {y: "culmen_length_mm"})) - * ``` + * Bins on the **y** channel; then subdivides bins on the first channel of + * **z**, **fill**, or **stroke**, if any; then further subdivides bins on the + * **x** channel, if any and if none of **x**, **x1**, and **x2** are in + * *outputs*; and lastly for each channel in the specified *outputs*, applies + * the corresponding *reduce* method to produce new channel values from the + * binned input channel values. Each *reduce* method may be one of: * - * Most aggregation methods require binding the output channel to an input - * channel; for example, if you want the **y** output channel to be a *sum* (not - * merely a count), there should be a corresponding **y** input channel - * specifying which values to sum. If there is not, *sum* will be equivalent to - * *count*. 
+ * - a named reducer implementation such as *count* or *sum* + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method * - * TODO Group on {z, fill, stroke}, then optionally on x, then bin y. Will not - * group on x if generating explicit x, x1, or x2 output channel. Otherwise - * generates implicit x output channel. + * For example, for a histogram of observed culmen lengths: * - * If no explicit y output channel, generates y1 and y2 output channels - * representing the extent of each bin, and y output channels representing the - * midpoint, say for for labels. + * ```js + * Plot.rectX(penguins, Plot.binY({x: "count"}, {y: "culmen_length_mm"})) + * ``` * - * TODO y defaults to identity + * The binY transform is often used with the rectX mark to make histograms; it + * is intended for aggregating continuous quantitative or temporal data, such as + * temperatures or times, into discrete bins. See the groupY transform for + * ordinal or categorical data. * - * TODO default insetTop and insetBottom + * If **y** is not in *options*, it defaults to identity. If **y** is not in + * *outputs*, by default produces **y1** and **y2** output channels representing + * the extent of each bin and a **y** output channel representing the bin + * midpoint, say for labels. If **x** is not in *outputs*, **x1** and **x2** + * will be dropped from the returned *options*. The **insetTop** and + * **insetBottom** options default to 0.5. */ export function binY(outputs?: BinOutputs, options?: T & BinOptions): Transformed; /** - * Aggregates continuous data—quantitative or temporal values such as - * temperatures or times—into discrete *x* and *y* bins and then computes - * summary statistics for each bin such as a count or sum. The bin transform is - * often used in conjunction with the rect mark, to make heatmaps.
+ * Bins on the **x** and **y** channels; then subdivides bins on the first + * channel of **z**, **fill**, or **stroke**, if any; and lastly for each + * channel in the specified *outputs*, applies the corresponding *reduce* method + * to produce new channel values from the binned input channel values. Each + * *reduce* method may be one of: * - * ``` - * Plot.rect(penguins, Plot.bin({fill: "count"}, {x: "culmen_depth_mm", y: "culmen_length_mm"})) - * ``` - * - * Most aggregation methods require binding the output channel to an input - * channel; for example, if you want the **fill** output channel to be a *sum* - * (not merely a count), there should be a corresponding **fill** input channel - * specifying which values to sum. If there is not, *sum* will be equivalent to - * *count*. + * - a named reducer implementation such as *count* or *sum* + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method * - * To pass separate binning options for *x* and *y*, the **x** and **y** input - * channels can be specified as an object with the options above and a **value** - * option to specify the input channel values. (🌶 NOT TYPED.) + * For example, for a heatmap of observed culmen lengths and depths: * - * If no explicit x output channel, generates x1 and x2 output channels - * representing the extent of each bin, and x output channels representing the - * midpoint, say for for labels. - - * Likewise if no explicit y output channel, generates y1 and y2 output channels - * representing the extent of each bin, and y output channels representing the - * midpoint, say for for labels. + * ```js + * Plot.rect(penguins, Plot.bin({fill: "count"}, {x: "culmen_depth_mm", y: "culmen_length_mm"})) + * ``` * - * TODO Group on {z, fill, stroke}, then bin on x and y. 
+ * The bin transform is often used with the rect mark to make heatmaps; it is + * intended for aggregating continuous quantitative or temporal data, such as + * temperatures or times, into discrete bins. See the group transform for + * ordinal or categorical data. * - * TODO tuple defaults + * If neither **x** nor **y** are in *options*, then **x** and **y** default to + * accessors assuming that *data* contains tuples [[*x₀*, *y₀*], [*x₁*, *y₁*], + * [*x₂*, *y₂*], …]. If **x** is not in *outputs*, by default produces **x1** + * and **x2** output channels representing the horizontal extent of each bin and + * an **x** output channel representing the horizontal midpoint, say for + * labels. Likewise if **y** is not in *outputs*, by default produces **y1** and + * **y2** output channels representing the vertical extent of each bin and a + * **y** output channel representing the vertical midpoint. The **insetTop**, + * **insetRight**, **insetBottom**, and **insetLeft** options default to 0.5. * - * TODO default insetTop, insetRight, insetBottom, insetLeft + * TODO To pass separate binning options for *x* and *y*, the **x** and **y** + * input channels can be specified as an object with the options above and a + * **value** option to specify the input channel values. (🌶 NOT TYPED.) */ export function bin(outputs?: BinOutputs, options?: T & BinOptions): Transformed; diff --git a/src/transforms/group.d.ts b/src/transforms/group.d.ts index 4fc74e390f..3d92f84626 100644 --- a/src/transforms/group.d.ts +++ b/src/transforms/group.d.ts @@ -21,9 +21,9 @@ export interface GroupOutputOptions { /** * How to order groups; if null (default), groups are returned in ascending * natural order along *x*, *y*, and *z* (or *fill* or *stroke*). Group order - * affects draw order of overlapping marks, and may be useful in conjunction - * with the stack transform which defaults to input order.
For example to - * place the smallest group within each stack on the baseline: + * affects draw order of overlapping marks, and may be useful with the stack + * transform which defaults to input order. For example to place the smallest + * group within each stack on the baseline: * * ```js * Plot.groupX({y: "count", sort: "count"}, {fill: "sex", x: "sport"}) @@ -39,10 +39,10 @@ export interface GroupOutputOptions { export type GroupOutputs = ChannelReducers & GroupOutputOptions; /** - * Groups on the first channel of *z*, *fill*, or *stroke*, if any, and then for - * each channel in the specified *outputs*, applies the corresponding *reduce* - * method to produce new channel values from the grouped input channel values. - * Each *reduce* method may be one of: + * Groups on the first channel of **z**, **fill**, or **stroke**, if any, and + * then for each channel in the specified *outputs*, applies the corresponding + * *reduce* method to produce new channel values from the grouped input channel + * values. Each *reduce* method may be one of: * * - a named reducer implementation such as *count* or *sum* * - a function that takes an array of values and returns the reduced value @@ -57,8 +57,8 @@ export type GroupOutputs = ChannelReducers & GroupOutputOptions; export function groupZ(outputs?: GroupOutputs, options?: T): Transformed; /** - * Groups on the *x* input channel; then subdivides groups on the first channel - * of *z*, *fill*, or *stroke*, if any; and then for each channel in the + * Groups on the **x** channel; then subdivides groups on the first channel of + * **z**, **fill**, or **stroke**, if any; and then for each channel in the * specified *outputs*, applies the corresponding *reduce* method to produce new * channel values from the grouped input channel values. 
Each *reduce* method * may be one of: @@ -73,41 +73,49 @@ export function groupZ(outputs?: GroupOutputs, options?: T): Transformed; * Plot.barY(penguins, Plot.groupX({y: "sum"}, {x: "species", y: "body_mass_g"})) * ``` * - * If **x** is not in *options*, it defaults to identity, assuming that the data - * is ordinal. If **x** is not in *outputs*, it defaults to *first*; the *x1* - * and *x2* channels, if any, will also be dropped from the returned *options*. + * The groupX transform is often used with the barY mark to make bar charts; it + * is intended for aggregating ordinal or categorical data, such as names. See + * the binX transform for continuous data. + * + * If **x** is not in *options*, it defaults to identity. If **x** is not in + * *outputs*, it defaults to *first*, and the **x1** and **x2** channels, if + * any, will be dropped from the returned *options*. */ export function groupX(outputs?: GroupOutputs, options?: T): Transformed; /** - * Groups on the *y* channel; then subdivides groups on the first channel of - * *z*, *fill*, or *stroke*, if any; and then for each channel in the specified - * *outputs*, applies the corresponding *reduce* method to produce new channel - * values from the grouped input channel values. Each *reduce* method may be one - * of: + * Groups on the **y** channel; then subdivides groups on the first channel of + * **z**, **fill**, or **stroke**, if any; and then for each channel in the + * specified *outputs*, applies the corresponding *reduce* method to produce new + * channel values from the grouped input channel values. 
Each *reduce* method + * may be one of: * * - a named reducer implementation such as *count* or *sum* * - a function that takes an array of values and returns the reduced value * - an object that implements the *reduceIndex* method * - * For example, for a vertical bar chart of species by total mass: + * For example, for a horizontal bar chart of species by total mass: * * ```js - * Plot.barY(penguins, Plot.groupX({y: "sum"}, {x: "species", y: "body_mass_g"})) + * Plot.barX(penguins, Plot.groupY({x: "sum"}, {y: "species", x: "body_mass_g"})) * ``` * - * If **y** is not in *options*, it defaults to identity, assuming that the data - * is ordinal. If **y** is not in *outputs*, it defaults to *first*; the *y1* - * and *y2* channels, if any, will also be dropped from the returned *options*. + * The groupY transform is often used with the barX mark to make bar charts; it + * is intended for aggregating ordinal or categorical data, such as names. See + * the binY transform for continuous data. + * + * If **y** is not in *options*, it defaults to identity. If **y** is not in + * *outputs*, it defaults to *first*, and the **y1** and **y2** channels, if + * any, will be dropped from the returned *options*. */ export function groupY(outputs?: GroupOutputs, options?: T): Transformed; /** - * Groups on the *x* and *y* channels; then subdivides groups on the first - * channel of *z*, *fill*, or *stroke*, if any; and then for each channel in the - * specified *outputs*, applies the corresponding *reduce* method to produce new - * channel values from the grouped input channel values. Each *reduce* method - * may be one of: + * Groups on the **x** and **y** channels; then subdivides groups on the first + * channel of **z**, **fill**, or **stroke**, if any; and then for each channel + * in the specified *outputs*, applies the corresponding *reduce* method to + * produce new channel values from the grouped input channel values. 
Each + * *reduce* method may be one of: * * - a named reducer implementation such as *count* or *sum* * - a function that takes an array of values and returns the reduced value @@ -119,12 +127,16 @@ export function groupY(outputs?: GroupOutputs, options?: T): Transformed; * Plot.cell(penguins, Plot.group({fill: "count"}, {x: "island", y: "species"})) * ``` * + * The group transform is often used with the cell mark to make heatmaps; it is + * intended for aggregating ordinal or categorical data, such as names. See the + * bin transform for continuous data. + * * If neither **x** nor **y** are in *options*, then **x** and **y** default to * accessors assuming that *data* contains tuples [[*x₀*, *y₀*], [*x₁*, *y₁*], - * [*x₂*, *y₂*], …]. If **x** is not in *outputs*, it defaults to *first*; the - * *x1* and *x2* channels, if any, will also be dropped from the returned - * *options*. Likewise if **y** is not in *outputs*, it defaults to *first*; the - * *y1* and *y2* channels, if any, will also be dropped from the returned + * [*x₂*, *y₂*], …]. If **x** is not in *outputs*, it defaults to *first*, and + * the **x1** and **x2** channels, if any, will be dropped from the returned + * *options*. Likewise if **y** is not in *outputs*, it defaults to *first*, and + * the **y1** and **y2** channels, if any, will be dropped from the returned * *options*. 
*/ export function group(outputs?: GroupOutputs, options?: T): Transformed; From 71856c3e31af5a4d0ff96dcd7328b7e2c843069e Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 20:16:37 -0700 Subject: [PATCH 10/11] fix #1384 - ChannelValueBinSpec --- src/transforms/bin.d.ts | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/src/transforms/bin.d.ts b/src/transforms/bin.d.ts index 96190a858f..7c75210b36 100644 --- a/src/transforms/bin.d.ts +++ b/src/transforms/bin.d.ts @@ -1,4 +1,4 @@ -import type {ChannelReducers} from "../channel.js"; +import type {ChannelReducers, ChannelValue} from "../channel.js"; import type {RangeInterval} from "../interval.js"; import type {Reducer} from "../reducer.js"; import type {Transformed} from "./basic.js"; @@ -42,8 +42,8 @@ export type ThresholdsFunction = (values: T[], min: T, max: T) => Range */ export type Thresholds = ThresholdsName | ThresholdsFunction | RangeInterval | T[] | number; -/** Options for the bin transform, with a domain of type T. */ -export interface BinOptions { +/** Options for the bin transform. */ +export interface BinOptions { /** * If false or zero (default), produce a frequency distribution; if true or a * positive number, produce a cumulative distribution; if a negative number, @@ -59,7 +59,7 @@ export interface BinOptions { * **thresholds** are specified as an interval and no domain is specified, the * effective domain will be extended to align with the interval. 
*/ - domain?: ((values: T[]) => [min: T, max: T]) | [min: T, max: T]; + domain?: ((values: any[]) => [min: any, max: any]) | [min: any, max: any]; /** * How to subdivide the domain into discrete bins; defaults to *auto*; one of: @@ -76,7 +76,7 @@ export interface BinOptions { * Plot.rectY(numbers, Plot.binX({y: "count"}, {thresholds: 10})) * ``` */ - thresholds?: Thresholds; + thresholds?: Thresholds; /** * How to subdivide the domain into discrete bins; a stricter alternative to @@ -93,7 +93,7 @@ export interface BinOptions { * Plot.rectY(numbers, Plot.binX({y: "count"}, {interval: 1})) * ``` */ - interval?: RangeInterval; + interval?: RangeInterval; } /** @@ -180,6 +180,21 @@ export interface BinOutputOptions extends BinOptions { reverse?: boolean; } +/** + * When binning on **x** or **y**, you can specify the channel values as a + * {value} object to provide separate bin options for each. + */ +export type ChannelValueBinSpec = ChannelValue | ({value: ChannelValue} & BinOptions); + +/** Inputs to the binX transform. */ +export type BinXInputs = Omit & {x?: ChannelValueBinSpec} & BinOptions; + +/** Inputs to the binY transform. */ +export type BinYInputs = Omit & {y?: ChannelValueBinSpec} & BinOptions; + +/** Inputs to the bin transform. */ +export type BinInputs = Omit & {x?: ChannelValueBinSpec; y?: ChannelValueBinSpec} & BinOptions; + /** Output channels (and options) for the bin transform. */ export type BinOutputs = ChannelReducers & BinOutputOptions; @@ -213,7 +228,7 @@ export type BinOutputs = ChannelReducers & BinOutputOptions; * will be dropped from the returned *options*. The **insetLeft** and * **insetRight** options default to 0.5. 
*/ -export function binX(outputs?: BinOutputs, options?: T & BinOptions): Transformed; +export function binX(outputs?: BinOutputs, options?: BinXInputs): Transformed; /** * Bins on the **y** channel; then subdivides bins on the first channel of @@ -245,7 +260,7 @@ export function binX(outputs?: BinOutputs, options?: T & BinOptions): Transfo * will be dropped from the returned *options*. The **insetTop** and * **insetBottom** options default to 0.5. */ -export function binY(outputs?: BinOutputs, options?: T & BinOptions): Transformed; +export function binY(outputs?: BinOutputs, options?: BinYInputs): Transformed; /** * Bins on the **x** and **y** channels; then subdivides bins on the first @@ -278,9 +293,5 @@ export function binY(outputs?: BinOutputs, options?: T & BinOptions): Transfo * **y2** output channels representing the vertical extent of each bin and a * **y** output channel representing the vertical midpoint. The **insetTop**, * **insetRight**, **insetBottom**, and **insetLeft** options default to 0.5. - * - * TODO To pass separate binning options for *x* and *y*, the **x** and **y** - * input channels can be specified as an object with the options above and a - * **value** option to specify the input channel values. (🌶 NOT TYPED.) 
*/ -export function bin(outputs?: BinOutputs, options?: T & BinOptions): Transformed; +export function bin(outputs?: BinOutputs, options?: BinInputs): Transformed; From c5b55bf6cebd5275339c2a9ad50f9730b25316e1 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 24 Mar 2023 20:31:38 -0700 Subject: [PATCH 11/11] consolidate BinOutputs; hexbin edits --- src/transforms/bin.d.ts | 40 ++---------------------------- src/transforms/group.d.ts | 12 ++++----- src/transforms/hexbin.d.ts | 51 ++++++++++++++++---------------------- 3 files changed, 29 insertions(+), 74 deletions(-) diff --git a/src/transforms/bin.d.ts b/src/transforms/bin.d.ts index 7c75210b36..2961eb7687 100644 --- a/src/transforms/bin.d.ts +++ b/src/transforms/bin.d.ts @@ -2,6 +2,7 @@ import type {ChannelReducers, ChannelValue} from "../channel.js"; import type {RangeInterval} from "../interval.js"; import type {Reducer} from "../reducer.js"; import type {Transformed} from "./basic.js"; +import type {GroupOutputOptions} from "./group.js"; /** * The built-in thresholds implementations; one of: @@ -143,43 +144,6 @@ export interface BinReducerImplementation { // TODO label } -/** Options for outputs of the bin transform. */ -export interface BinOutputOptions extends BinOptions { - /** - * How to reduce data; defaults to the identity reducer, outputting the array - * of data for each bin in input order. - */ - data?: BinReducer; - - /** - * How to filter bins: if the reducer emits a falsey value, the bin will be - * dropped; by default, empty bins are dropped. Use null to disable filtering - * and return all bins, for example to impute missing zeroes when summing - * values for a line chart. - * - * ```js - * Plot.binX({y: "count", filter: null}, {x: "weight"}) - * ``` - */ - filter?: BinReducer | null; - - /** - * How to order bins. By default, bins are returned in ascending natural order - * along *x*, *y*, and *z* (or *fill* or *stroke*). 
Bin order affects draw - * order of overlapping marks, and may be useful with the stack transform - * which defaults to input order. For example to place the smallest bin within - * each stack on the baseline: - * - * ```js - * Plot.binX({y: "count", sort: "count"}, {fill: "sex", x: "weight"}) - * ``` - */ - sort?: BinReducer | null; - - /** If true, reverse the order of generated bins; defaults to false. */ - reverse?: boolean; -} - /** * When binning on **x** or **y**, you can specify the channel values as a * {value} object to provide separate bin options for each. @@ -196,7 +160,7 @@ export type BinYInputs = Omit & {y?: ChannelValueBinSpec} & BinOption export type BinInputs = Omit & {x?: ChannelValueBinSpec; y?: ChannelValueBinSpec} & BinOptions; /** Output channels (and options) for the bin transform. */ -export type BinOutputs = ChannelReducers & BinOutputOptions; +export type BinOutputs = ChannelReducers & GroupOutputOptions & BinOptions; /** * Bins on the **x** channel; then subdivides bins on the first channel of diff --git a/src/transforms/group.d.ts b/src/transforms/group.d.ts index 3d92f84626..a48caecc4e 100644 --- a/src/transforms/group.d.ts +++ b/src/transforms/group.d.ts @@ -2,13 +2,13 @@ import type {ChannelReducers} from "../channel.js"; import type {Reducer} from "../reducer.js"; import type {Transformed} from "./basic.js"; -/** Options for outputs of the group transform. */ -export interface GroupOutputOptions { +/** Options for outputs of the group (and bin) transform. */ +export interface GroupOutputOptions { /** - * How to reduce data; defaults to the identity reducer, outputting the array + * How to reduce data; defaults to the identity reducer, outputting the subset * of data for each group in input order. 
*/ - data?: Reducer; + data?: T; /** * How to filter groups: if the reducer emits a falsey value, the group will @@ -16,7 +16,7 @@ export interface GroupOutputOptions { * filtering and return all groups, for example to impute missing zeroes when * summing values for a line chart. */ - filter?: Reducer | null; + filter?: T | null; /** * How to order groups; if null (default), groups are returned in ascending @@ -29,7 +29,7 @@ export interface GroupOutputOptions { * Plot.groupX({y: "count", sort: "count"}, {fill: "sex", x: "sport"}) * ``` */ - sort?: Reducer | null; + sort?: T | null; /** If true, reverse the order of generated groups; defaults to false. */ reverse?: boolean; diff --git a/src/transforms/hexbin.d.ts b/src/transforms/hexbin.d.ts index 7deacafbb4..6f6adedff3 100644 --- a/src/transforms/hexbin.d.ts +++ b/src/transforms/hexbin.d.ts @@ -17,39 +17,30 @@ export interface HexbinOptions { } /** - * Groups points specified by the *x* and *y* channels into hexagonal bins in - * scaled coordinates (pixels), computing new *x* and *y* channels as the - * centers of each bin, and deriving new output channels by applying the - * specified reducers (such as *count*) to each bin’s values. The first of the - * *z*, *fill*, or *stroke* channels, if any, will be used to subdivide bins. + * Bins hexagonally on the scaled **x** and **y** channels; then subdivides bins + * on the first channel of **z**, **fill**, or **stroke**, if any; and lastly + * for each channel in the specified *outputs*, applies the corresponding + * *reduce* method to produce new channel values from the binned input channel + * values. Each *reduce* method may be one of: * - * The hexbin transform can be applied to any mark that consumes *x* and *y*, - * such as the dot, image, text, and vector marks. For the dot mark, the - * **symbol** option defaults to *hexagon*, and the *r* option defaults to half - * the **binWidth**. 
If a **fill** output channel is declared, the **stroke** - * option defaults to *none*. + * - a named reducer implementation such as *count* or *sum* + * - a function that takes an array of values and returns the reduced value + * - an object that implements the *reduceIndex* method * - * The reducer for each channel in *outputs* may be specified as: + * For example, for a heatmap of observed culmen lengths and depths: * - * * *first* - the first value, in input order - * * *last* - the last value, in input order - * * *count* - the number of elements (frequency) - * * *distinct* - the number of distinct values - * * *sum* - the sum of values - * * *proportion* - the sum proportional to the overall total (weighted frequency) - * * *proportion-facet* - the sum proportional to the facet total - * * *min* - the minimum value - * * *min-index* - the zero-based index of the minimum value - * * *max* - the maximum value - * * *max-index* - the zero-based index of the maximum value - * * *mean* - the mean value (average) - * * *median* - the median value - * * *deviation* - the standard deviation - * * *variance* - the variance per [Welford’s algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) - * * *mode* - the value with the most occurrences - * * a function to be passed the array of values for each bin - * * an object with a *reduceIndex* method + * ```js + * Plot.dot(penguins, Plot.hexbin({fill: "count"}, {x: "culmen_depth_mm", y: "culmen_length_mm"})) + * ``` * - * See also the hexgrid mark. + * The hexbin transform can be applied to any mark that consumes **x** and + * **y**, such as the dot, image, text, and vector marks; it is intended for + * aggregating continuous quantitative or temporal data, such as temperatures or + * times, into discrete hexagonal bins. For the dot mark, the **symbol** option + * defaults to *hexagon*, and the *r* option defaults to half the **binWidth**. 
+ * If a **fill** output channel is declared, the **stroke** option defaults to + * *none*. + * + * To draw empty hexagons, see the hexgrid mark. */ export function hexbin<T>(outputs?: ChannelReducers, options?: T & HexbinOptions): Initialized<T>;