diff --git a/CHANGELOG.md b/CHANGELOG.md index 833787b61..f59742fe2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -870,6 +870,28 @@ This release closes the following issue: +
+ +#### [@stdlib/blas/ext/base/dsnansumpw](https://github.com/stdlib-js/stdlib/tree/develop/lib/node_modules/%40stdlib/blas/ext/base/dsnansumpw) + +
+ +
+ +##### Features + +- [`de75f04`](https://github.com/stdlib-js/stdlib/commit/de75f0465fdaa762112195f16f6334b121204664) - add C `ndarray` API and refactor `blas/ext/base/dsnansumpw` [(#3262)](https://github.com/stdlib-js/stdlib/pull/3262) + +
+ + + +
+ +
+ + +
#### [@stdlib/blas/tools](https://github.com/stdlib-js/stdlib/tree/develop/lib/node_modules/%40stdlib/blas/tools) @@ -957,6 +979,7 @@ A total of 8 people contributed to this release. Thank you to the following cont
+- [`de75f04`](https://github.com/stdlib-js/stdlib/commit/de75f0465fdaa762112195f16f6334b121204664) - **feat:** add C `ndarray` API and refactor `blas/ext/base/dsnansumpw` [(#3262)](https://github.com/stdlib-js/stdlib/pull/3262) _(by Muhammad Haris, Philipp Burckhardt)_ - [`755b053`](https://github.com/stdlib-js/stdlib/commit/755b053d5b7d5cbd675c060afc1ee049e431fdde) - **docs:** add missing header in `blas/ext/base/dsnannsumors` [(#3263)](https://github.com/stdlib-js/stdlib/pull/3263) _(by Muhammad Haris)_ - [`a39d0f3`](https://github.com/stdlib-js/stdlib/commit/a39d0f372c70df837af84d321fa0b9b3d61f453b) - **refactor:** update `offset` handling and function documentation for `blas/ext/base/dnannsumors` [(#3252)](https://github.com/stdlib-js/stdlib/pull/3252) _(by Muhammad Haris)_ - [`4458c49`](https://github.com/stdlib-js/stdlib/commit/4458c49e9901bdd83048c773b8cacc6361b8729b) - **fix:** extract the scalar constant as a float in `blas/ext/base/dsapxsum` [(#3254)](https://github.com/stdlib-js/stdlib/pull/3254) _(by Muhammad Haris)_ diff --git a/ext/base/dsnansumpw/README.md b/ext/base/dsnansumpw/README.md index 731d6df06..20c995b5a 100644 --- a/ext/base/dsnansumpw/README.md +++ b/ext/base/dsnansumpw/README.md @@ -36,7 +36,7 @@ limitations under the License. var dsnansumpw = require( '@stdlib/blas/ext/base/dsnansumpw' ); ``` -#### dsnansumpw( N, x, stride ) +#### dsnansumpw( N, x, strideX ) Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation, and returning an extended precision result. 
@@ -44,9 +44,8 @@ Computes the sum of single-precision floating-point strided array elements, igno var Float32Array = require( '@stdlib/array/float32' ); var x = new Float32Array( [ 1.0, -2.0, NaN, 2.0 ] ); -var N = x.length; -var v = dsnansumpw( N, x, 1 ); +var v = dsnansumpw( x.length, x, 1 ); // returns 1.0 ``` @@ -54,9 +53,9 @@ The function has the following parameters: - **N**: number of indexed elements. - **x**: input [`Float32Array`][@stdlib/array/float32]. -- **stride**: index increment for `x`. +- **strideX**: stride length for `x`. -The `N` and `stride` parameters determine which elements in the strided array are accessed at runtime. For example, to compute the sum of every other element in `x`, +The `N` and stride parameters determine which elements in the strided array are accessed at runtime. For example, to compute the sum of every other element: ```javascript var Float32Array = require( '@stdlib/array/float32' ); @@ -81,25 +80,24 @@ var v = dsnansumpw( 4, x1, 2 ); // returns 5.0 ``` -#### dsnansumpw.ndarray( N, x, stride, offset ) +#### dsnansumpw.ndarray( N, x, strideX, offsetX ) -Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using pairwise summation with extended accumulation and alternative indexing semantics. +Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation and alternative indexing semantics, and returning an extended precision result. ```javascript var Float32Array = require( '@stdlib/array/float32' ); var x = new Float32Array( [ 1.0, -2.0, NaN, 2.0 ] ); -var N = x.length; -var v = dsnansumpw.ndarray( N, x, 1, 0 ); +var v = dsnansumpw.ndarray( x.length, x, 1, 0 ); // returns 1.0 ``` The function has the following additional parameters: -- **offset**: starting index for `x`. +- **offsetX**: starting index for `x`. 
-While [`typed array`][mdn-typed-array] views mandate a view offset based on the underlying `buffer`, the `offset` parameter supports indexing semantics based on a starting index. For example, to calculate the sum of every other value in `x` starting from the second value +While [`typed array`][mdn-typed-array] views mandate a view offset based on the underlying `buffer`, the offset parameter supports indexing semantics based on a starting index. For example, to calculate the sum of every other element starting from the second element: ```javascript var Float32Array = require( '@stdlib/array/float32' ); @@ -155,8 +153,123 @@ console.log( v ); + + * * * +
+ +## C APIs + + + +
+ +
+ + + + + +
+ +### Usage + +```c +#include "stdlib/blas/ext/base/dsnansumpw.h" +``` + +#### stdlib_strided_dsnansumpw( N, \*X, strideX ) + +Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation, and returning an extended precision result. + +```c +const float x[] = { 1.0f, -2.0f, 0.0f/0.0f, 2.0f }; + +double v = stdlib_strided_dsnansumpw( 4, x, 1 ); +// returns 1.0 +``` + +The function accepts the following arguments: + +- **N**: `[in] CBLAS_INT` number of indexed elements. +- **X**: `[in] float*` input array. +- **strideX**: `[in] CBLAS_INT` stride length for `X`. + +```c +double stdlib_strided_dsnansumpw( const CBLAS_INT N, const float *X, const CBLAS_INT strideX ); +``` + +#### stdlib_strided_dsnansumpw_ndarray( N, \*X, strideX, offsetX ) + +Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation and alternative indexing semantics, and returning an extended precision result. + +```c +const float x[] = { 1.0f, -2.0f, 0.0f/0.0f, 2.0f }; + +double v = stdlib_strided_dsnansumpw_ndarray( 4, x, 1, 0 ); +// returns 1.0 +``` + +The function accepts the following arguments: + +- **N**: `[in] CBLAS_INT` number of indexed elements. +- **X**: `[in] float*` input array. +- **strideX**: `[in] CBLAS_INT` stride length for `X`. +- **offsetX**: `[in] CBLAS_INT` starting index for `X`. + +```c +double stdlib_strided_dsnansumpw_ndarray( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX ); +``` + +
+ + + + + +
+ +
+ + + + + +
+ +### Examples + +```c +#include "stdlib/blas/ext/base/dsnansumpw.h" +#include <stdio.h> + +int main( void ) { + // Create a strided array: + const float x[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 0.0f/0.0f, 0.0f/0.0f }; + + // Specify the number of elements: + const int N = 5; + + // Specify the stride length: + const int strideX = 2; + + // Compute the sum: + double v = stdlib_strided_dsnansumpw( N, x, strideX ); + + // Print the result: + printf( "sum: %lf\n", v ); +} +``` + +
+ + + +
+ + +
## References diff --git a/ext/base/dsnansumpw/benchmark/benchmark.js b/ext/base/dsnansumpw/benchmark/benchmark.js index 189a4dc4c..eac0a9738 100644 --- a/ext/base/dsnansumpw/benchmark/benchmark.js +++ b/ext/base/dsnansumpw/benchmark/benchmark.js @@ -32,6 +32,19 @@ var dsnansumpw = require( './../lib/dsnansumpw.js' ); // FUNCTIONS // +/** +* Returns a random number. +* +* @private +* @returns {number} random number +*/ +function rand() { + if ( bernoulli( 0.8 ) > 0 ) { + return NaN; + } + return uniform( -10, 10 ); +} + /** * Creates a benchmark function. * @@ -43,13 +56,6 @@ function createBenchmark( len ) { var x = filledarrayBy( len, 'float32', rand ); return benchmark; - function rand() { - if ( bernoulli( 0.8 ) > 0 ) { - return NaN; - } - return uniform( -10, 10 ); - } - function benchmark( b ) { var v; var i; diff --git a/ext/base/dsnansumpw/benchmark/benchmark.native.js b/ext/base/dsnansumpw/benchmark/benchmark.native.js index 99559b0f4..75158c54d 100644 --- a/ext/base/dsnansumpw/benchmark/benchmark.native.js +++ b/ext/base/dsnansumpw/benchmark/benchmark.native.js @@ -41,6 +41,19 @@ var opts = { // FUNCTIONS // +/** +* Returns a random number. +* +* @private +* @returns {number} random number +*/ +function rand() { + if ( bernoulli( 0.8 ) > 0 ) { + return NaN; + } + return uniform( -10, 10 ); +} + /** * Creates a benchmark function. 
* @@ -52,13 +65,6 @@ function createBenchmark( len ) { var x = filledarrayBy( len, 'float32', rand ); return benchmark; - function rand() { - if ( bernoulli( 0.8 ) > 0 ) { - return NaN; - } - return uniform( -10, 10 ); - } - function benchmark( b ) { var v; var i; diff --git a/ext/base/dsnansumpw/benchmark/benchmark.ndarray.js b/ext/base/dsnansumpw/benchmark/benchmark.ndarray.js index 1312a749f..8d2af3ccb 100644 --- a/ext/base/dsnansumpw/benchmark/benchmark.ndarray.js +++ b/ext/base/dsnansumpw/benchmark/benchmark.ndarray.js @@ -32,6 +32,19 @@ var dsnansumpw = require( './../lib/ndarray.js' ); // FUNCTIONS // +/** +* Returns a random number. +* +* @private +* @returns {number} random number +*/ +function rand() { + if ( bernoulli( 0.8 ) > 0 ) { + return NaN; + } + return uniform( -10, 10 ); +} + /** * Creates a benchmark function. * @@ -43,13 +56,6 @@ function createBenchmark( len ) { var x = filledarrayBy( len, 'float32', rand ); return benchmark; - function rand() { - if ( bernoulli( 0.8 ) > 0 ) { - return NaN; - } - return uniform( -10, 10 ); - } - function benchmark( b ) { var v; var i; diff --git a/ext/base/dsnansumpw/benchmark/benchmark.ndarray.native.js b/ext/base/dsnansumpw/benchmark/benchmark.ndarray.native.js index 80823aabf..4f5c98e80 100644 --- a/ext/base/dsnansumpw/benchmark/benchmark.ndarray.native.js +++ b/ext/base/dsnansumpw/benchmark/benchmark.ndarray.native.js @@ -41,6 +41,19 @@ var opts = { // FUNCTIONS // +/** +* Returns a random number. +* +* @private +* @returns {number} random number +*/ +function rand() { + if ( bernoulli( 0.8 ) > 0 ) { + return NaN; + } + return uniform( -10, 10 ); +} + /** * Creates a benchmark function. 
* @@ -52,13 +65,6 @@ function createBenchmark( len ) { var x = filledarrayBy( len, 'float32', rand ); return benchmark; - function rand() { - if ( bernoulli( 0.8 ) > 0 ) { - return NaN; - } - return uniform( -10, 10 ); - } - function benchmark( b ) { var v; var i; diff --git a/ext/base/dsnansumpw/benchmark/c/benchmark.length.c b/ext/base/dsnansumpw/benchmark/c/benchmark.length.c index 369bb05be..1aaed7bca 100644 --- a/ext/base/dsnansumpw/benchmark/c/benchmark.length.c +++ b/ext/base/dsnansumpw/benchmark/c/benchmark.length.c @@ -94,7 +94,7 @@ static float rand_float( void ) { * @param len array length * @return elapsed time in seconds */ -static double benchmark( int iterations, int len ) { +static double benchmark1( int iterations, int len ) { double elapsed; float x[ len ]; double v; @@ -111,6 +111,7 @@ static double benchmark( int iterations, int len ) { v = 0.0; t = tic(); for ( i = 0; i < iterations; i++ ) { + // cppcheck-suppress uninitvar v = stdlib_strided_dsnansumpw( len, x, 1 ); if ( v != v ) { printf( "should not return NaN\n" ); @@ -124,6 +125,44 @@ static double benchmark( int iterations, int len ) { return elapsed; } +/** +* Runs a benchmark. +* +* @param iterations number of iterations +* @param len array length +* @return elapsed time in seconds +*/ +static double benchmark2( int iterations, int len ) { + double elapsed; + float x[ len ]; + double v; + double t; + int i; + + for ( i = 0; i < len; i++ ) { + if ( rand_float() < 0.2f ) { + x[ i ] = 0.0f / 0.0f; // NaN + } else { + x[ i ] = ( rand_float()*20000.0f ) - 10000.0f; + } + } + v = 0.0; + t = tic(); + for ( i = 0; i < iterations; i++ ) { + // cppcheck-suppress uninitvar + v = stdlib_strided_dsnansumpw_ndarray( len, x, 1, 0 ); + if ( v != v ) { + printf( "should not return NaN\n" ); + break; + } + } + elapsed = tic() - t; + if ( v != v ) { + printf( "should not return NaN\n" ); + } + return elapsed; +} + /** * Main execution sequence. 
*/ @@ -146,7 +185,18 @@ int main( void ) { for ( j = 0; j < REPEATS; j++ ) { count += 1; printf( "# c::%s:len=%d\n", NAME, len ); - elapsed = benchmark( iter, len ); + elapsed = benchmark1( iter, len ); + print_results( iter, elapsed ); + printf( "ok %d benchmark finished\n", count ); + } + } + for ( i = MIN; i <= MAX; i++ ) { + len = pow( 10, i ); + iter = ITERATIONS / pow( 10, i-1 ); + for ( j = 0; j < REPEATS; j++ ) { + count += 1; + printf( "# c::%s:ndarray:len=%d\n", NAME, len ); + elapsed = benchmark2( iter, len ); print_results( iter, elapsed ); printf( "ok %d benchmark finished\n", count ); } diff --git a/ext/base/dsnansumpw/docs/repl.txt b/ext/base/dsnansumpw/docs/repl.txt index 18a09d034..6710654ed 100644 --- a/ext/base/dsnansumpw/docs/repl.txt +++ b/ext/base/dsnansumpw/docs/repl.txt @@ -1,11 +1,11 @@ -{{alias}}( N, x, stride ) +{{alias}}( N, x, strideX ) Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation, and returning an extended precision result. - The `N` and `stride` parameters determine which elements in the strided - array are accessed at runtime. + The `N` and stride parameters determine which elements in the strided array + are accessed at runtime. Indexing is relative to the first index. To introduce an offset, use a typed array view. @@ -20,8 +20,8 @@ x: Float32Array Input array. - stride: integer - Index increment. + strideX: integer + Stride length. 
Returns ------- @@ -35,7 +35,7 @@ > {{alias}}( x.length, x, 1 ) 1.0 - // Using `N` and `stride` parameters: + // Using `N` and stride parameters: > x = new {{alias:@stdlib/array/float32}}( [ -2.0, 1.0, 1.0, -5.0, 2.0, -1.0, NaN, NaN ] ); > {{alias}}( 4, x, 2 ) 1.0 @@ -47,13 +47,14 @@ -1.0 -{{alias}}.ndarray( N, x, stride, offset ) +{{alias}}.ndarray( N, x, strideX, offsetX ) Computes the sum of single-precision floating-point strided array elements, - ignoring `NaN` values and using pairwise summation with extended - accumulation and alternative indexing semantics. + ignoring `NaN` values, using pairwise summation with extended accumulation, + and alternative indexing semantics, and returning an extended precision + result. While typed array views mandate a view offset based on the underlying - buffer, the `offset` parameter supports indexing semantics based on a + buffer, the offset parameter supports indexing semantics based on a starting index. Parameters @@ -64,10 +65,10 @@ x: Float32Array Input array. - stride: integer - Index increment. + strideX: integer + Stride length. - offset: integer + offsetX: integer Starting index. Returns diff --git a/ext/base/dsnansumpw/docs/types/index.d.ts b/ext/base/dsnansumpw/docs/types/index.d.ts index 8df3c8697..518331324 100644 --- a/ext/base/dsnansumpw/docs/types/index.d.ts +++ b/ext/base/dsnansumpw/docs/types/index.d.ts @@ -27,7 +27,7 @@ interface Routine { * * @param N - number of indexed elements * @param x - input array - * @param stride - stride length + * @param strideX - stride length * @returns sum * * @example @@ -38,15 +38,15 @@ interface Routine { * var v = dsnansumpw( x.length, x, 1 ); * // returns 1.0 */ - ( N: number, x: Float32Array, stride: number ): number; + ( N: number, x: Float32Array, strideX: number ): number; /** - * Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values and using extended accumulation and alternative indexing semantics. 
+ * Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation, and alternative indexing semantics, and returning an extended precision result. * * @param N - number of indexed elements * @param x - input array - * @param stride - stride length - * @param offset - starting index + * @param strideX - stride length + * @param offsetX - starting index * @returns sum * * @example @@ -57,7 +57,7 @@ interface Routine { * var v = dsnansumpw.ndarray( x.length, x, 1, 0 ); * // returns 1.0 */ - ndarray( N: number, x: Float32Array, stride: number, offset: number ): number; + ndarray( N: number, x: Float32Array, strideX: number, offsetX: number ): number; } /** @@ -65,7 +65,7 @@ interface Routine { * * @param N - number of indexed elements * @param x - input array -* @param stride - stride length +* @param strideX - stride length * @returns sum * * @example diff --git a/ext/base/dsnansumpw/examples/c/example.c b/ext/base/dsnansumpw/examples/c/example.c index 2b1caf89a..b00693023 100644 --- a/ext/base/dsnansumpw/examples/c/example.c +++ b/ext/base/dsnansumpw/examples/c/example.c @@ -17,21 +17,20 @@ */ #include "stdlib/blas/ext/base/dsnansumpw.h" -#include #include int main( void ) { // Create a strided array: - const float x[] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 0.0/0.0, 0.0/0.0 }; + const float x[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 0.0f/0.0f, 0.0f/0.0f }; // Specify the number of elements: - const int64_t N = 5; + const int N = 5; // Specify the stride length: - const int64_t stride = 2; + const int strideX = 2; // Compute the sum: - double v = stdlib_strided_dsnansumpw( N, x, stride ); + double v = stdlib_strided_dsnansumpw( N, x, strideX ); // Print the result: printf( "sum: %lf\n", v ); diff --git a/ext/base/dsnansumpw/include/stdlib/blas/ext/base/dsnansumpw.h b/ext/base/dsnansumpw/include/stdlib/blas/ext/base/dsnansumpw.h index 265ce9d23..7dbcd67a5 100644 
--- a/ext/base/dsnansumpw/include/stdlib/blas/ext/base/dsnansumpw.h +++ b/ext/base/dsnansumpw/include/stdlib/blas/ext/base/dsnansumpw.h @@ -19,7 +19,7 @@ #ifndef STDLIB_BLAS_EXT_BASE_DSNANSUMPW_H #define STDLIB_BLAS_EXT_BASE_DSNANSUMPW_H -#include +#include "stdlib/blas/base/shared.h" /* * If C++, prevent name mangling so that the compiler emits a binary file having undecorated names, thus mirroring the behavior of a C compiler. @@ -29,9 +29,14 @@ extern "C" { #endif /** -* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended summation, and returning an extended precision result. +* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation, and returning an extended precision result. */ -double stdlib_strided_dsnansumpw( const int64_t N, const float *X, const int64_t stride ); +double API_SUFFIX(stdlib_strided_dsnansumpw)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX ); + +/** +* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation, and alternative indexing semantics, and returning an extended precision result. 
+*/ +double API_SUFFIX(stdlib_strided_dsnansumpw_ndarray)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX ); #ifdef __cplusplus } diff --git a/ext/base/dsnansumpw/lib/dsnansumpw.js b/ext/base/dsnansumpw/lib/dsnansumpw.js index b64fce0d5..1e935da0e 100644 --- a/ext/base/dsnansumpw/lib/dsnansumpw.js +++ b/ext/base/dsnansumpw/lib/dsnansumpw.js @@ -20,8 +20,8 @@ // MODULES // -var isnanf = require( '@stdlib/math/base/assert/is-nanf' ); -var sum = require( './ndarray.js' ); +var stride2offset = require( '@stdlib/strided/base/stride2offset' ); +var ndarray = require( './ndarray.js' ); // MAIN // @@ -39,7 +39,7 @@ var sum = require( './ndarray.js' ); * * @param {PositiveInteger} N - number of indexed elements * @param {Float32Array} x - input array -* @param {integer} stride - stride length +* @param {integer} strideX - stride length * @returns {number} sum * * @example @@ -50,37 +50,8 @@ var sum = require( './ndarray.js' ); * var v = dsnansumpw( x.length, x, 1 ); * // returns 1.0 */ -function dsnansumpw( N, x, stride ) { - var ix; - var s; - var i; - - if ( N <= 0 ) { - return 0.0; - } - if ( N === 1 || stride === 0 ) { - if ( isnanf( x[ 0 ] ) ) { - return 0.0; - } - return x[ 0 ]; - } - if ( stride < 0 ) { - ix = (1-N) * stride; - } else { - ix = 0; - } - if ( N < 8 ) { - // Use simple summation... 
- s = 0.0; - for ( i = 0; i < N; i++ ) { - if ( isnanf( x[ ix ] ) === false ) { - s += x[ ix ]; - } - ix += stride; - } - return s; - } - return sum( N, x, stride, ix ); +function dsnansumpw( N, x, strideX ) { + return ndarray( N, x, strideX, stride2offset( N, strideX ) ); } diff --git a/ext/base/dsnansumpw/lib/dsnansumpw.native.js b/ext/base/dsnansumpw/lib/dsnansumpw.native.js index 6b12faead..5fd56e20c 100644 --- a/ext/base/dsnansumpw/lib/dsnansumpw.native.js +++ b/ext/base/dsnansumpw/lib/dsnansumpw.native.js @@ -30,7 +30,7 @@ var addon = require( './../src/addon.node' ); * * @param {PositiveInteger} N - number of indexed elements * @param {Float32Array} x - input array -* @param {integer} stride - stride length +* @param {integer} strideX - stride length * @returns {number} sum * * @example @@ -41,8 +41,8 @@ var addon = require( './../src/addon.node' ); * var v = dsnansumpw( x.length, x, 1 ); * // returns 1.0 */ -function dsnansumpw( N, x, stride ) { - return addon( N, x, stride ); +function dsnansumpw( N, x, strideX ) { + return addon( N, x, strideX ); } diff --git a/ext/base/dsnansumpw/lib/ndarray.js b/ext/base/dsnansumpw/lib/ndarray.js index 98cbe8eaf..eacf60cf2 100644 --- a/ext/base/dsnansumpw/lib/ndarray.js +++ b/ext/base/dsnansumpw/lib/ndarray.js @@ -45,8 +45,8 @@ var BLOCKSIZE = 128; * * @param {PositiveInteger} N - number of indexed elements * @param {Float32Array} x - input array -* @param {integer} stride - stride length -* @param {NonNegativeInteger} offset - starting index +* @param {integer} strideX - stride length +* @param {NonNegativeInteger} offsetX - starting index * @returns {number} sum * * @example @@ -57,7 +57,7 @@ var BLOCKSIZE = 128; * var v = dsnansumpw( 5, x, 2, 1 ); * // returns 5.0 */ -function dsnansumpw( N, x, stride, offset ) { +function dsnansumpw( N, x, strideX, offsetX ) { var ix; var s0; var s1; @@ -75,13 +75,13 @@ function dsnansumpw( N, x, stride, offset ) { if ( N <= 0 ) { return 0.0; } - if ( N === 1 || stride === 0 ) { - 
if ( isnanf( x[ offset ] ) ) { + ix = offsetX; + if ( strideX === 0 ) { + if ( isnanf( x[ ix ] ) ) { return 0.0; } - return x[ offset ]; + return N * x[ ix ]; } - ix = offset; if ( N < 8 ) { // Use simple summation... s = 0.0; @@ -89,64 +89,64 @@ function dsnansumpw( N, x, stride, offset ) { if ( isnanf( x[ ix ] ) === false ) { s += x[ ix ]; } - ix += stride; + ix += strideX; } return s; } if ( N <= BLOCKSIZE ) { // Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)... s0 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s1 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s2 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s3 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s4 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s5 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s6 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s7 = ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; M = N % 8; for ( i = 8; i < N-M; i += 8 ) { s0 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s1 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s2 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s3 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s4 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s5 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s6 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; s7 += ( isnanf( x[ ix ] ) ) ? 0.0 : x[ ix ]; - ix += stride; + ix += strideX; } // Pairwise sum the accumulators: - s = ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7)); + s = ( (s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7) ); // Clean-up loop... 
for ( i; i < N; i++ ) { if ( isnanf( x[ ix ] ) === false ) { s += x[ ix ]; } - ix += stride; + ix += strideX; } return s; } // Recurse by dividing by two, but avoiding non-multiples of unroll factor... n = floor( N/2 ); n -= n % 8; - return dsnansumpw( n, x, stride, ix ) + dsnansumpw( N-n, x, stride, ix+(n*stride) ); // eslint-disable-line max-len + return dsnansumpw( n, x, strideX, ix ) + dsnansumpw( N-n, x, strideX, ix+(n*strideX) ); // eslint-disable-line max-len } diff --git a/ext/base/dsnansumpw/lib/ndarray.native.js b/ext/base/dsnansumpw/lib/ndarray.native.js index 0aeb58f4a..c4d00d3e1 100644 --- a/ext/base/dsnansumpw/lib/ndarray.native.js +++ b/ext/base/dsnansumpw/lib/ndarray.native.js @@ -20,9 +20,7 @@ // MODULES // -var minViewBufferIndex = require( '@stdlib/strided/base/min-view-buffer-index' ); -var offsetView = require( '@stdlib/strided/base/offset-view' ); -var addon = require( './dsnansumpw.native.js' ); +var addon = require( './../src/addon.node' ); // MAIN // @@ -32,8 +30,8 @@ var addon = require( './dsnansumpw.native.js' ); * * @param {PositiveInteger} N - number of indexed elements * @param {Float32Array} x - input array -* @param {integer} stride - stride length -* @param {NonNegativeInteger} offset - starting index +* @param {integer} strideX - stride length +* @param {NonNegativeInteger} offsetX - starting index * @returns {number} sum * * @example @@ -45,11 +43,8 @@ var addon = require( './dsnansumpw.native.js' ); * var v = dsnansumpw( 5, x, 2, 1 ); * // returns 5.0 */ -function dsnansumpw( N, x, stride, offset ) { - var view; - offset = minViewBufferIndex( N, stride, offset ); - view = offsetView( x, offset ); - return addon( N, view, stride ); +function dsnansumpw( N, x, strideX, offsetX ) { + return addon.ndarray( N, x, strideX, offsetX ); } diff --git a/ext/base/dsnansumpw/manifest.json b/ext/base/dsnansumpw/manifest.json index 90bef8ed4..f6e94aba3 100644 --- a/ext/base/dsnansumpw/manifest.json +++ b/ext/base/dsnansumpw/manifest.json @@ 
-28,61 +28,54 @@ { "task": "build", "src": [ - "./src/dsnansumpw.c" + "./src/main.c" ], "include": [ "./include" ], - "libraries": [ - "-lm" - ], + "libraries": [], "libpath": [], "dependencies": [ "@stdlib/napi/export", "@stdlib/napi/argv", "@stdlib/napi/argv-int64", "@stdlib/napi/argv-strided-float32array", - "@stdlib/math/base/assert/is-nanf" + "@stdlib/math/base/assert/is-nanf", + "@stdlib/napi/create-double", + "@stdlib/strided/base/stride2offset", + "@stdlib/blas/base/shared" ] }, { "task": "benchmark", "src": [ - "./src/dsnansumpw.c" + "./src/main.c" ], "include": [ "./include" ], - "libraries": [ - "-lm" - ], + "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/napi/export", - "@stdlib/napi/argv", - "@stdlib/napi/argv-int64", - "@stdlib/napi/argv-strided-float32array", - "@stdlib/math/base/assert/is-nanf" + "@stdlib/math/base/assert/is-nanf", + "@stdlib/strided/base/stride2offset", + "@stdlib/blas/base/shared" ] }, { "task": "examples", "src": [ - "./src/dsnansumpw.c" + "./src/main.c" ], "include": [ "./include" ], - "libraries": [ - "-lm" - ], + "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/napi/export", - "@stdlib/napi/argv", - "@stdlib/napi/argv-int64", - "@stdlib/napi/argv-strided-float32array", - "@stdlib/math/base/assert/is-nanf" + "@stdlib/math/base/assert/is-nanf", + "@stdlib/strided/base/stride2offset", + "@stdlib/blas/base/shared" ] } ] diff --git a/ext/base/dsnansumpw/src/addon.c b/ext/base/dsnansumpw/src/addon.c index 2bea45c5f..11f7c694b 100644 --- a/ext/base/dsnansumpw/src/addon.c +++ b/ext/base/dsnansumpw/src/addon.c @@ -17,12 +17,14 @@ */ #include "stdlib/blas/ext/base/dsnansumpw.h" +#include "stdlib/blas/base/shared.h" #include "stdlib/napi/export.h" #include "stdlib/napi/argv.h" #include "stdlib/napi/argv_int64.h" #include "stdlib/napi/argv_strided_float32array.h" +#include "stdlib/strided/base/stride2offset.h" +#include "stdlib/napi/create_double.h" #include -#include /** * Receives JavaScript callback invocation 
data. @@ -34,14 +36,27 @@ static napi_value addon( napi_env env, napi_callback_info info ) { STDLIB_NAPI_ARGV( env, info, argv, argc, 3 ); STDLIB_NAPI_ARGV_INT64( env, N, argv, 0 ); - STDLIB_NAPI_ARGV_INT64( env, stride, argv, 2 ); - STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, X, N, stride, argv, 1 ); - - napi_value v; - napi_status status = napi_create_double( env, stdlib_strided_dsnansumpw( N, X, stride ), &v ); - assert( status == napi_ok ); + STDLIB_NAPI_ARGV_INT64( env, strideX, argv, 2 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, X, N, strideX, argv, 1 ); + STDLIB_NAPI_CREATE_DOUBLE( env, API_SUFFIX(stdlib_strided_dsnansumpw)( N, X, strideX ), v ); + return v; +} +/** +* Receives JavaScript callback invocation data. +* +* @param env environment under which the function is invoked +* @param info callback data +* @return Node-API value +*/ +static napi_value addon_method( napi_env env, napi_callback_info info ) { + STDLIB_NAPI_ARGV( env, info, argv, argc, 4 ); + STDLIB_NAPI_ARGV_INT64( env, N, argv, 0 ); + STDLIB_NAPI_ARGV_INT64( env, strideX, argv, 2 ); + STDLIB_NAPI_ARGV_INT64( env, offsetX, argv, 3 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, X, N, strideX, argv, 1 ); + STDLIB_NAPI_CREATE_DOUBLE( env, API_SUFFIX(stdlib_strided_dsnansumpw_ndarray)( N, X, strideX, offsetX ), v ); return v; } -STDLIB_NAPI_MODULE_EXPORT_FCN( addon ) +STDLIB_NAPI_MODULE_EXPORT_FCN_WITH_METHOD( addon, "ndarray", addon_method ); diff --git a/ext/base/dsnansumpw/src/dsnansumpw.c b/ext/base/dsnansumpw/src/main.c similarity index 61% rename from ext/base/dsnansumpw/src/dsnansumpw.c rename to ext/base/dsnansumpw/src/main.c index 1b718c38f..5cdd6b032 100644 --- a/ext/base/dsnansumpw/src/dsnansumpw.c +++ b/ext/base/dsnansumpw/src/main.c @@ -1,7 +1,7 @@ /** * @license Apache-2.0 * -* Copyright (c) 2020 The Stdlib Authors. +* Copyright (c) 2024 The Stdlib Authors. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,8 @@ #include "stdlib/blas/ext/base/dsnansumpw.h" #include "stdlib/math/base/assert/is_nanf.h" -#include +#include "stdlib/strided/base/stride2offset.h" +#include "stdlib/blas/base/shared.h" /** * Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation, and returning an extended precision result. @@ -31,19 +32,39 @@ * * - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050). * -* @param N number of indexed elements -* @param X input array -* @param stride stride length -* @return output value +* @param N number of indexed elements +* @param X input array +* @param strideX stride length +* @return output value */ -double stdlib_strided_dsnansumpw( const int64_t N, const float *X, const int64_t stride ) { - float *xp1; - float *xp2; +double API_SUFFIX(stdlib_strided_dsnansumpw)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX ) { + CBLAS_INT ox = stdlib_strided_stride2offset( N, strideX ); + return API_SUFFIX( stdlib_strided_dsnansumpw_ndarray )( N, X, strideX, ox ); +} + +/** +* Computes the sum of single-precision floating-point strided array elements, ignoring `NaN` values, using pairwise summation with extended accumulation, and alternative indexing semantics, and returning an extended precision result. +* +* ## Method +* +* - This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`. +* +* ## References +* +* - Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050). 
+* +* @param N number of indexed elements +* @param X input array +* @param strideX stride length +* @param offsetX starting index +* @return output value +*/ +double API_SUFFIX(stdlib_strided_dsnansumpw_ndarray)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX ) { + CBLAS_INT ix; + CBLAS_INT M; + CBLAS_INT n; + CBLAS_INT i; double sum; - int64_t ix; - int64_t M; - int64_t n; - int64_t i; double s0; double s1; double s2; @@ -56,16 +77,12 @@ double stdlib_strided_dsnansumpw( const int64_t N, const float *X, const int64_t if ( N <= 0 ) { return 0.0; } - if ( N == 1 || stride == 0 ) { - if ( stdlib_base_is_nanf( X[ 0 ] ) ) { + ix = offsetX; + if ( strideX == 0 ) { + if ( stdlib_base_is_nanf( X[ ix ] ) ) { return 0.0; } - return X[ 0 ]; - } - if ( stride < 0 ) { - ix = (1-N) * stride; - } else { - ix = 0; + return N * X[ ix ]; } if ( N < 8 ) { // Use simple summation... @@ -74,7 +91,7 @@ double stdlib_strided_dsnansumpw( const int64_t N, const float *X, const int64_t if ( !stdlib_base_is_nanf( X[ ix ] ) ) { sum += (double)X[ ix ]; } - ix += stride; + ix += strideX; } return sum; } @@ -82,62 +99,55 @@ double stdlib_strided_dsnansumpw( const int64_t N, const float *X, const int64_t if ( N <= 128 ) { // Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)... s0 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s1 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s2 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s3 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s4 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s5 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s6 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 
0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s7 = ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; M = N % 8; for ( i = 8; i < N-M; i += 8 ) { s0 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s1 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s2 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s3 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s4 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s5 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s6 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; s7 += ( stdlib_base_is_nanf( X[ ix ] ) ) ? 0.0 : (double)X[ ix ]; - ix += stride; + ix += strideX; } // Pairwise sum the accumulators: - sum = ((s0+s1) + (s2+s3)) + ((s4+s5) + (s6+s7)); + sum = ( (s0+s1) + (s2+s3) ) + ( (s4+s5) + (s6+s7) ); // Clean-up loop... for (; i < N; i++ ) { if ( !stdlib_base_is_nanf( X[ ix ] ) ) { sum += (double)X[ ix ]; } - ix += stride; + ix += strideX; } return sum; } // Recurse by dividing by two, but avoiding non-multiples of unroll factor... 
n = N / 2; n -= n % 8; - if ( stride < 0 ) { - xp1 = (float *)X + ( (n-N)*stride ); - xp2 = (float *)X; - } else { - xp1 = (float *)X; - xp2 = (float *)X + ( n*stride ); - } - return stdlib_strided_dsnansumpw( n, xp1, stride ) + stdlib_strided_dsnansumpw( N-n, xp2, stride ); + return API_SUFFIX(stdlib_strided_dsnansumpw_ndarray)( n, X, strideX, ix ) + API_SUFFIX(stdlib_strided_dsnansumpw_ndarray)( N-n, X, strideX, ix+(n*strideX) ); } diff --git a/ext/base/dsnansumpw/test/test.dsnansumpw.js b/ext/base/dsnansumpw/test/test.dsnansumpw.js index 1ae603873..5ba8725e2 100644 --- a/ext/base/dsnansumpw/test/test.dsnansumpw.js +++ b/ext/base/dsnansumpw/test/test.dsnansumpw.js @@ -178,14 +178,26 @@ tape( 'the function supports a negative `stride` parameter', function test( t ) t.end(); }); -tape( 'if provided a `stride` parameter equal to `0`, the function returns the first element', function test( t ) { +tape( 'if provided a `stride` parameter equal to `0`, the function returns the sum of the first element repeated N times', function test( t ) { var x; var v; x = new Float32Array( [ 1.0, -2.0, -4.0, 5.0, 3.0 ] ); v = dsnansumpw( x.length, x, 0 ); - t.strictEqual( v, 1.0, 'returns expected value' ); + t.strictEqual( v, 5.0, 'returns expected value' ); + + t.end(); +}); + +tape( 'if provided a `stride` parameter equal to `0` and the first element is `NaN`, the function returns 0.0', function test( t ) { + var x; + var v; + + x = new Float32Array( [ NaN, -2.0, -4.0, 5.0, 3.0 ] ); + + v = dsnansumpw( x.length, x, 0 ); + t.strictEqual( v, 0.0, 'returns expected value' ); t.end(); }); diff --git a/ext/base/dsnansumpw/test/test.dsnansumpw.native.js b/ext/base/dsnansumpw/test/test.dsnansumpw.native.js index 03b587912..1d1766a7d 100644 --- a/ext/base/dsnansumpw/test/test.dsnansumpw.native.js +++ b/ext/base/dsnansumpw/test/test.dsnansumpw.native.js @@ -269,14 +269,26 @@ tape( 'the function supports a negative `stride` parameter', opts, function test t.end(); }); -tape( 'if provided a 
`stride` parameter equal to `0`, the function returns the first element', opts, function test( t ) { +tape( 'if provided a `stride` parameter equal to `0`, the function returns the sum of the first element repeated N times', opts, function test( t ) { var x; var v; x = new Float32Array( [ 1.0, -2.0, -4.0, 5.0, 3.0 ] ); v = dsnansumpw( x.length, x, 0 ); - t.strictEqual( v, 1.0, 'returns expected value' ); + t.strictEqual( v, 5.0, 'returns expected value' ); + + t.end(); +}); + +tape( 'if provided a `stride` parameter equal to `0` and the first element is `NaN`, the function returns 0.0', opts, function test( t ) { + var x; + var v; + + x = new Float32Array( [ NaN, -2.0, -4.0, 5.0, 3.0 ] ); + + v = dsnansumpw( x.length, x, 0 ); + t.strictEqual( v, 0.0, 'returns expected value' ); t.end(); }); diff --git a/ext/base/dsnansumpw/test/test.ndarray.js b/ext/base/dsnansumpw/test/test.ndarray.js index e964f47e1..77e2e2bf6 100644 --- a/ext/base/dsnansumpw/test/test.ndarray.js +++ b/ext/base/dsnansumpw/test/test.ndarray.js @@ -178,14 +178,26 @@ tape( 'the function supports a negative `stride` parameter', function test( t ) t.end(); }); -tape( 'if provided a `stride` parameter equal to `0`, the function returns the first indexed element', function test( t ) { +tape( 'if provided a `stride` parameter equal to `0`, the function returns the sum of the first element repeated N times', function test( t ) { var x; var v; x = new Float32Array( [ 1.0, -2.0, -4.0, 5.0, 3.0 ] ); v = dsnansumpw( x.length, x, 0, 0 ); - t.strictEqual( v, 1.0, 'returns expected value' ); + t.strictEqual( v, 5.0, 'returns expected value' ); + + t.end(); +}); + +tape( 'if provided a `stride` parameter equal to `0` and the first element is `NaN`, the function returns 0.0', function test( t ) { + var x; + var v; + + x = new Float32Array( [ NaN, -2.0, -4.0, 5.0, 3.0 ] ); + + v = dsnansumpw( x.length, x, 0, 0 ); + t.strictEqual( v, 0.0, 'returns expected value' ); t.end(); }); diff --git 
a/ext/base/dsnansumpw/test/test.ndarray.native.js b/ext/base/dsnansumpw/test/test.ndarray.native.js index ce60a1071..1be16e846 100644 --- a/ext/base/dsnansumpw/test/test.ndarray.native.js +++ b/ext/base/dsnansumpw/test/test.ndarray.native.js @@ -187,14 +187,26 @@ tape( 'the function supports a negative `stride` parameter', opts, function test t.end(); }); -tape( 'if provided a `stride` parameter equal to `0`, the function returns the first indexed element', opts, function test( t ) { +tape( 'if provided a `stride` parameter equal to `0`, the function returns the sum of the first element repeated N times', opts, function test( t ) { var x; var v; x = new Float32Array( [ 1.0, -2.0, -4.0, 5.0, 3.0 ] ); v = dsnansumpw( x.length, x, 0, 0 ); - t.strictEqual( v, 1.0, 'returns expected value' ); + t.strictEqual( v, 5.0, 'returns expected value' ); + + t.end(); +}); + +tape( 'if provided a `stride` parameter equal to `0` and the first element is `NaN`, the function returns 0.0', opts, function test( t ) { + var x; + var v; + + x = new Float32Array( [ NaN, -2.0, -4.0, 5.0, 3.0 ] ); + + v = dsnansumpw( x.length, x, 0, 0 ); + t.strictEqual( v, 0.0, 'returns expected value' ); t.end(); });