Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Jensen Shannon square roots (#233) #234

Merged
merged 10 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 40 additions & 10 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,35 +168,65 @@ open target/criterion/report/index.html

## JavaScript

If you don't have NPM installed:
### NodeJS

If you don't have the environment configured, here are the [installation options](https://github.com/nvm-sh/nvm?tab=readme-ov-file#install--update-script) with different tools:

```sh
wget -qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash # Linux
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash # macOS
```

Install dependencies:

```sh
wget -qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash
nvm install 20
npm install -g typescript # Install the TypeScript compiler globally
npm install --save-dev @types/node # Install the Node.js type definitions as a dev dependency
```

Testing and benchmarking:

```sh
npm install -g typescript
npm run build-js
npm test
npm run bench
npm run build-js # Build the JavaScript code using TypeScript configurations
npm test # Run the test suite
npm run bench # Run the benchmark script
```

### Deno

If you don't have the environment configured, here are the [installation options](https://docs.deno.com/runtime/getting_started/installation/) with different tools:

```sh
wget -qO- https://deno.land/x/install/install.sh | sh # Linux
curl -fsSL https://deno.land/install.sh | sh # macOS
irm https://deno.land/install.ps1 | iex # Windows
```

Running with Deno:
Testing:

```sh
deno test --allow-read
```

Running with Bun:
### Bun

If you don't have the environment configured, here are the [installation options](https://bun.sh/docs/installation) with different tools:

```sh
npm install -g bun
bun test
wget -qO- https://bun.sh/install | bash # for Linux
curl -fsSL https://bun.sh/install | bash # for macOS and WSL
```

Testing:

```sh
bun install
bun test ./scripts/test.mjs
```

Note: running the test suite with Bun doesn't work for now.

## Swift

```sh
Expand Down
23 changes: 14 additions & 9 deletions include/simsimd/probability.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ SIMSIMD_PUBLIC void simsimd_js_f16_sapphire(simsimd_f16_t const* a, simsimd_f16_
d += ai * SIMSIMD_LOG((ai + epsilon) / (mi + epsilon)); \
d += bi * SIMSIMD_LOG((bi + epsilon) / (mi + epsilon)); \
} \
*result = (simsimd_distance_t)d / 2; \
*result = SIMSIMD_SQRT(((simsimd_distance_t)d / 2)); \
}

SIMSIMD_MAKE_KL(serial, f64, f64, SIMSIMD_DEREFERENCE, SIMSIMD_F32_DIVISION_EPSILON) // simsimd_kl_f64_serial
Expand Down Expand Up @@ -219,12 +219,13 @@ SIMSIMD_PUBLIC void simsimd_js_f32_neon(simsimd_f32_t const *a, simsimd_f32_t co
float32x4_t log_ratio_b_vec = _simsimd_log2_f32_neon(ratio_b_vec);
float32x4_t prod_a_vec = vmulq_f32(a_vec, log_ratio_a_vec);
float32x4_t prod_b_vec = vmulq_f32(b_vec, log_ratio_b_vec);

sum_vec = vaddq_f32(sum_vec, vaddq_f32(prod_a_vec, prod_b_vec));
if (n != 0) goto simsimd_js_f32_neon_cycle;

simsimd_f32_t log2_normalizer = 0.693147181f;
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer;
*result = sum / 2;
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer / 2;
*result = _simsimd_sqrt_f32_neon(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -296,8 +297,8 @@ SIMSIMD_PUBLIC void simsimd_js_f16_neon(simsimd_f16_t const *a, simsimd_f16_t co
if (n) goto simsimd_js_f16_neon_cycle;

simsimd_f32_t log2_normalizer = 0.693147181f;
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer;
*result = sum / 2;
simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer / 2;
*result = _simsimd_sqrt_f32_neon(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -403,8 +404,8 @@ SIMSIMD_PUBLIC void simsimd_js_f16_haswell(simsimd_f16_t const *a, simsimd_f16_t

simsimd_f32_t log2_normalizer = 0.693147181f;
simsimd_f32_t sum = _simsimd_reduce_f32x8_haswell(sum_vec);
sum *= log2_normalizer;
*result = sum / 2;
sum *= log2_normalizer / 2;
*result = _simsimd_sqrt_f32_haswell(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -496,7 +497,9 @@ SIMSIMD_PUBLIC void simsimd_js_f32_skylake(simsimd_f32_t const *a, simsimd_f32_t
if (n) goto simsimd_js_f32_skylake_cycle;

simsimd_f32_t log2_normalizer = 0.693147181f;
*result = _mm512_reduce_add_ps(_mm512_add_ps(sum_a_vec, sum_b_vec)) * log2_normalizer / 2;
simsimd_f32_t sum = _mm512_reduce_add_ps(_mm512_add_ps(sum_a_vec, sum_b_vec));
sum *= log2_normalizer / 2;
*result = _simsimd_sqrt_f32_haswell(sum);
}

#pragma clang attribute pop
Expand Down Expand Up @@ -586,7 +589,9 @@ SIMSIMD_PUBLIC void simsimd_js_f16_sapphire(simsimd_f16_t const *a, simsimd_f16_
if (n) goto simsimd_js_f16_sapphire_cycle;

simsimd_f32_t log2_normalizer = 0.693147181f;
*result = _mm512_reduce_add_ph(_mm512_add_ph(sum_a_vec, sum_b_vec)) * log2_normalizer / 2;
simsimd_f32_t sum = _mm512_reduce_add_ph(_mm512_add_ph(sum_a_vec, sum_b_vec));
sum *= log2_normalizer / 2;
*result = _simsimd_sqrt_f32_haswell(sum);
}

#pragma clang attribute pop
Expand Down
41 changes: 26 additions & 15 deletions javascript/fallback.ts
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ export const jaccard = (a: Uint8Array, b: Uint8Array): number => {
};

/**
* @brief Computes the kullbackleibler similarity coefficient between two vectors.
* @brief Computes the Kullback-Leibler divergence between two probability distributions.
* @param {Float64Array|Float32Array} a - The first vector.
* @param {Float64Array|Float32Array} b - The second vector.
* @returns {number} The Kullback-Leibler divergence of distribution a from distribution b.
Expand All @@ -182,38 +182,49 @@ export const kullbackleibler = (a: Float64Array | Float32Array, b: Float64Array
}

let divergence = 0.0;

for (let i = 0; i < a.length; i++) {
if (a[i] > 0) {
if (b[i] === 0) {
throw new Error(
"Division by zero encountered in KL divergence calculation"
);
}
divergence += a[i] * Math.log(a[i] / b[i]);
if (a[i] < 0 || b[i] < 0) {
throw new Error("Negative values are not allowed in probability distributions");
}
if (b[i] === 0) {
throw new Error(
"Division by zero encountered in KL divergence calculation"
);
}
divergence += a[i] * Math.log(a[i] / b[i]);
}

return divergence;
};

/**
* @brief Computes the jensenshannon similarity coefficient between two vectors.
* @param {Float64Array|Float32Array} a - The first vector.
* @param {Float64Array|Float32Array} b - The second vector.
* @returns {number} The Jaccard similarity coefficient between vectors a and b.
* @brief Computes the Jensen-Shannon distance between two probability distributions.
* @param {Float64Array|Float32Array} a - The first probability distribution.
* @param {Float64Array|Float32Array} b - The second probability distribution.
* @returns {number} The Jensen-Shannon distance between distributions a and b.
*/
export const jensenshannon = (a: Float64Array | Float32Array, b: Float64Array | Float32Array): number => {
if (a.length !== b.length) {
throw new Error("Arrays must be of the same length");
}

const m = a.map((value, index) => (value + b[index]) / 2);
let divergence = 0;
for (let i = 0; i < a.length; i++) {
if (a[i] < 0 || b[i] < 0) {
throw new Error("Negative values are not allowed in probability distributions");
}
const m = (a[i] + b[i]) / 2;
if (m > 0) {
if (a[i] > 0) divergence += a[i] * Math.log(a[i] / m);
if (b[i] > 0) divergence += b[i] * Math.log(b[i] / m);
}
}

const divergence = 0.5 * kullbackleibler(a, m) + 0.5 * kullbackleibler(b, m);
divergence /= 2;
return Math.sqrt(divergence);
};


export default {
sqeuclidean,
euclidean,
Expand Down
19 changes: 10 additions & 9 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
},
"devDependencies": {
"@types/bindings": "^1.5.5",
"@types/node": "^20.17.1",
"@types/node": "^20.17.6",
"node-gyp": "^10.0.1",
"prebuildify": "^6.0.0",
"typescript": "^5.3.3"
Expand Down
10 changes: 5 additions & 5 deletions rust/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -877,19 +877,19 @@ mod tests {
// Adding new tests for probability similarities
#[test]
fn test_js_f32() {
let a = &[0.1, 0.9, 0.0];
let b = &[0.2, 0.8, 0.0];
let a: &[f32; 3] = &[0.1, 0.9, 0.0];
let b: &[f32; 3] = &[0.2, 0.8, 0.0];

if let Some(result) = ProbabilitySimilarity::jensenshannon(a, b) {
println!("The result of js_f32 is {:.8}", result);
assert_almost_equal(0.01, result, 0.01); // Example value
assert_almost_equal(0.099, result, 0.01); // Example value
}
}

#[test]
fn test_kl_f32() {
let a = &[0.1, 0.9, 0.0];
let b = &[0.2, 0.8, 0.0];
let a: &[f32; 3] = &[0.1, 0.9, 0.0];
let b: &[f32; 3] = &[0.2, 0.8, 0.0];

if let Some(result) = ProbabilitySimilarity::kullbackleibler(a, b) {
println!("The result of kl_f32 is {:.8}", result);
Expand Down
Loading
Loading