diff --git a/CHANGELOG.md b/CHANGELOG.md index 52be88048b77..e8056d58bf32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ### Features +* `[diff-sequences]` New package compares items in two sequences to find a + **longest common subsequence**. + ([#5407](https://github.com/facebook/jest/pull/5407)) * `[jest-config]` Allow lastComit and changedFilesWithAncestor via JSON config ([#5476](https://github.com/facebook/jest/pull/5476)) * `[jest-util]` Add deletion to `process.env` as well diff --git a/packages/diff-sequences/.npmignore b/packages/diff-sequences/.npmignore new file mode 100644 index 000000000000..85e48fe7b0a4 --- /dev/null +++ b/packages/diff-sequences/.npmignore @@ -0,0 +1,3 @@ +**/__mocks__/** +**/__tests__/** +src diff --git a/packages/diff-sequences/README.md b/packages/diff-sequences/README.md new file mode 100644 index 000000000000..60b3e9587912 --- /dev/null +++ b/packages/diff-sequences/README.md @@ -0,0 +1,443 @@ +# diff-sequences + +Compare items in two sequences to find a **longest common subsequence**. + +The items not in common are the items to delete or insert in a **shortest edit +script**. + +To maximize flexibility and minimize memory, you write **callback** functions as +configuration: + +**Input** function `isCommon(aIndex, bIndex)` compares items at indexes in the +sequences and returns a truthy/falsey value. This package might call your +function more than once for some pairs of indexes. + +* Because your function encapsulates **comparison**, this package can compare + items according to `===` operator, `Object.is` method, or other criterion. +* Because your function encapsulates **sequences**, this package can find + differences in arrays, strings, or other data. + +**Output** function `foundSubsequence(nCommon, aCommon, bCommon)` receives the +number of adjacent items and starting indexes of each common subsequence. If +sequences do not have common items, then this package does not call your +function. 
+ +If N is the sum of lengths of sequences and L is length of a longest common +subsequence, then D = N – 2L is the number of **differences** in the +corresponding shortest edit script. + +[_An O(ND) Difference Algorithm and Its Variations_](http://xmailserver.org/diff2.pdf) +by Eugene W. Myers is fast when sequences have **few** differences. + +This package implements the **linear space** variation with optimizations so it +is fast even when sequences have **many** differences. + +## Usage + +To add this package as a dependency of a project, do either of the following: + +* `npm install diff-sequences` +* `yarn add diff-sequences` + +To use `diff` as the name of the default export from this package, do either of +the following: + +* `var diff = require('diff-sequences'); // CommonJS modules` +* `import diff from 'diff-sequences'; // ECMAScript modules` + +Call `diff` with the **lengths** of sequences and your **callback** functions: + +```js +/* eslint-disable no-var */ +var a = ['a', 'b', 'c', 'a', 'b', 'b', 'a']; +var b = ['c', 'b', 'a', 'b', 'a', 'c']; + +function isCommon(aIndex, bIndex) { + return a[aIndex] === b[bIndex]; +} +function foundSubsequence(nCommon, aCommon, bCommon) { + // see examples +} + +diff(a.length, b.length, isCommon, foundSubsequence); +``` + +## Example of longest common subsequence + +Some sequences (for example, `a` and `b` in the example of usage) have more than +one longest common subsequence. 
+ +This package finds the following common items: + +| comparisons of common items | values | output arguments | +| :------------------------------- | :--------- | --------------------------: | +| `a[2] === b[0]` | `'c'` | `foundSubsequence(1, 2, 0)` | +| `a[4] === b[1]` | `'b'` | `foundSubsequence(1, 4, 1)` | +| `a[5] === b[3] && a[6] === b[4]` | `'b', 'a'` | `foundSubsequence(2, 5, 3)` | + +The “edit graph” analogy in the Myers paper shows the following common items: + +| comparisons of common items | values | +| :------------------------------- | :--------- | +| `a[2] === b[0]` | `'c'` | +| `a[3] === b[2] && a[4] === b[3]` | `'a', 'b'` | +| `a[6] === b[4]` | `'a'` | + +Various packages which implement the Myers algorithm will **always agree** on +the **length** of a longest common subsequence, but might **sometimes disagree** +on which **items** are in it. + +## Example of callback functions to count common items + +```js +/* eslint-disable no-var */ +// Return length of longest common subsequence according to === operator. 
+function countCommonItems(a, b) { + var n = 0; + function isCommon(aIndex, bIndex) { + return a[aIndex] === b[bIndex]; + } + function foundSubsequence(nCommon) { + n += nCommon; + } + + diff(a.length, b.length, isCommon, foundSubsequence); + + return n; +} + +var commonLength = countCommonItems( + ['a', 'b', 'c', 'a', 'b', 'b', 'a'], + ['c', 'b', 'a', 'b', 'a', 'c'], +); +``` + +| category of items | expression | value | +| :----------------- | ------------------------: | ----: | +| in common | `commonLength` | `4` | +| to delete from `a` | `a.length - commonLength` | `3` | +| to insert from `b` | `b.length - commonLength` | `2` | + +If the length difference `b.length - a.length` is: + +* negative: its absolute value is the minimum number of items to **delete** from + `a` +* positive: it is the minimum number of items to **insert** from `b` +* zero: there is an **equal** number of items to delete from `a` and insert from + `b` +* non-zero: there is an equal number of **additional** items to delete from `a` + and insert from `b` + +In this example, `6 - 7` is: + +* negative: `1` is the minimum number of items to **delete** from `a` +* non-zero: `2` is the number of **additional** items to delete from `a` and + insert from `b` + +## Example of callback functions to find common items + +```js +// Return array of items in longest common subsequence according to Object.is method. 
+const findCommonItems = (a, b) => { + const array = []; + diff( + a.length, + b.length, + (aIndex, bIndex) => Object.is(a[aIndex], b[bIndex]), + (nCommon, aCommon) => { + for (; nCommon !== 0; nCommon -= 1, aCommon += 1) { + array.push(a[aCommon]); + } + }, + ); + return array; +}; + +const commonItems = findCommonItems( + ['a', 'b', 'c', 'a', 'b', 'b', 'a'], + ['c', 'b', 'a', 'b', 'a', 'c'], +); +``` + +| `i` | `commonItems[i]` | `aIndex` | +| --: | :--------------- | -------: | +| `0` | `'c'` | `2` | +| `1` | `'b'` | `4` | +| `2` | `'b'` | `5` | +| `3` | `'a'` | `6` | + +## Example of callback functions to diff index intervals + +Instead of slicing array-like objects, you can adjust indexes in your callback +functions. + +```js +// Diff index intervals that are half open [start, end) like array slice method. +const diffIndexIntervals = (a, aStart, aEnd, b, bStart, bEnd) => { + // Validate: 0 <= aStart and aStart <= aEnd and aEnd <= a.length + // Validate: 0 <= bStart and bStart <= bEnd and bEnd <= b.length + + diff( + aEnd - aStart, + bEnd - bStart, + (aIndex, bIndex) => Object.is(a[aStart + aIndex], b[bStart + bIndex]), + (nCommon, aCommon, bCommon) => { + // aStart + aCommon, bStart + bCommon + }, + ); + + // After the last common subsequence, do any remaining work. +}; +``` + +## Example of callback functions to emulate diff command + +Linux or Unix has a `diff` command to compare files line by line. Its output is +a **shortest edit script**: + +* **c**hange adjacent lines from the first file to lines from the second file +* **d**elete lines from the first file +* **a**ppend or insert lines from the second file + +```js +// Given zero-based half-open range [start, end) of array indexes, +// return one-based closed range [start + 1, end] as string. +const getRange = (start, end) => + start + 1 === end ? 
`${start + 1}` : `${start + 1},${end}`; + +// Given index intervals of lines to delete or insert, or both, or neither, +// push formatted diff lines onto array. +const pushDelIns = (aLines, aIndex, aEnd, bLines, bIndex, bEnd, array) => { + const deleteLines = aIndex !== aEnd; + const insertLines = bIndex !== bEnd; + const changeLines = deleteLines && insertLines; + if (changeLines) { + array.push(getRange(aIndex, aEnd) + 'c' + getRange(bIndex, bEnd)); + } else if (deleteLines) { + array.push(getRange(aIndex, aEnd) + 'd' + String(bIndex)); + } else if (insertLines) { + array.push(String(aIndex) + 'a' + getRange(bIndex, bEnd)); + } else { + return; + } + + for (; aIndex !== aEnd; aIndex += 1) { + array.push('< ' + aLines[aIndex]); // delete is less than + } + + if (changeLines) { + array.push('---'); + } + + for (; bIndex !== bEnd; bIndex += 1) { + array.push('> ' + bLines[bIndex]); // insert is greater than + } +}; + +// Given content of two files, return emulated output of diff utility. +const findShortestEditScript = (a, b) => { + const aLines = a.split('\n'); + const bLines = b.split('\n'); + const aLength = aLines.length; + const bLength = bLines.length; + + const isCommon = (aIndex, bIndex) => aLines[aIndex] === bLines[bIndex]; + + let aIndex = 0; + let bIndex = 0; + const array = []; + const foundSubsequence = (nCommon, aCommon, bCommon) => { + pushDelIns(aLines, aIndex, aCommon, bLines, bIndex, bCommon, array); + aIndex = aCommon + nCommon; // number of lines compared in a + bIndex = bCommon + nCommon; // number of lines compared in b + }; + + diff(aLength, bLength, isCommon, foundSubsequence); + + // After the last common subsequence, push remaining change lines. + pushDelIns(aLines, aIndex, aLength, bLines, bIndex, bLength, array); + + return array.length === 0 ? 
'' : array.join('\n') + '\n'; +}; +``` + +## Example of callback functions to format diff lines + +Here is simplified code to format **changed and unchanged lines** in expected +and received values after a test fails in Jest: + +```js +// Format diff with minus or plus for change lines and space for common lines. +const formatDiffLines = (a, b) => { + // Jest depends on pretty-format package to serialize objects as strings. + // Unindented for comparison to avoid distracting differences: + const aLinesUn = format(a, {indent: 0 /*, other options*/}).split('\n'); + const bLinesUn = format(b, {indent: 0 /*, other options*/}).split('\n'); + // Indented to display changed and unchanged lines: + const aLinesIn = format(a, {indent: 2 /*, other options*/}).split('\n'); + const bLinesIn = format(b, {indent: 2 /*, other options*/}).split('\n'); + + const aLength = aLinesIn.length; // Validate: aLinesUn.length === aLength + const bLength = bLinesIn.length; // Validate: bLinesUn.length === bLength + + const isCommon = (aIndex, bIndex) => aLinesUn[aIndex] === bLinesUn[bIndex]; + + // Only because the GitHub Flavored Markdown doc collapses adjacent spaces, + // this example code and the following table represent spaces as middle dots. + let aIndex = 0; + let bIndex = 0; + const array = []; + const foundSubsequence = (nCommon, aCommon, bCommon) => { + for (; aIndex !== aCommon; aIndex += 1) { + array.push('-·' + aLinesIn[aIndex]); // delete is minus + } + for (; bIndex !== bCommon; bIndex += 1) { + array.push('+·' + bLinesIn[bIndex]); // insert is plus + } + for (; nCommon !== 0; nCommon -= 1, aIndex += 1, bIndex += 1) { + // For common lines, received indentation seems more intuitive. + array.push('··' + bLinesIn[bIndex]); // common is space + } + }; + + diff(aLength, bLength, isCommon, foundSubsequence); + + // After the last common subsequence, push remaining change lines. 
+ for (; aIndex !== aLength; aIndex += 1) { + array.push('-·' + aLinesIn[aIndex]); + } + for (; bIndex !== bLength; bIndex += 1) { + array.push('+·' + bLinesIn[bIndex]); + } + + return array; +}; + +const expected = { + searching: '', + sorting: { + ascending: true, + fieldKey: 'what', + }, +}; +const received = { + searching: '', + sorting: [ + { + descending: false, + fieldKey: 'what', + }, + ], +}; + +const diffLines = formatDiffLines(expected, received); +``` + +If N is the sum of lengths of sequences and L is length of a longest common +subsequence, then N – L is length of an array of diff lines. In this example, N +is 7 + 9, L is 5, and N – L is 11. + +| `i` | `diffLines[i]` | `aIndex` | `bIndex` | +| ---: | :--------------------------------- | -------: | -------: | +| `0` | `'··Object {'` | `0` | `0` | +| `1` | `'····"searching": "",'` | `1` | `1` | +| `2` | `'-···"sorting": Object {'` | `2` | | +| `3` | `'-·····"ascending": true,'` | `3` | | +| `4` | `'+·····"sorting": Array ['` | | `2` | +| `5` | `'+·······Object {'` | | `3` | +| `6` | `'+·········"descending": false,'` | | `4` | +| `7` | `'··········"fieldKey": "what",'` | `4` | `5` | +| `8` | `'········},'` | `5` | `6` | +| `9` | `'+·····],'` | | `7` | +| `10` | `'··}'` | `6` | `8` | + +## Example of callback functions to find diff items + +Here is simplified code to find changed and unchanged substrings **within +adjacent changed lines** in expected and received values after a test fails in +Jest: + +```js +// Return diff items for strings (compatible with diff-match-patch package). 
+const findDiffItems = (a, b) => { + const isCommon = (aIndex, bIndex) => a[aIndex] === b[bIndex]; + + let aIndex = 0; + let bIndex = 0; + const array = []; + const foundSubsequence = (nCommon, aCommon, bCommon) => { + if (aIndex !== aCommon) { + array.push([-1, a.slice(aIndex, aCommon)]); // delete is -1 + } + if (bIndex !== bCommon) { + array.push([1, b.slice(bIndex, bCommon)]); // insert is 1 + } + + aIndex = aCommon + nCommon; // number of characters compared in a + bIndex = bCommon + nCommon; // number of characters compared in b + array.push([0, a.slice(aCommon, aIndex)]); // common is 0 + }; + + diff(a.length, b.length, isCommon, foundSubsequence); + + // After the last common subsequence, push remaining change items. + if (aIndex !== a.length) { + array.push([-1, a.slice(aIndex)]); + } + if (bIndex !== b.length) { + array.push([1, b.slice(bIndex)]); + } + + return array; +}; + +const expectedDeleted = ['"sorting": Object {', '"ascending": true,'].join( + '\n', +); +const receivedInserted = [ + '"sorting": Array [', + 'Object {', + '"descending": false,', +].join('\n'); + +const diffItems = findDiffItems(expectedDeleted, receivedInserted); +``` + +| `i` | `diffItems[i][0]` | `diffItems[i][1]` | +| --: | ----------------: | :---------------- | +| `0` | `0` | `'"sorting": '` | +| `1` | `1` | `'Array [\n'` | +| `2` | `0` | `'Object {\n"'` | +| `3` | `-1` | `'a'` | +| `4` | `1` | `'de'` | +| `5` | `0` | `'scending": '` | +| `6` | `-1` | `'tru'` | +| `7` | `1` | `'fals'` | +| `8` | `0` | `'e,'` | + +The length difference `b.length - a.length` is equal to the sum of +`diffItems[i][0]` values times `diffItems[i][1]` lengths. In this example, the +difference `48 - 38` is equal to the sum `10`. 
+ +| category of diff item | `[0]` | `[1]` lengths | subtotal | +| :-------------------- | ----: | -----------------: | -------: | +| in common | `0` | `11 + 10 + 11 + 2` | `0` | +| to delete from `a` | `-1` | `1 + 3` | `-4` | +| to insert from `b` | `1` | `8 + 2 + 4` | `14` | + +Instead of formatting the changed substrings with escape codes for colors in the +`foundSubsequence` function to save memory, this example spends memory to **gain +flexibility** before formatting, so a separate heuristic algorithm might modify +the generic array of diff items to show changes more clearly: + +| `i` | `diffItems[i][0]` | `diffItems[i][1]` | +| --: | ----------------: | :---------------- | +| `6` | `-1` | `'true'` | +| `7` | `1` | `'false'` | +| `8` | `0` | `','` | + +For expected and received strings of serialized data, the result of finding +changed **lines**, and then finding changed **substrings** within adjacent +changed lines (as in the preceding two examples) sometimes displays the changes +in a more intuitive way than the result of finding changed substrings, and then +splitting them into changed and unchanged lines. 
diff --git a/packages/diff-sequences/package.json b/packages/diff-sequences/package.json new file mode 100644 index 000000000000..ccc9ec833746 --- /dev/null +++ b/packages/diff-sequences/package.json @@ -0,0 +1,18 @@ +{ + "name": "diff-sequences", + "version": "22.1.0", + "repository": { + "type": "git", + "url": "https://github.com/facebook/jest.git" + }, + "license": "MIT", + "description": "Compare items in two sequences to find a longest common subsequence", + "keywords": [ + "fast", + "linear", + "space", + "callback", + "diff" + ], + "main": "build/index.js" +} diff --git a/packages/diff-sequences/src/__tests__/__snapshots__/index.test.js.snap b/packages/diff-sequences/src/__tests__/__snapshots__/index.test.js.snap new file mode 100644 index 000000000000..d6b738c540c3 --- /dev/null +++ b/packages/diff-sequences/src/__tests__/__snapshots__/index.test.js.snap @@ -0,0 +1,60 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`common substrings regression 1`] = ` +Array [ + "I", + " se", + "e", + " ", + "a", + " perfection ", + "i", + " att", + "in", + "e", + " no", + " ", + "n", + " i", + " n", + " ", + "a", + " ", + "u", + " ", + "en", + " ", + "t", + "er", + " ", + "is ", + "n", + "i", + "n", + " ", + "r", + "e", + " ", + " re", + "e", + ".", +] +`; + +exports[`common substrings wrapping 1`] = ` +Array [ + "When engineers ", + "a", + "v", + "e", + " ", + "ready-to-use tools, they", + " writ", + " more", + "tests, which", + " results in", + "more stabl", + "e", + " code bases.", +] +`; diff --git a/packages/diff-sequences/src/__tests__/index.test.js b/packages/diff-sequences/src/__tests__/index.test.js new file mode 100644 index 000000000000..9db44b86793c --- /dev/null +++ b/packages/diff-sequences/src/__tests__/index.test.js @@ -0,0 +1,757 @@ +/** + * Copyright (c) 2014-present, Facebook, Inc. All rights reserved. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ * + * @flow + */ + +import diff from '../'; + +describe('invalid arg', () => { + const isCommon = () => false; + const foundSubsequence = () => {}; + + describe('length', () => { + test('is not a number', () => { + expect(() => { + diff(('0': any), 0, isCommon, foundSubsequence); + }).toThrow(/aLength/); + }); + test('Infinity is not a safe integer', () => { + expect(() => { + diff(Infinity, 0, isCommon, foundSubsequence); + }).toThrow(/aLength/); + }); + test('Not a Number is not a safe integer', () => { + expect(() => { + diff(NaN, 0, isCommon, foundSubsequence); + }).toThrow(/aLength/); + }); + + test('MAX_SAFE_INTEGER + 1 is not a safe integer', () => { + expect(() => { + diff(0, Number.MAX_SAFE_INTEGER + 1, isCommon, foundSubsequence); + }).toThrow(/bLength/); + }); + test('MIN_SAFE_INTEGER - 1 is not a safe integer', () => { + expect(() => { + diff(0, Number.MIN_SAFE_INTEGER - 1, isCommon, foundSubsequence); + }).toThrow(/bLength/); + }); + test('is a negative integer', () => { + expect(() => { + diff(0, -1, isCommon, foundSubsequence); + }).toThrow(/bLength/); + }); + }); + + describe('callback', () => { + test('null is not a function', () => { + expect(() => { + diff(0, 0, (null: any), foundSubsequence); + }).toThrow(/isCommon/); + }); + test('undefined is not a function', () => { + expect(() => { + diff(0, 0, isCommon, (undefined: any)); + }).toThrow(/foundSubsequence/); + }); + }); +}); + +// Return length of longest common subsequence according to Object.is method. +const countCommonObjectIs = (a: Array, b: Array): number => { + let n = 0; + diff( + a.length, + b.length, + (aIndex: number, bIndex: number) => Object.is(a[aIndex], b[bIndex]), + (nCommon: number) => { + n += nCommon; + }, + ); + return n; +}; + +// Return length of longest common subsequence according to === operator. 
+const countCommonStrictEquality = (a: Array, b: Array): number => { + let n = 0; + diff( + a.length, + b.length, + (aIndex: number, bIndex: number) => a[aIndex] === b[bIndex], + (nCommon: number) => { + n += nCommon; + }, + ); + return n; +}; + +describe('input callback encapsulates comparison', () => { + describe('zero and negative zero', () => { + const a = [0]; + const b = [-0]; + + test('are not common according to Object.is method', () => { + expect(countCommonObjectIs(a, b)).toEqual(0); + }); + test('are common according to === operator', () => { + expect(countCommonStrictEquality(a, b)).toEqual(1); + }); + }); + + describe('Not a Number', () => { + // input callback encapsulates identical sequences + const a = [NaN]; + + test('is common according to Object.is method', () => { + expect(countCommonObjectIs(a, a)).toEqual(1); + }); + test('is not common according to === operator', () => { + expect(countCommonStrictEquality(a, a)).toEqual(0); + }); + }); +}); + +const assertMin = (name: string, val: number, min: number) => { + if (val < min) { + throw new RangeError(`${name} value ${val} is less than min ${min}`); + } +}; + +const assertMax = (name: string, val: number, max: number) => { + if (max < val) { + throw new RangeError(`${name} value ${val} is greater than max ${max}`); + } +}; + +const assertEnd = (name: string, val: number, end: number) => { + if (end <= val) { + throw new RangeError(`${name} value ${val} is not less than end ${end}`); + } +}; + +const assertCommonItems = ( + a: Array | string, + b: Array | string, + nCommon: number, + aCommon: number, + bCommon: number, +) => { + for (; nCommon !== 0; nCommon -= 1, aCommon += 1, bCommon += 1) { + if (a[aCommon] !== b[bCommon]) { + throw new Error( + `output item is not common for aCommon=${aCommon} and bCommon=${bCommon}`, + ); + } + } +}; + +// Return array of items in a longest common subsequence of array-like objects. 
+const findCommonItems = ( + a: Array | string, + b: Array | string, +): Array => { + const array = []; + diff( + a.length, + b.length, + (aIndex: number, bIndex: number) => { + assertMin('input aIndex', aIndex, 0); + assertEnd('input aIndex', aIndex, a.length); + assertMin('input bIndex', bIndex, 0); + assertEnd('input bIndex', bIndex, b.length); + return a[aIndex] === b[bIndex]; + }, + (nCommon: number, aCommon: number, bCommon: number) => { + assertMin('output nCommon', nCommon, 1); + assertMin('output aCommon', aCommon, 0); + assertMax('output aCommon + nCommon', aCommon + nCommon, a.length); + assertMin('output bCommon', bCommon, 0); + assertMax('output bCommon + nCommon', bCommon + nCommon, b.length); + assertCommonItems(a, b, nCommon, aCommon, bCommon); + for (; nCommon !== 0; nCommon -= 1, aCommon += 1) { + array.push(a[aCommon]); + } + }, + ); + return array; +}; + +// Assert that array-like objects have the expected common items. +const expectCommonItems = ( + a: Array | string, + b: Array | string, + expected: Array, +) => { + expect(findCommonItems(a, b)).toEqual(expected); + + if (a.length !== b.length) { + // If sequences a and b have different lengths, + // then if you swap sequences in your callback functions, + // this package finds the same items. + expect(findCommonItems(b, a)).toEqual(expected); + } +}; + +describe('input callback encapsulates sequences', () => { + // Example sequences in “edit graph” analogy from + // An O(ND) Difference Algorithm and Its Variations by Eugene W. Myers + const a = ['a', 'b', 'c', 'a', 'b', 'b', 'a']; + const b = ['c', 'b', 'a', 'b', 'a', 'c']; + + // Because a and b have more than one longest common subsequence, + // expected value might change if implementation changes. 
+ // For example, Myers paper shows: ['c', 'a', 'b', 'a'] + const expected = ['c', 'b', 'b', 'a']; + + test('arrays of strings', () => { + expectCommonItems(a, b, expected); + }); + test('string and array of strings', () => { + expectCommonItems(a.join(''), b, expected); + }); + test('strings', () => { + expectCommonItems(a.join(''), b.join(''), expected); + }); +}); + +describe('no common items', () => { + // default export does not call findSubsequences nor divide + + describe('negative zero is equivalent to zero for length', () => { + const countItemsNegativeZero = (aLength, bLength) => { + let n = 0; + diff( + aLength, + bLength, + () => { + throw new Error('input function should not have been called'); + }, + nCommon => { + n += nCommon; + }, + ); + return n; + }; + + test('of a', () => { + expect(countItemsNegativeZero(-0, 1)).toEqual(0); + }); + test('of b', () => { + expect(countItemsNegativeZero(1, -0)).toEqual(0); + }); + test('of a and b', () => { + expect(countItemsNegativeZero(-0, -0)).toEqual(0); + }); + }); + + test('a empty and b empty', () => { + const a = []; + const b = []; + const expected = []; + expectCommonItems(a, b, expected); + }); + test('a empty and b non-empty', () => { + const a = []; + const b = [false]; + const expected = []; + expectCommonItems(a, b, expected); + }); + test('a non-empty and b empty', () => { + const a = [false, true]; + const b = []; + const expected = []; + expectCommonItems(a, b, expected); + }); + + // default export does call findSubsequences and divide + describe('a non-empty and b non-empty', () => { + test('baDeltaLength 0 even', () => { + // findSubsequences not transposed because graph is square + // reverse path overlaps on first iteration with d === 1 + // last segment cannot have a prev segment + const a = [false]; + const b = [true]; + const expected = []; + expectCommonItems(a, b, expected); + }); + test('baDeltaLength 1 odd', () => { + // findSubsequences transposed because graph has landscape 
orientation + // forward path overlaps on first iteration with d === 2 + // last segment has a prev segment because unroll a half iteration + const a = [0, 1]; + const b = ['0']; + const expected = []; + expectCommonItems(a, b, expected); + }); + test('baDeltaLength 2 even', () => { + // findSubsequences transposed because graph has landscape orientation + // reverse path overlaps with d === 3 + // last segment has a prev segment + const a = [0, 1, 2, 3]; + const b = ['0', '1']; + const expected = []; + expectCommonItems(a, b, expected); + }); + test('baDeltaLength 7 odd', () => { + // findSubsequences not transposed because graph has portrait orientation + // forward path overlaps with d === 7 + // last segment has a prev segment + const a = ['0', '1', '2']; + const b = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + const expected = []; + expectCommonItems(a, b, expected); + }); + }); +}); + +describe('only common items', () => { + // input callback encapsulates identical sequences + // default export trims common items from the start + // default export does not call findSubsequences nor divide + test('length 1', () => { + const a = [false]; + expectCommonItems(a, a, a); + }); + test('length 2', () => { + const a = [false, true]; + expectCommonItems(a, a, a); + }); +}); + +describe('all common items outside', () => { + const common1 = 'common1'; + const common2 = 'common2'; + const common3 = 'common3'; + + // default export does not call findSubsequences nor divide + test('preceding changes', () => { + // default export trims common item from the start + const a = [common1]; + const b = [common1, 'insert1', 'insert2']; + const expected = [common1]; + expectCommonItems(a, b, expected); + }); + test('following change', () => { + // default export trims common items from the end + const a = ['delete1', common1, common2]; + const b = [common1, common2]; + const expected = [common1, common2]; + expectCommonItems(a, b, expected); + }); + test('preceding and following changes in one 
sequence', () => { + // default export trims common items from the start and end + const a = [common1, common2, 'delete1', 'delete2', common3]; + const b = [common1, common2, common3]; + const expected = [common1, common2, common3]; + expectCommonItems(a, b, expected); + }); +}); + +describe('some common items inside and outside', () => { + const common1 = 'common1'; + const common2 = 'common2'; + + // default export does call findSubsequences and divide + test('preceding changes adjacent to common in both sequences', () => { + // default export trims common item from the start + // baDeltaLength 0 even + // common item follows last (only) reverse segment when d === 1 + const a = [common1, common2, 'delete1_lastR']; + const b = [common1, 'insert1', common2]; + const expected = [common1, common2]; + expectCommonItems(a, b, expected); + }); + test('following changes adjacent to common in both sequences', () => { + // default export trims common item from the end + // baDeltaLength 1 odd + // common item follows prev (but not last) forward segment when d === 2 + const a = [common1, 'delete1', common2]; + const b = ['insert1_prevF', common1, 'insert2_lastF', common2]; + const expected = [common1, common2]; + expectCommonItems(a, b, expected); + }); +}); + +describe('all common items inside non-recursive', () => { + // The index intervals preceding and following the middle change + // contain only changes, therefore they cannot contain any common items. 
+ const common1 = 'common1'; + const common2 = 'common2'; + const common3 = 'common3'; + + test('move from start to end relative to change', () => { + // baDeltaLength 0 even + // common items follow last (only) reverse segment when d === 1 + const a = [common1, common2, 'delete1']; + const b = ['insert1', common1, common2]; + const expected = [common1, common2]; + expectCommonItems(a, b, expected); + }); + test('move from start to end relative to common', () => { + // baDeltaLength 0 even + // common items follow last (only) reverse segment when d === 1 + const a = [common1, common2, common3]; + const b = [common3, common1, common2]; + // common3 is delete from a and insert from b + const expected = [common1, common2]; + expectCommonItems(a, b, expected); + }); + test('move from start to end relative to change and common', () => { + // baDeltaLength 0 even + // common items follow last reverse segment when d === 3 + const a = [common1, common2, 'delete1_lastR', common3, 'delete2']; + const b = ['insert1', common3, 'insert2', common1, common2]; + // common3 is delete from a and insert from b + const expected = [common1, common2]; + expectCommonItems(a, b, expected); + }); + test('reverse relative to change', () => { + // baDeltaLength 0 even + // common item follows last reverse segment when d === 4 + const a = [common1, 'delete1', common2, 'delete2', common3]; + const b = [common3, 'insert1_lastR', common2, 'insert2', common1]; + + // Because a and b have more than one longest common subsequence, + // expected value might change if implementation changes. 
+ // common1 and common2 are delete from a and insert from b + const expected = [common3]; + expectCommonItems(a, b, expected); + }); + + test('preceding middle', () => { + // baDeltaLength 1 odd + // common items follow prev and last forward segments when d === 3 + const a = ['delete1', common1, common2, common3, 'delete2']; + const b = [ + 'insert1_prevF', + common1, + 'insert2_lastF', + common2, + common3, + 'insert3', + ]; + const expected = [common1, common2, common3]; + expectCommonItems(a, b, expected); + }); + test('following middle', () => { + // baDeltaLength 2 even + // common items follow prev and last reverse segments when d === 4 + const a = ['delete1', 'delete2', common1, common2, common3, 'delete3']; + const b = [ + 'insert1', + 'insert2', + common1, + common2, + 'insert3_lastR', + common3, + 'insert4_prevR', + 'insert5', + ]; + const expected = [common1, common2, common3]; + expectCommonItems(a, b, expected); + }); +}); + +describe('all common items inside recursive', () => { + // Because a and b have only one longest common subsequence, + // expected value cannot change if implementation changes. 
+ const common1 = 'common1'; + const common2 = 'common2'; + const common3 = 'common3'; + const common4 = 'common4'; + const common5 = 'common5'; + const common6 = 'common6'; + + test('prev reverse at depth 1 and preceding at depth 2', () => { + // depth 1 common item follows prev reverse segment when d === 3 + // depth 2 preceding common items follow prev and last forward segments when d === 2 + const a = [ + 'delete1_depth2_preceding_prevF', + common1, + common2, + common3, + 'delete2_depth1_prevR', + 'delete3', + ]; + const b = [ + common1, + 'insert1_depth2_preceding_lastF', + common2, + 'insert2', + 'insert3_depth1_lastR', + common3, + ]; + const expected = [common1, common2, common3]; + expectCommonItems(a, b, expected); + }); + test('last forward at depth 1 and following at depth 2', () => { + // depth 1 common item follows last forward segment when d === 5 + // depth 2 following common items follow prev and last reverse segments when d === 2 + const a = [ + 'delete1', + 'delete2', + common1, + 'delete3', + common2, + 'delete4_depth2_following_lastR', + common3, + ]; + const b = [ + 'insert1', + 'insert2', + 'insert3_depth1_lastF', + common1, + 'insert4', + common2, + common3, + 'insert5_depth2_following_prevR', + ]; + const expected = [common1, common2, common3]; + expectCommonItems(a, b, expected); + }); + test('preceding at depth 2 and both at depth 3 of following', () => { + // depth 1 transposed from landscape to portrait so swap args + // depth 1 common items do not follow prev nor last forward segment when d === 8 + // depth 2 preceding common item follows prev forward segment when d === 4 + // depth 2 following transposed again so unswap swapped args + // depth 2 following common items do not follow prev nor last forward segment when d === 4 + // depth 3 preceding common item follows last forward segment when d === 2 + // depth 3 following transposed again so swap args again + // depth 3 following common item follows last forward segment when d === 2 + 
const a = [ + 'delete1_depth2_preceding_prevF', + common1, + 'delete2_depth2_preceding_middle', + 'delete3', + 'delete4', + 'delete5_depth1_middle', + common2, + 'delete6', + 'delete7', + 'delete8_depth3_following_lastF', + common3, + ]; + const b = [ + 'insert1', + 'insert2', + common1, + 'insert3', + 'insert4', + 'insert5_depth3_preceding_lastF', + common2, + 'insert6_depth2_following_middle', + common3, + 'insert7', + ]; + const expected = [common1, common2, common3]; + expectCommonItems(a, b, expected); + }); + + test('interleaved single change', () => { + // depth 1 common items follow prev and last forward segment when d === 4 + // depth 2 preceding common items follow prev and last forward segment when d === 2 + // depth 2 following common items follow prev and last forward segment when d === 2 + const a = [common1, common2, common3, common4, common5, common6]; + const b = [ + 'insert1_depth_2_preceding_prevF', + common1, + 'insert2_depth2_preceding_lastF', + common2, + 'insert3_depth1_prevF', + common3, + 'insert4_depth1_lastF', + common4, + 'insert5_depth2_following_prevF', + common5, + 'insert6_depth2_following_lastF', + common6, + 'insert7', + ]; + const expected = [common1, common2, common3, common4, common5, common6]; + expectCommonItems(a, b, expected); + }); + test('interleaved double changes', () => { + // depth 1 common item follows prev reverse segment when d === 7 + // depth 2 preceding transposed from landscape to portrait so swap args + // depth 2 preceding common item follows last forward segment when d === 4 + // depth 3 preceding transposed again so unswap swapped args + // depth 3 preceding preceding common item follows last forward segment when d === 2 + // depth 2 following common item follows prev reverse segment when d === 3 + // depth 3 following preceding transposed + // depth 3 following preceding common item follows last forward segment when d === 2 + const a = [ + 'delete1', + common1, + 'delete2_depth2_preceding_lastF', + common2, 
+ 'delete3_depth3_preceding_following_lastF', + common3, + 'delete4', + common4, + 'delete5_depth3_following_preceding_lastF', + common5, + 'delete6', + common6, + 'delete7', + ]; + const b = [ + 'insert1_depth3_preceding_preceding_lastF', + common1, + 'insert2', + common2, + 'insert3', + common3, + 'insert4_depth1_middle', + common4, + 'insert5_depth1_prevR', + common5, + 'insert6', + common6, + 'insert7_depth2_following_prevR', + ]; + const expected = [common1, common2, common3, common4, common5, common6]; + expectCommonItems(a, b, expected); + }); + + test('optimization decreases iMaxF', () => { + // iMaxF 3 initially because aLength + // iMaxF 1 at d === 4 + // depth 1 common items do not follow prev nor last forward segment when d === 5 + // depth 2 preceding common item follows last forward segment when d === 3 + // depth 3 preceding preceding common item follows last (only) reverse segment when d === 1 + const a = [common1, 'delete1_depth3_lastR', common2]; + const b = [ + 'insert1', + common1, + 'insert2_depth2_lastF', + common2, + 'insert3', + 'insert4', + 'insert5', + 'insert6', + 'insert7', + 'insert8', + 'insert9', + ]; + const expected = [common1, common2]; + expectCommonItems(a, b, expected); + }); + test('optimization decreases iMaxR', () => { + // iMaxR 2 initially because aLength + // iMaxR 0 at d === 2 + // depth 1 common items do not follow prev nor last forward segment when d === 5 + // depth 2 following common items follow prev reverse segment when d === 2 + const a = [common1, common2]; + const b = [ + 'insert1', + 'insert2', + 'insert3', + 'insert4', + 'insert5_depth1_middle', + 'insert6', + 'insert7', + 'insert8_depth2_middle', + common1, + common2, + 'insert9_depth2_prevR', + ]; + const expected = [common1, common2]; + expectCommonItems(a, b, expected); + }); +}); + +const assertCommonSubstring = ( + a: string, + b: string, + nCommon: number, + aCommon: number, + bCommon: number, +) => { + const aSubstring = a.slice(aCommon, aCommon + 
nCommon); + const bSubstring = b.slice(bCommon, bCommon + nCommon); + if (aSubstring !== bSubstring) { + throw new Error( + `output substrings ${aSubstring} and ${bSubstring} are not common for nCommon=${nCommon} aCommon=${aCommon} bCommon=${bCommon}`, + ); + } +}; + +// Return array of substrings in a longest common subsequence of strings. +const findCommonSubstrings = (a: string, b: string): Array => { + const array = []; + diff( + a.length, + b.length, + (aIndex: number, bIndex: number) => { + assertMin('input aIndex', aIndex, 0); + assertEnd('input aIndex', aIndex, a.length); + assertMin('input bIndex', bIndex, 0); + assertEnd('input bIndex', bIndex, b.length); + return a[aIndex] === b[bIndex]; + }, + (nCommon: number, aCommon: number, bCommon: number) => { + assertMin('output nCommon', nCommon, 1); + assertMin('output aCommon', aCommon, 0); + assertMax('output aCommon + nCommon', aCommon + nCommon, a.length); + assertMin('output bCommon', bCommon, 0); + assertMax('output bCommon + nCommon', bCommon + nCommon, b.length); + assertCommonSubstring(a, b, nCommon, aCommon, bCommon); + array.push(a.slice(aCommon, aCommon + nCommon)); + }, + ); + return array; +}; + +describe('common substrings', () => { + // Find changed and unchanged substrings within adjacent changed lines + // in expected and received values after a test fails in Jest. + test('progress', () => { + // Confirm expected progress. If change is correct, then update test. + // A property value changes from an object to an array of objects. 
+ // prettier-ignore + const a = [ + '"sorting": Object {', + '"ascending": true,', + ].join('\n'); + // prettier-ignore + const b = [ + '"sorting": Array [', + 'Object {', + '"descending": false,', + ].join('\n'); + const expected = ['"sorting": ', 'Object {\n"', 'scending": ', 'e,']; + const abCommonSubstrings = findCommonSubstrings(a, b); + const baCommonSubstrings = findCommonSubstrings(b, a); + expect(abCommonSubstrings).toEqual(baCommonSubstrings); + expect(abCommonSubstrings).toEqual(expected); + }); + test('regression', () => { + // Prevent unexpected regression. If change is incorrect, then fix code. + // Internationalization fails for a text node. + // English translation and French quotation by Antoine de Saint Exupéry: + const a = `It seems that perfection is attained not when there is nothing more to add, but when there is nothing more to remove.`; + const b = `Il semble que la perfection soit atteinte non quand il n'y a plus rien à ajouter, mais quand il n'y a plus rien à retrancher.`; + const abCommonSubstrings = findCommonSubstrings(a, b); + const baCommonSubstrings = findCommonSubstrings(b, a); + expect(abCommonSubstrings).toEqual(baCommonSubstrings); + expect(abCommonSubstrings).toMatchSnapshot(); + }); + test('wrapping', () => { + const a = [ + 'When engineers are provided with ready-to-use tools, they end up writing more', + 'tests, which in turn results in more stable code bases.', + ].join('\n'); + const b = [ + 'When engineers have ready-to-use tools, they write more tests, which results in', + 'more stable and healthy code bases.', + ].join('\n'); + const abCommonSubstrings = findCommonSubstrings(a, b); + const baCommonSubstrings = findCommonSubstrings(b, a); + expect(abCommonSubstrings).toEqual(baCommonSubstrings); + expect(abCommonSubstrings).toMatchSnapshot(); + }); +}); diff --git a/packages/diff-sequences/src/index.js b/packages/diff-sequences/src/index.js new file mode 100644 index 000000000000..71ce25849d7f --- /dev/null +++ 
b/packages/diff-sequences/src/index.js @@ -0,0 +1,874 @@ +/** + * Copyright (c) 2014-present, Facebook, Inc. All rights reserved. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + * + * @flow + */ + +// This diff-sequences package implements the linear space variation in +// An O(ND) Difference Algorithm and Its Variations by Eugene W. Myers + +// Relationship in notation between Myers paper and this package: +// A is a +// N is aLength, aEnd - aStart, and so on +// x is aIndex, aFirst, aLast, and so on +// B is b +// M is bLength, bEnd - bStart, and so on +// y is bIndex, bFirst, bLast, and so on +// Δ = N - M is negative of baDeltaLength = bLength - aLength +// D is d +// k is kF +// k + Δ is kF = kR - baDeltaLength +// V is aIndexesF or aIndexesR (see comment below about Indexes type) +// index intervals [1, N] and [1, M] are [0, aLength) and [0, bLength) +// starting point in forward direction (0, 0) is (-1, -1) +// starting point in reverse direction (N + 1, M + 1) is (aLength, bLength) + +// The “edit graph” for sequences a and b corresponds to items: +// in a on the horizontal axis +// in b on the vertical axis +// +// Given a-coordinate of a point in a diagonal, you can compute b-coordinate. 
+// +// Forward diagonals kF: +// zero diagonal intersects top left corner +// positive diagonals intersect top edge +// negative diagonals insersect left edge +// +// Reverse diagonals kR: +// zero diagonal intersects bottom right corner +// positive diagonals intersect right edge +// negative diagonals intersect bottom edge + +// The graph contains a directed acyclic graph of edges: +// horizontal: delete an item from a +// vertical: insert an item from b +// diagonal: common item in a and b +// +// The algorithm solves dual problems in the graph analogy: +// Find longest common subsequence: path with maximum number of diagonal edges +// Find shortest edit script: path with minimum number of non-diagonal edges + +// Input callback function compares items at indexes in the sequences. +type IsCommon = ( + aIndex: number, // caller can assume: 0 <= aIndex && aIndex < aLength + bIndex: number, // caller can assume: 0 <= bIndex && bIndex < bLength +) => boolean; + +// Output callback function receives the number of adjacent items +// and starting indexes of each common subsequence. +type FoundSubsequence = ( + nCommon: number, // caller can assume: 0 < nCommon + aCommon: number, // caller can assume: 0 <= aCommon && aCommon < aLength + bCommon: number, // caller can assume: 0 <= bCommon && bCommon < bLength +) => void; + +// Either original functions or wrapped to swap indexes if graph is transposed. +type Callbacks = {| + foundSubsequence: FoundSubsequence, + isCommon: IsCommon, +|}; + +// Indexes in sequence a of last point of forward or reverse paths in graph. +// Myers algorithm indexes by diagonal k which for negative is bad deopt in V8. +// This package indexes by iF and iR which are greater than or equal to zero. +// and also updates the index arrays in place to cut memory in half. +// kF = 2 * iF - d +// kR = d - 2 * iR +type Indexes = Array; + +// Division of index intervals in sequences a and b at the middle change. 
+// Invariant: intervals do not have common items at the start or end. +type Division = {| + // The end of interval preceding division is open like array slice method. + nChangePreceding: number, // number of change items + aEndPreceding: number, + bEndPreceding: number, + + nCommonPreceding: number, // 0 if no common items preceding middle change + aCommonPreceding: number, // ignore prop value if nCommonPreceding === 0 + bCommonPreceding: number, // ignore prop value if nCommonPreceding === 0 + + nCommonFollowing: number, // 0 if no common items following middle change + aCommonFollowing: number, // ignore prop value if nCommonFollowing === 0 + bCommonFollowing: number, // ignore prop value if nCommonFollowing === 0 + + // The start of interval following division is closed like array slice method. + nChangeFollowing: number, // number of change items + aStartFollowing: number, + bStartFollowing: number, +|}; + +const pkg = 'diff-sequences'; // for error messages +const NOT_YET_SET = 0; // small int instead of undefined to avoid deopt in V8 + +// Return the number of common items that follow in forward direction. +// The length of what Myers paper calls a “snake” in a forward path. +const countCommonItemsF = ( + aIndex: number, + aEnd: number, + bIndex: number, + bEnd: number, + isCommon: IsCommon, +) => { + let nCommon = 0; + while (aIndex < aEnd && bIndex < bEnd && isCommon(aIndex, bIndex)) { + aIndex += 1; + bIndex += 1; + nCommon += 1; + } + return nCommon; +}; + +// Return the number of common items that precede in reverse direction. +// The length of what Myers paper calls a “snake” in a reverse path. 
+const countCommonItemsR = ( + aStart: number, + aIndex: number, + bStart: number, + bIndex: number, + isCommon: IsCommon, +) => { + let nCommon = 0; + while (aStart <= aIndex && bStart <= bIndex && isCommon(aIndex, bIndex)) { + aIndex -= 1; + bIndex -= 1; + nCommon += 1; + } + return nCommon; +}; + +// A simple function to extend forward paths from (d - 1) to d changes +// when forward and reverse paths cannot yet overlap. +const extendPathsF = ( + d: number, + aEnd: number, + bEnd: number, + bF: number, // bIndex = bF + aIndex - kF + isCommon: IsCommon, + aIndexesF: Indexes, // update indexes in sequence a of paths in diagonals kF + iMaxF: number, // return the value because optimization might decrease it +): number => { + // Unroll the first iteration. + let iF = 0; + let kF = -d; // kF = 2 * iF - d + let aFirst = aIndexesF[iF]; // in first iteration always insert + let aIndexPrev1 = aFirst; // prev value of [iF - 1] in next iteration + aIndexesF[iF] += countCommonItemsF( + aFirst + 1, + aEnd, + bF + aFirst - kF + 1, + bEnd, + isCommon, + ); + + // Optimization: skip diagonals in which paths cannot ever overlap. + const nF = d < iMaxF ? d : iMaxF; + + // The diagonals kF are odd when d is odd and even when d is even. + for (iF += 1, kF += 2; iF <= nF; iF += 1, kF += 2) { + // To get first point of path segment, move one change in forward direction + // from last point of previous path segment in an adjacent diagonal. + // In last possible iteration when iF === d and kF === d always delete. + if (iF !== d && aIndexPrev1 < aIndexesF[iF]) { + aFirst = aIndexesF[iF]; // vertical to insert from b + } else { + aFirst = aIndexPrev1 + 1; // horizontal to delete from a + + if (aEnd <= aFirst) { + // Optimization: delete moved past right of graph. + return iF - 1; + } + } + + // To get last point of path segment, move along diagonal of common items. 
+ aIndexPrev1 = aIndexesF[iF]; + aIndexesF[iF] = + aFirst + + countCommonItemsF(aFirst + 1, aEnd, bF + aFirst - kF + 1, bEnd, isCommon); + } + + return iMaxF; +}; + +// A simple function to extend reverse paths from (d - 1) to d changes +// when reverse and forward paths cannot yet overlap. +const extendPathsR = ( + d: number, + aStart: number, + bStart: number, + bR: number, // bIndex = bR + aIndex - kR + isCommon: IsCommon, + aIndexesR: Indexes, // update indexes in sequence a of paths in diagonals kR + iMaxR: number, // return the value because optimization might decrease it +): number => { + // Unroll the first iteration. + let iR = 0; + let kR = d; // kR = d - 2 * iR + let aFirst = aIndexesR[iR]; // in first iteration always insert + let aIndexPrev1 = aFirst; // prev value of [iR - 1] in next iteration + aIndexesR[iR] -= countCommonItemsR( + aStart, + aFirst - 1, + bStart, + bR + aFirst - kR - 1, + isCommon, + ); + + // Optimization: skip diagonals in which paths cannot ever overlap. + const nR = d < iMaxR ? d : iMaxR; + + // The diagonals kR are odd when d is odd and even when d is even. + for (iR += 1, kR -= 2; iR <= nR; iR += 1, kR -= 2) { + // To get first point of path segment, move one change in reverse direction + // from last point of previous path segment in an adjacent diagonal. + // In last possible iteration when iR === d and kR === -d always delete. + if (iR !== d && aIndexesR[iR] < aIndexPrev1) { + aFirst = aIndexesR[iR]; // vertical to insert from b + } else { + aFirst = aIndexPrev1 - 1; // horizontal to delete from a + + if (aFirst < aStart) { + // Optimization: delete moved past left of graph. + return iR - 1; + } + } + + // To get last point of path segment, move along diagonal of common items. 
+ aIndexPrev1 = aIndexesR[iR]; + aIndexesR[iR] = + aFirst - + countCommonItemsR( + aStart, + aFirst - 1, + bStart, + bR + aFirst - kR - 1, + isCommon, + ); + } + + return iMaxR; +}; + +// A complete function to extend forward paths from (d - 1) to d changes. +// Return true if a path overlaps reverse path of (d - 1) changes in its diagonal. +const extendOverlappablePathsF = ( + d: number, + aStart: number, + aEnd: number, + bStart: number, + bEnd: number, + isCommon: IsCommon, + aIndexesF: Indexes, // update indexes in sequence a of paths in diagonals kF + iMaxF: number, + aIndexesR: Indexes, + iMaxR: number, + division: Division, // update prop values if return true +): boolean => { + const bF = bStart - aStart; // bIndex = bF + aIndex - kF + const aLength = aEnd - aStart; + const bLength = bEnd - bStart; + const baDeltaLength = bLength - aLength; // kF = kR - baDeltaLength + + // Range of diagonals in which forward and reverse paths might overlap. + const kMinOverlapF = -baDeltaLength - (d - 1); // -(d - 1) <= kR + const kMaxOverlapF = -baDeltaLength + (d - 1); // kR <= (d - 1) + + let aIndexPrev1 = NOT_YET_SET; // prev value of [iF - 1] in next iteration + + // Optimization: skip diagonals in which paths cannot ever overlap. + const nF = d < iMaxF ? d : iMaxF; + + // The diagonals kF = 2 * iF - d are odd when d is odd and even when d is even. + for (let iF = 0, kF = -d; iF <= nF; iF += 1, kF += 2) { + // To get first point of path segment, move one change in forward direction + // from last point of previous path segment in an adjacent diagonal. + // In first iteration when iF === 0 and kF === -d always insert. + // In last possible iteration when iF === d and kF === d always delete. + const insert = iF === 0 || (iF !== d && aIndexPrev1 < aIndexesF[iF]); + const aLastPrev = insert ? aIndexesF[iF] : aIndexPrev1; + const aFirst = insert + ? 
aLastPrev // vertical to insert from b + : aLastPrev + 1; // horizontal to delete from a + + // To get last point of path segment, move along diagonal of common items. + const bFirst = bF + aFirst - kF; + const nCommonF = countCommonItemsF( + aFirst + 1, + aEnd, + bFirst + 1, + bEnd, + isCommon, + ); + const aLast = aFirst + nCommonF; + + aIndexPrev1 = aIndexesF[iF]; + aIndexesF[iF] = aLast; + + if (kMinOverlapF <= kF && kF <= kMaxOverlapF) { + // Solve for iR of reverse path with (d - 1) changes in diagonal kF: + // kR = kF + baDeltaLength + // kR = (d - 1) - 2 * iR + const iR = (d - 1 - (kF + baDeltaLength)) / 2; + + // If this forward path overlaps the reverse path in this diagonal, + // then this is the middle change of the index intervals. + if (iR <= iMaxR && aIndexesR[iR] - 1 <= aLast) { + // Unlike the Myers algorithm which finds only the middle “snake” + // this package can find two common subsequences per division. + // Last point of previous path segment is on an adjacent diagonal. + const bLastPrev = bF + aLastPrev - (insert ? kF + 1 : kF - 1); + + // Because of invariant that intervals preceding the middle change + // cannot have common items at the end, + // move in reverse direction along a diagonal of common items. + const nCommonR = countCommonItemsR( + aStart, + aLastPrev, + bStart, + bLastPrev, + isCommon, + ); + + const aIndexPrevFirst = aLastPrev - nCommonR; + const bIndexPrevFirst = bLastPrev - nCommonR; + + const aEndPreceding = aIndexPrevFirst + 1; + const bEndPreceding = bIndexPrevFirst + 1; + + division.nChangePreceding = d - 1; + if (d - 1 === aEndPreceding + bEndPreceding - aStart - bStart) { + // Optimization: number of preceding changes in forward direction + // is equal to number of items in preceding interval, + // therefore it cannot contain any common items. 
+ division.aEndPreceding = aStart; + division.bEndPreceding = bStart; + } else { + division.aEndPreceding = aEndPreceding; + division.bEndPreceding = bEndPreceding; + } + + division.nCommonPreceding = nCommonR; + if (nCommonR !== 0) { + division.aCommonPreceding = aEndPreceding; + division.bCommonPreceding = bEndPreceding; + } + + division.nCommonFollowing = nCommonF; + if (nCommonF !== 0) { + division.aCommonFollowing = aFirst + 1; + division.bCommonFollowing = bFirst + 1; + } + + const aStartFollowing = aLast + 1; + const bStartFollowing = bFirst + nCommonF + 1; + + division.nChangeFollowing = d - 1; + if (d - 1 === aEnd + bEnd - aStartFollowing - bStartFollowing) { + // Optimization: number of changes in reverse direction + // is equal to number of items in following interval, + // therefore it cannot contain any common items. + division.aStartFollowing = aEnd; + division.bStartFollowing = bEnd; + } else { + division.aStartFollowing = aStartFollowing; + division.bStartFollowing = bStartFollowing; + } + + return true; + } + } + } + + return false; +}; + +// A complete function to extend reverse paths from (d - 1) to d changes. +// Return true if a path overlaps forward path of d changes in its diagonal. +const extendOverlappablePathsR = ( + d: number, + aStart: number, + aEnd: number, + bStart: number, + bEnd: number, + isCommon: IsCommon, + aIndexesF: Indexes, + iMaxF: number, + aIndexesR: Indexes, // update indexes in sequence a of paths in diagonals kR + iMaxR: number, + division: Division, // update prop values if return true +): boolean => { + const bR = bEnd - aEnd; // bIndex = bR + aIndex - kR + const aLength = aEnd - aStart; + const bLength = bEnd - bStart; + const baDeltaLength = bLength - aLength; // kR = kF + baDeltaLength + + // Range of diagonals in which forward and reverse paths might overlap. 
+ const kMinOverlapR = baDeltaLength - d; // -d <= kF + const kMaxOverlapR = baDeltaLength + d; // kF <= d + + let aIndexPrev1 = NOT_YET_SET; // prev value of [iR - 1] in next iteration + + // Optimization: skip diagonals in which paths cannot ever overlap. + const nR = d < iMaxR ? d : iMaxR; + + // The diagonals kR = d - 2 * iR are odd when d is odd and even when d is even. + for (let iR = 0, kR = d; iR <= nR; iR += 1, kR -= 2) { + // To get first point of path segment, move one change in reverse direction + // from last point of previous path segment in an adjacent diagonal. + // In first iteration when iR === 0 and kR === d always insert. + // In last possible iteration when iR === d and kR === -d always delete. + const insert = iR === 0 || (iR !== d && aIndexesR[iR] < aIndexPrev1); + const aLastPrev = insert ? aIndexesR[iR] : aIndexPrev1; + const aFirst = insert + ? aLastPrev // vertical to insert from b + : aLastPrev - 1; // horizontal to delete from a + + // To get last point of path segment, move along diagonal of common items. + const bFirst = bR + aFirst - kR; + const nCommonR = countCommonItemsR( + aStart, + aFirst - 1, + bStart, + bFirst - 1, + isCommon, + ); + const aLast = aFirst - nCommonR; + + aIndexPrev1 = aIndexesR[iR]; + aIndexesR[iR] = aLast; + + if (kMinOverlapR <= kR && kR <= kMaxOverlapR) { + // Solve for iF of forward path with d changes in diagonal kR: + // kF = kR - baDeltaLength + // kF = 2 * iF - d + const iF = (d + (kR - baDeltaLength)) / 2; + + // If this reverse path overlaps the forward path in this diagonal, + // then this is a middle change of the index intervals. + if (iF <= iMaxF && aLast - 1 <= aIndexesF[iF]) { + const bLast = bFirst - nCommonR; + + division.nChangePreceding = d; + if (d === aLast + bLast - aStart - bStart) { + // Optimization: number of changes in reverse direction + // is equal to number of items in preceding interval, + // therefore it cannot contain any common items. 
+ division.aEndPreceding = aStart; + division.bEndPreceding = bStart; + } else { + division.aEndPreceding = aLast; + division.bEndPreceding = bLast; + } + + division.nCommonPreceding = nCommonR; + if (nCommonR !== 0) { + // The last point of reverse path segment is start of common subsequence. + division.aCommonPreceding = aLast; + division.bCommonPreceding = bLast; + } + + division.nChangeFollowing = d - 1; + if (d === 1) { + // There is no previous path segment. + division.nCommonFollowing = 0; + division.aStartFollowing = aEnd; + division.bStartFollowing = bEnd; + } else { + // Unlike the Myers algorithm which finds only the middle “snake” + // this package can find two common subsequences per division. + // Last point of previous path segment is on an adjacent diagonal. + const bLastPrev = bR + aLastPrev - (insert ? kR - 1 : kR + 1); + + // Because of invariant that intervals following the middle change + // cannot have common items at the start, + // move in forward direction along a diagonal of common items. + const nCommonF = countCommonItemsF( + aLastPrev, + aEnd, + bLastPrev, + bEnd, + isCommon, + ); + + division.nCommonFollowing = nCommonF; + if (nCommonF !== 0) { + // The last point of previous reverse path segment is start of common subsequence. + division.aCommonFollowing = aLastPrev; + division.bCommonFollowing = bLastPrev; + } + + const aStartFollowing = aLastPrev + nCommonF; // aFirstPrev + const bStartFollowing = bLastPrev + nCommonF; // bFirstPrev + + if (d - 1 === aEnd + bEnd - aStartFollowing - bStartFollowing) { + // Optimization: number of changes in forward direction + // is equal to number of items in following interval, + // therefore it cannot contain any common items. 
+ division.aStartFollowing = aEnd; + division.bStartFollowing = bEnd; + } else { + division.aStartFollowing = aStartFollowing; + division.bStartFollowing = bStartFollowing; + } + } + + return true; + } + } + } + + return false; +}; + +// Given index intervals and input function to compare items at indexes, +// divide at the middle change. +// +// DO NOT CALL if start === end, because interval cannot contain common items +// and because this function will throw the “no overlap” error. +const divide = ( + nChange: number, + aStart: number, + aEnd: number, + bStart: number, + bEnd: number, + isCommon: IsCommon, + aIndexesF: Indexes, // temporary memory, not input nor output + aIndexesR: Indexes, // temporary memory, not input nor output + division: Division, // output +) => { + const bF = bStart - aStart; // bIndex = bF + aIndex - kF + const bR = bEnd - aEnd; // bIndex = bR + aIndex - kR + const aLength = aEnd - aStart; + const bLength = bEnd - bStart; + + // Because graph has square or portrait orientation, + // length difference is minimum number of items to insert from b. + // Corresponding forward and reverse diagonals in graph + // depend on length difference of the sequences: + // kF = kR - baDeltaLength + // kR = kF + baDeltaLength + const baDeltaLength = bLength - aLength; + + // Optimization: max diagonal in graph intersects corner of shorter side. + let iMaxF = aLength; + let iMaxR = aLength; + + // Initialize no changes yet in forward or reverse direction: + aIndexesF[0] = aStart - 1; // at open start of interval, outside closed start + aIndexesR[0] = aEnd; // at open end of interval + + if (baDeltaLength % 2 === 0) { + // The number of changes in paths is 2 * d if length difference is even. 
+ const dMin = (nChange || baDeltaLength) / 2; + const dMax = (aLength + bLength) / 2; + + for (let d = 1; d <= dMax; d += 1) { + iMaxF = extendPathsF(d, aEnd, bEnd, bF, isCommon, aIndexesF, iMaxF); + + if (d < dMin) { + iMaxR = extendPathsR(d, aStart, bStart, bR, isCommon, aIndexesR, iMaxR); + } else if ( + // If a reverse path overlaps a forward path in the same diagonal, + // return a division of the index intervals at the middle change. + extendOverlappablePathsR( + d, + aStart, + aEnd, + bStart, + bEnd, + isCommon, + aIndexesF, + iMaxF, + aIndexesR, + iMaxR, + division, + ) + ) { + return; + } + } + } else { + // The number of changes in paths is 2 * d - 1 if length difference is odd. + const dMin = ((nChange || baDeltaLength) + 1) / 2; + const dMax = (aLength + bLength + 1) / 2; + + // Unroll first half iteration so loop extends the relevant pairs of paths. + // Because of invariant that intervals have no common items at start or end, + // and limitation not to call divide with empty intervals, + // therefore it cannot be called if a forward path with one change + // would overlap a reverse path with no changes, even if dMin === 1. + let d = 1; + iMaxF = extendPathsF(d, aEnd, bEnd, bF, isCommon, aIndexesF, iMaxF); + + for (d += 1; d <= dMax; d += 1) { + iMaxR = extendPathsR( + d - 1, + aStart, + bStart, + bR, + isCommon, + aIndexesR, + iMaxR, + ); + + if (d < dMin) { + iMaxF = extendPathsF(d, aEnd, bEnd, bF, isCommon, aIndexesF, iMaxF); + } else if ( + // If a forward path overlaps a reverse path in the same diagonal, + // return a division of the index intervals at the middle change. 
+ extendOverlappablePathsF( + d, + aStart, + aEnd, + bStart, + bEnd, + isCommon, + aIndexesF, + iMaxF, + aIndexesR, + iMaxR, + division, + ) + ) { + return; + } + } + } + + /* istanbul ignore next */ + throw new Error( + `${pkg}: no overlap aStart=${aStart} aEnd=${aEnd} bStart=${bStart} bEnd=${bEnd}`, + ); +}; + +// Given index intervals and input function to compare items at indexes, +// return by output function the number of adjacent items and starting indexes +// of each common subsequence. Divide and conquer with only linear space. +// +// The index intervals are half open [start, end) like array slice method. +// DO NOT CALL if start === end, because interval cannot contain common items +// and because divide function will throw the “no overlap” error. +const findSubsequences = ( + nChange: number, + aStart: number, + aEnd: number, + bStart: number, + bEnd: number, + transposed: boolean, + callbacks: Array, + aIndexesF: Indexes, // temporary memory, not input nor output + aIndexesR: Indexes, // temporary memory, not input nor output + division: Division, // temporary memory, not input nor output +) => { + if (bEnd - bStart < aEnd - aStart) { + // Transpose graph so it has portrait instead of landscape orientation. + // Always compare shorter to longer sequence for consistency and optimization. + transposed = !transposed; + if (transposed && callbacks.length === 1) { + // Lazily wrap callback functions to swap args if graph is transposed. + const {foundSubsequence, isCommon} = callbacks[0]; + callbacks[1] = { + foundSubsequence: (nCommon, bCommon, aCommon) => { + foundSubsequence(nCommon, aCommon, bCommon); + }, + isCommon: (bIndex, aIndex) => isCommon(aIndex, bIndex), + }; + } + + const tStart = aStart; + const tEnd = aEnd; + aStart = bStart; + aEnd = bEnd; + bStart = tStart; + bEnd = tEnd; + } + const {foundSubsequence, isCommon} = callbacks[transposed ? 1 : 0]; + + // Divide the index intervals at the middle change. 
+ divide( + nChange, + aStart, + aEnd, + bStart, + bEnd, + isCommon, + aIndexesF, + aIndexesR, + division, + ); + const { + nChangePreceding, + aEndPreceding, + bEndPreceding, + nCommonPreceding, + aCommonPreceding, + bCommonPreceding, + nCommonFollowing, + aCommonFollowing, + bCommonFollowing, + nChangeFollowing, + aStartFollowing, + bStartFollowing, + } = division; + + // Unless either index interval is empty, they might contain common items. + if (aStart < aEndPreceding && bStart < bEndPreceding) { + // Recursively find and return common subsequences preceding the division. + findSubsequences( + nChangePreceding, + aStart, + aEndPreceding, + bStart, + bEndPreceding, + transposed, + callbacks, + aIndexesF, + aIndexesR, + division, + ); + } + + // Return common subsequences that are adjacent to the middle change. + if (nCommonPreceding !== 0) { + foundSubsequence(nCommonPreceding, aCommonPreceding, bCommonPreceding); + } + if (nCommonFollowing !== 0) { + foundSubsequence(nCommonFollowing, aCommonFollowing, bCommonFollowing); + } + + // Unless either index interval is empty, they might contain common items. + if (aStartFollowing < aEnd && bStartFollowing < bEnd) { + // Recursively find and return common subsequences following the division. 
+ findSubsequences( + nChangeFollowing, + aStartFollowing, + aEnd, + bStartFollowing, + bEnd, + transposed, + callbacks, + aIndexesF, + aIndexesR, + division, + ); + } +}; + +const validateLength = (name: string, arg: any) => { + const type = typeof arg; + if (type !== 'number') { + throw new TypeError(`${pkg}: ${name} typeof ${type} is not a number`); + } + if (!Number.isSafeInteger(arg)) { + throw new RangeError(`${pkg}: ${name} value ${arg} is not a safe integer`); + } + if (arg < 0) { + throw new RangeError(`${pkg}: ${name} value ${arg} is a negative integer`); + } +}; + +const validateCallback = (name: string, arg: any) => { + const type = typeof arg; + if (type !== 'function') { + throw new TypeError(`${pkg}: ${name} typeof ${type} is not a function`); + } +}; + +// Compare items in two sequences to find a longest common subsequence. +// Given lengths of sequences and input function to compare items at indexes, +// return by output function the number of adjacent items and starting indexes +// of each common subsequence. +export default ( + aLength: number, + bLength: number, + isCommon: IsCommon, + foundSubsequence: FoundSubsequence, +) => { + validateLength('aLength', aLength); + validateLength('bLength', bLength); + validateCallback('isCommon', isCommon); + validateCallback('foundSubsequence', foundSubsequence); + + // Count common items from the start in the forward direction. + const nCommonF = countCommonItemsF(0, aLength, 0, bLength, isCommon); + + if (nCommonF !== 0) { + foundSubsequence(nCommonF, 0, 0); + } + + // Unless both sequences consist of common items only, + // find common items in the half-trimmed index intervals. + if (aLength !== nCommonF || bLength !== nCommonF) { + // Invariant: intervals do not have common items at the start. + // The start of an index interval is closed like array slice method. + const aStart = nCommonF; + const bStart = nCommonF; + + // Count common items from the end in the reverse direction. 
+ const nCommonR = countCommonItemsR( + aStart, + aLength - 1, + bStart, + bLength - 1, + isCommon, + ); + + // Invariant: intervals do not have common items at the end. + // The end of an index interval is open like array slice method. + const aEnd = aLength - nCommonR; + const bEnd = bLength - nCommonR; + + // Unless one sequence consists of common items only, + // therefore the other trimmed index interval consists of changes only, + // find common items in the trimmed index intervals. + const nCommonFR = nCommonF + nCommonR; + if (aLength !== nCommonFR && bLength !== nCommonFR) { + const nChange = 0; // number of change items is not yet known + const transposed = false; // call the original unwrapped functions + const callbacks = [{foundSubsequence, isCommon}]; + + // Indexes in sequence a of last points in furthest reaching paths + // from outside the start at top left in the forward direction: + const aIndexesF = [NOT_YET_SET]; + // from the end at bottom right in the reverse direction: + const aIndexesR = [NOT_YET_SET]; + + // Initialize one object as output of all calls to divide function. + const division = { + aCommonFollowing: NOT_YET_SET, + aCommonPreceding: NOT_YET_SET, + aEndPreceding: NOT_YET_SET, + aStartFollowing: NOT_YET_SET, + bCommonFollowing: NOT_YET_SET, + bCommonPreceding: NOT_YET_SET, + bEndPreceding: NOT_YET_SET, + bStartFollowing: NOT_YET_SET, + nChangeFollowing: NOT_YET_SET, + nChangePreceding: NOT_YET_SET, + nCommonFollowing: NOT_YET_SET, + nCommonPreceding: NOT_YET_SET, + }; + + // Find and return common subsequences in the trimmed index intervals. + findSubsequences( + nChange, + aStart, + aEnd, + bStart, + bEnd, + transposed, + callbacks, + aIndexesF, + aIndexesR, + division, + ); + } + + if (nCommonR !== 0) { + foundSubsequence(nCommonR, aEnd, bEnd); + } + } +};