(state, draftState => stateUpdater(draftState))
+ stateHolder.current = nextState;
return nextState;
})
- }
-
- const dispatch: (actionName: P['name'], params: P['params']) => void = (actionName, params) => {
+ }, [setState])
+ const dispatch:
(actionName: P['name'], params: P['params']) => void = useCallback((actionName, params) => {
if (typeof actions[actionName] === 'function') {
-
+ function select (): GlobalState {
+ return stateHolder.current
+ }
// todo: fix the any type
- actions[actionName](state, updateState, params as any);
+ actions[actionName](select, updateState, params as any);
// actions['subspaceSearch'](state, updateState, params)
}
- }
+ }, [updateState])
diff --git a/packages/frontend/src/utils/useComposeState.tsx b/packages/frontend/src/utils/useComposeState.tsx
index 9617447b..794ee52b 100644
--- a/packages/frontend/src/utils/useComposeState.tsx
+++ b/packages/frontend/src/utils/useComposeState.tsx
@@ -1,4 +1,4 @@
-import React, { useState } from 'react';
+import { useState, useCallback } from 'react';
import produce, { Draft } from 'immer';
/**
* @param S type of the composed state
@@ -19,9 +19,11 @@ export type StateUpdater = (draftState: Draft) => void
*/
export default function useComposeState(initState: S): [S, (stateUpdater: StateUpdater) => void] {
const [state, setState] = useState(initState)
- const updateState = (stateUpdater: StateUpdater) => {
- const nextState = produce(state, draftState => stateUpdater(draftState))
- setState(nextState)
- }
+ const updateState = useCallback((stateUpdater: StateUpdater) => {
+ setState(state => {
+ const nextState = produce(state, draftState => stateUpdater(draftState))
+ return nextState
+ })
+ }, [setState])
return [state, updateState]
}
\ No newline at end of file
diff --git a/packages/frontend/src/visBuilder/vegaBase.tsx b/packages/frontend/src/visBuilder/vegaBase.tsx
index 217b6282..0a26a55f 100644
--- a/packages/frontend/src/visBuilder/vegaBase.tsx
+++ b/packages/frontend/src/visBuilder/vegaBase.tsx
@@ -1,7 +1,7 @@
import React, { useEffect, useRef, useMemo } from 'react';
import aggregate from 'cube-core';
import embed from 'vega-embed';
-import { DataSource, Field, FieldType } from '../global'
+import { DataSource, Field } from '../global'
import { baseVis } from '../queries/index';
export const geomTypeMap: {[key: string]: any} = {
interval: 'bar',
@@ -53,7 +53,7 @@ const BaseChart: React.FC = (props) => {
as: `${mea}_${aggregator}`
}
})
- }, [measures])
+ }, [measures, aggregator])
let table = useMemo(() => {
if (!defaultAggregated) {
diff --git a/packages/frontend/src/workers/cluster.worker.js b/packages/frontend/src/workers/cluster.worker.js
index e5c24776..d7b90a52 100644
--- a/packages/frontend/src/workers/cluster.worker.js
+++ b/packages/frontend/src/workers/cluster.worker.js
@@ -1,5 +1,7 @@
/* eslint no-restricted-globals: 0 */
-import { kruskalMST } from 'visual-insights';
+import { Cluster } from 'visual-insights';
+import { timer } from './timer';
+
const PearsonThreshold = 0.5;
function sum (arr) {
let ans = 0;
@@ -10,12 +12,11 @@ function sum (arr) {
return ans;
}
const cluster = (e) => {
- console.log('[cluster measures]')
try {
const { spaces, maxGroupNumber } = e.data;
let result = [];
for (let space of spaces) {
- const { edgesInMST, groups } = kruskalMST(space.matrix, maxGroupNumber, PearsonThreshold);
+ const { edgesInMST, groups } = Cluster.kruskalWithFullMST(space.matrix, maxGroupNumber, PearsonThreshold);
let measureGroups = new Map();
for (let i = 0; i < groups.length; i++) {
if (!measureGroups.has(groups[i])) {
@@ -44,4 +45,4 @@ const cluster = (e) => {
}
}
-self.addEventListener('message', cluster, false);
+self.addEventListener('message', timer(cluster), false);
diff --git a/packages/frontend/src/workers/combineFields.worker.js b/packages/frontend/src/workers/combineFields.worker.js
index a0b35192..7e92202b 100644
--- a/packages/frontend/src/workers/combineFields.worker.js
+++ b/packages/frontend/src/workers/combineFields.worker.js
@@ -1,11 +1,11 @@
/* eslint no-restricted-globals: 0 */
-import { analysisDimensions } from 'visual-insights'
+import { Insight } from 'visual-insights'
+import { timer } from './timer';
const combineFields = (e) => {
- console.log('[combine fields]')
try {
const { dataSource, dimensions, measures, operator, topKPercent = 1 } = e.data;
- let impurityList = analysisDimensions(dataSource, dimensions, measures, operator).map(dimReport => {
+ let impurityList = Insight.insightExtraction(dataSource, dimensions, measures, operator).map(dimReport => {
let sum = 0;
for (let key in dimReport[1]) {
sum += dimReport[1][key];
@@ -41,4 +41,4 @@ const combineFields = (e) => {
}
}
-self.addEventListener('message', combineFields, false);
\ No newline at end of file
+self.addEventListener('message', timer(combineFields), false);
\ No newline at end of file
diff --git a/packages/frontend/src/workers/dashboard.worker.js b/packages/frontend/src/workers/dashboard.worker.js
index e82d9f0a..b69bd926 100644
--- a/packages/frontend/src/workers/dashboard.worker.js
+++ b/packages/frontend/src/workers/dashboard.worker.js
@@ -1,5 +1,6 @@
/* eslint no-restricted-globals: 0 */
import { DashBoard } from 'visual-insights';
+import { timer } from './timer';
function transSubspaces2FieldsFeature(subspaces) {
let fieldFeatureList = [];
@@ -43,4 +44,4 @@ const generateDashBoard = (e) => {
}
}
-self.addEventListener('message', generateDashBoard, false);
\ No newline at end of file
+self.addEventListener('message', timer(generateDashBoard), false);
\ No newline at end of file
diff --git a/packages/frontend/src/workers/fieldsSummary.worker.js b/packages/frontend/src/workers/fieldsSummary.worker.js
index 93f7ce13..4e99a9cb 100644
--- a/packages/frontend/src/workers/fieldsSummary.worker.js
+++ b/packages/frontend/src/workers/fieldsSummary.worker.js
@@ -1,7 +1,8 @@
/* eslint no-restricted-globals: 0 */
import { UnivariateSummary } from 'visual-insights';
-const { getAllFieldsDistribution, getAllFieldTypes, getAllFieldsEntropy } = UnivariateSummary;
+import { timer } from './timer';
+const { getAllFieldsDistribution, getAllFieldTypes, getAllFieldsEntropy } = UnivariateSummary;
const fieldSummary = (e) => {
const { fields, dataSource } = e.data;
@@ -43,4 +44,4 @@ const fieldSummary = (e) => {
}
}
-self.addEventListener('message', fieldSummary, false)
\ No newline at end of file
+self.addEventListener('message', timer(fieldSummary), false)
\ No newline at end of file
diff --git a/packages/frontend/src/workers/groupFields.worker.js b/packages/frontend/src/workers/groupFields.worker.js
index 39cee289..2f97b338 100644
--- a/packages/frontend/src/workers/groupFields.worker.js
+++ b/packages/frontend/src/workers/groupFields.worker.js
@@ -1,8 +1,7 @@
/* eslint no-restricted-globals: 0 */
import { UnivariateSummary } from 'visual-insights';
-
+import { timer } from './timer';
const groupFields = (e) => {
- console.log('group fields worker');
try {
const { dataSource, fields } = e.data;
const result = UnivariateSummary.groupFields(dataSource, fields);
@@ -18,4 +17,4 @@ const groupFields = (e) => {
}
}
-self.addEventListener('message', groupFields, false);
\ No newline at end of file
+self.addEventListener('message', timer(groupFields), false);
\ No newline at end of file
diff --git a/packages/frontend/src/workers/timer.js b/packages/frontend/src/workers/timer.js
new file mode 100644
index 00000000..b3651d63
--- /dev/null
+++ b/packages/frontend/src/workers/timer.js
@@ -0,0 +1,15 @@
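+/**
+ * Wrap a worker message handler so that the time each call takes is logged.
+ * @param {Function} task handler to wrap; it is called with the received event
+ */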
+export function timer (task) {
+ return function (e) {
+ let startTime = new Date().getTime();
+ try {
+ task(e);
+ } finally {
+ let cost = new Date().getTime() - startTime;
+ console.log(`Task [${task.name}] cost ${cost} ms.`)
+ }
+ }
+}
\ No newline at end of file
diff --git a/packages/visual-insights/README.md b/packages/visual-insights/README.md
index 655ab8b1..a345634b 100644
--- a/packages/visual-insights/README.md
+++ b/packages/visual-insights/README.md
@@ -1,7 +1,8 @@
# Visual-Insights
-
+

+[![Coverage Status](https://coveralls.io/repos/github/Kanaries/Rath/badge.svg?branch=dev)](https://coveralls.io/github/Kanaries/Rath?branch=dev)
### API
diff --git a/packages/visual-insights/package.json b/packages/visual-insights/package.json
index d953b6fc..4443b6ae 100644
--- a/packages/visual-insights/package.json
+++ b/packages/visual-insights/package.json
@@ -10,15 +10,14 @@
"module": "./build/esm/index.js",
"types": "./build/esm/index.d.ts",
"dependencies": {
- "cube-core": "^2.13.0",
- "mocha": "^6.2.0",
- "typescript": "^3.6.3"
+ "cube-core": "^2.13.0"
},
"scripts": {
"build": "npm run buildfront && npm run buildback",
"buildback": "tsc -p ./tsconfig.cjs.json",
"buildfront": "tsc -p ./tsconfig.esm.json",
- "test": "npm run buildback && mocha --no-timeouts"
+ "test": "npm run buildback && mocha --no-timeouts",
+ "coverage": "istanbul cover _mocha --report lcovonly -- --no-timeouts -R spec && cat ./coverage/lcov.info | coveralls && rm -rf ./coverage"
},
"eslintConfig": {
"extends": "react-app"
@@ -34,5 +33,12 @@
"last 1 firefox version",
"last 1 safari version"
]
+ },
+ "devDependencies": {
+ "coveralls": "^3.0.9",
+ "istanbul": "^0.4.5",
+ "mocha-lcov-reporter": "^1.3.0",
+ "mocha": "^6.2.0",
+ "typescript": "^3.6.3"
}
}
diff --git a/packages/visual-insights/src/cleaner/index.ts b/packages/visual-insights/src/cleaner/index.ts
index e10081a0..5e3c2e25 100644
--- a/packages/visual-insights/src/cleaner/index.ts
+++ b/packages/visual-insights/src/cleaner/index.ts
@@ -1,4 +1,4 @@
-import { deepcopy, isFieldNumeric, isFieldTime, memberCount } from '../utils';
+import { deepcopy, isFieldNumeric, memberCount } from '../utils/index';
import { DataSource } from '../commonTypes';
function dropNullColumn (dataSource: DataSource, fields: string[]): { fields: string[]; dataSource: DataSource} {
let keepFields = fields.map(() => false);
diff --git a/packages/visual-insights/src/constant.ts b/packages/visual-insights/src/constant.ts
new file mode 100644
index 00000000..ff71243e
--- /dev/null
+++ b/packages/visual-insights/src/constant.ts
@@ -0,0 +1,4 @@
+export const CHANNEL = {
+ maxDimensionNumber: 8,
+ maxMeasureNumber: 6
+}
\ No newline at end of file
diff --git a/packages/visual-insights/src/dashboard/index.ts b/packages/visual-insights/src/dashboard/index.ts
index 75cd286e..e3503d0e 100644
--- a/packages/visual-insights/src/dashboard/index.ts
+++ b/packages/visual-insights/src/dashboard/index.ts
@@ -1,9 +1,9 @@
-import { FieldsFeature, correlation, linearMapPositive } from "../insights/impurity";
+import { FieldsFeature } from "../insights/impurity";
import { DataSource, OperatorType } from "../commonTypes";
-import cluster from "../insights/cluster";
+import { Cluster } from "../ml/index";
import aggregate from 'cube-core';
-import { normalize, entropy } from "../impurityMeasure";
-import { crammersV } from './utils';
+import { normalize, entropy } from "../statistics/index";
+import { crammersV, pearsonCC, linearMapPositive } from '../statistics/index';
import { CrammersVThreshold, PearsonCorrelation } from '../insights/config';
interface DashBoardSpace {
@@ -41,12 +41,12 @@ export function getDashBoardSubspace (dataSource: DataSource, dimensions: string
for (let i = 0; i < measures.length; i++) {
correlationMatrix[i][i] = 1;
for (let j = i + 1; j < measures.length; j++) {
- let r = correlation(dataSource, measures[i], measures[j]);
+ let r = pearsonCC(dataSource, measures[i], measures[j]);
correlationMatrix[j][i] = correlationMatrix[i][j] = r;
}
}
- const measureGroups = cluster({
+ const measureGroups = Cluster.kruskal({
matrix: correlationMatrix,
measures,
groupMaxSize: Math.round(measures.length / 6), // todo: make a config: max 6 measures in a dashboard
@@ -118,7 +118,7 @@ export function getDashBoardView (dashBoardSpace: DashBoardSpace, dataSource: Da
/**
* correlation view
*/
- const measureGroups = cluster({
+ const measureGroups = Cluster.kruskal({
matrix: dashBoardSpace.correlationMatrix,
measures: measures,
groupMaxSize: Math.round(measures.length / 3), // todo: make a config: max 3 measures in a chart
@@ -144,7 +144,7 @@ export function getDashBoardView (dashBoardSpace: DashBoardSpace, dataSource: Da
* impact views
* todo: protentional repeat view or very similiar view
*/
- const dimensionGroups = cluster({
+ const dimensionGroups = Cluster.kruskal({
matrix: dimensionCorrelationMatrix,
measures: dimensions,
groupMaxSize: 2, // todo: make a config: max 2 dimensions in a chart
diff --git a/packages/visual-insights/src/distribution.ts b/packages/visual-insights/src/distribution.ts
index 8d6827b4..f7922201 100644
--- a/packages/visual-insights/src/distribution.ts
+++ b/packages/visual-insights/src/distribution.ts
@@ -1,5 +1,5 @@
import { DataSource } from './commonTypes';
-import { memberCount } from './utils';
+import { memberCount } from './utils/index';
function isUniformDistribution(dataSource: DataSource, field: string): boolean {
const members = memberCount(dataSource, field);
diff --git a/packages/visual-insights/src/fieldAnalysis.ts b/packages/visual-insights/src/fieldAnalysis.ts
index 73675c62..fd57be69 100644
--- a/packages/visual-insights/src/fieldAnalysis.ts
+++ b/packages/visual-insights/src/fieldAnalysis.ts
@@ -1,3 +1,6 @@
+/**
+ * todo: delete this file, fieldsAnalysis is an old api.
+ */
import { DataSource, Field, FieldImpurity } from './commonTypes'
import {
@@ -10,13 +13,10 @@ import {
// isFieldNumeric,
isFieldTime,
isFieldContinous
-} from './utils';
+} from './utils/index';
import { isUniformDistribution } from './distribution';
-import {
- normalize,
- entropy,
-} from './impurityMeasure';
+import { normalize, entropy } from './statistics/index';
const MAGIC_NUMBER = 5;
diff --git a/packages/visual-insights/src/impurityMeasure.ts b/packages/visual-insights/src/impurityMeasure.ts
deleted file mode 100644
index 1d51c23e..00000000
--- a/packages/visual-insights/src/impurityMeasure.ts
+++ /dev/null
@@ -1,31 +0,0 @@
-export type ImpurityFC = (probabilityList: number[]) => number;
-
-function normalize(frequencyList: number[]): number[] {
- let sum = 0;
- for (let f of frequencyList) {
- sum += f;
- }
- return frequencyList.map(f => f / sum);
-}
-
-const entropy: ImpurityFC = (probabilityList) => {
- let sum = 0;
- for (let p of probabilityList) {
- sum += p * Math.log2(p);
- }
- return -sum;
-}
-
-const gini: ImpurityFC = (probabilityList) => {
- let sum = 0;
- for (let p of probabilityList) {
- sum += p * (1 - p);
- }
- return sum;
-}
-
-export {
- normalize,
- entropy,
- gini
-}
\ No newline at end of file
diff --git a/packages/visual-insights/src/index.ts b/packages/visual-insights/src/index.ts
index 7e6422c2..150f135d 100644
--- a/packages/visual-insights/src/index.ts
+++ b/packages/visual-insights/src/index.ts
@@ -1,13 +1,9 @@
-import * as Utils from './utils'
-
-import fieldsAnalysis from './fieldAnalysis';
+import * as Utils from './utils/index'
import specification from './specification';
import * as Distribution from './distribution';
-import * as ImpurityMeasure from './impurityMeasure';
-
-import getInsightViews, { analysisDimensions, getCombination, clusterMeasures, kruskalMST, getDimSetsBasedOnClusterGroups } from './insights/index';
+import * as Insight from './insights/index';
import * as Cleaner from './cleaner/index';
import * as UnivariateSummary from './univariateSummary/index'
@@ -15,21 +11,18 @@ import * as UnivariateSummary from './univariateSummary/index'
import * as DashBoard from './dashboard/index';
import * as Sampling from './sampling/index';
+import * as Statistics from './statistics/index';
+import { Cluster } from './ml/index';
export {
DashBoard,
Sampling,
Utils,
+ Statistics,
UnivariateSummary,
- fieldsAnalysis,
Distribution,
- ImpurityMeasure,
specification,
- analysisDimensions,
Cleaner,
- getInsightViews,
- getCombination,
- getDimSetsBasedOnClusterGroups,
- clusterMeasures,
- kruskalMST
+ Insight,
+ Cluster
}
\ No newline at end of file
diff --git a/packages/visual-insights/src/insights/impurity.ts b/packages/visual-insights/src/insights/impurity.ts
index 6c20ff87..8d657dbb 100644
--- a/packages/visual-insights/src/insights/impurity.ts
+++ b/packages/visual-insights/src/insights/impurity.ts
@@ -1,30 +1,13 @@
// import { aggregate } from '../utils';
import aggregate from 'cube-core';
-import { entropy, normalize } from '../impurityMeasure';
+import { entropy, normalize } from '../statistics/index';
import { DataSource, OperatorType } from '../commonTypes';
-import { crammersV } from '../dashboard/utils';
+import { crammersV, getCombination, pearsonCC, linearMapPositive } from '../statistics/index';
import { CrammersVThreshold } from './config';
-import cluster from './cluster';
+import { Cluster } from '../ml/index';
+import { CHANNEL } from '../constant';
// insights like outlier and trend both request high impurity of dimension.
-const maxVisualChannel = 8;
-function getCombination(elements: string[], start: number = 1, end: number = elements.length): string[][] {
- let ans: string[][] = [];
- const combine = (step: number, set: string[], size: number) => {
- if (set.length === size) {
- ans.push([...set]);
- return;
- }
- if (step >= elements.length) {
- return;
- }
- combine(step + 1, [...set, elements[step]], size);
- combine(step + 1, set, size);
- }
- for (let i = start; i <= Math.min(end, maxVisualChannel); i++) {
- combine(0, [], i);
- }
- return ans
-}
+
function getDimCorrelationMatrix(dataSource: DataSource, dimensions: string[]): number[][] {
let matrix: number[][] = dimensions.map(d => dimensions.map(d => 0));
for (let i = 0; i < dimensions.length; i++) {
@@ -40,9 +23,8 @@ export function getDimSetsBasedOnClusterGroups(dataSource: DataSource, dimension
const maxDimNumberInView = 4;
let dimSets: string[][] = [];
let dimCorrelationMatrix = getDimCorrelationMatrix(dataSource, dimensions);
- console.log(dimCorrelationMatrix)
// groupMaxSize here means group number.
- let groups: string[][] = cluster({
+ let groups: string[][] = Cluster.kruskal({
matrix: dimCorrelationMatrix,
measures: dimensions,
groupMaxSize: Math.round(dimensions.length / maxDimNumberInView),
@@ -50,38 +32,32 @@ export function getDimSetsBasedOnClusterGroups(dataSource: DataSource, dimension
});
// todo: maybe a threhold would be better ?
for (let group of groups) {
- let combineDimSet: string[][] = getCombination(group);
+ let combineDimSet: string[][] = getCombination(group, 1, CHANNEL.maxDimensionNumber);
dimSets.push(...combineDimSet);
}
return dimSets;
}
-export function linearMapPositive (arr: number[]): number[] {
- let min = Math.min(...arr);
- return arr.map(a => a - min + 1);
-}
-
-function sum(arr: number[]): number {
- let sum = 0;
- for (let i = 0, len = arr.length; i < len; i++) {
- // if (typeof dataSource[i][field])
- sum += arr[i];
+/**
+ * Pick the dimension subsets (subspaces) to analyse.
+ * @param dataSource records handed to the cluster-based search
+ * @param dimensions dimension field names to combine
+ * @param shouldDimensionsCorrelated when true (default), subsets come from getDimSetsBasedOnClusterGroups;
+ * otherwise every combination of `dimensions` is enumerated, capped at CHANNEL.maxDimensionNumber
+ * members per subset (the cap getCombination used to apply internally before it was moved out).
+ */
+export function subspaceSearching(dataSource: DataSource, dimensions: string[], shouldDimensionsCorrelated: boolean | undefined = true): string[][] {
+ if (shouldDimensionsCorrelated) {
+ return getDimSetsBasedOnClusterGroups(dataSource, dimensions);
+ } else {
+ return getCombination(dimensions, 1, CHANNEL.maxDimensionNumber)
}
- return sum;
}
-export function correlation(dataSource: DataSource, fieldX: string, fieldY: string): number {
- let r = 0;
- let xBar = sum(dataSource.map(r => r[fieldX])) / dataSource.length;
- let yBar = sum(dataSource.map(r => r[fieldY])) / dataSource.length;
- r = sum(dataSource.map(r => (r[fieldX] - xBar) * (r[fieldY] - yBar))) /
- Math.sqrt(sum(dataSource.map(r => Math.pow(r[fieldX] - xBar, 2))) * sum(dataSource.map(r => Math.pow(r[fieldY] - yBar, 2))));
- return r;
-}
export type FieldsFeature = [string[], any, number[][]];
-function analysisDimensions(dataSource: DataSource, dimensions: string[], measures: string[], operator: OperatorType | undefined = 'sum'): FieldsFeature[] {
+export function insightExtraction(dataSource: DataSource, dimensions: string[], measures: string[], operator: OperatorType | undefined = 'sum'): FieldsFeature[] {
let impurityList: FieldsFeature[] = [];
- let dimSet = getDimSetsBasedOnClusterGroups(dataSource, dimensions);
+ let dimSet = subspaceSearching(dataSource, dimensions, true);
for (let dset of dimSet) {
let impurity = {};
let aggData = aggregate({
@@ -103,7 +71,7 @@ function analysisDimensions(dataSource: DataSource, dimensions: string[], measur
for (let i = 0; i < measures.length; i++) {
correlationMatrix[i][i] = 1;
for (let j = i + 1; j < measures.length; j++) {
- let r = correlation(aggData, measures[i], measures[j]);
+ let r = pearsonCC(aggData, measures[i], measures[j]);
correlationMatrix[j][i] = correlationMatrix[i][j] = r;
}
}
@@ -111,5 +79,3 @@ function analysisDimensions(dataSource: DataSource, dimensions: string[], measur
}
return impurityList
}
-
-export { analysisDimensions, getCombination }
\ No newline at end of file
diff --git a/packages/visual-insights/src/insights/index.ts b/packages/visual-insights/src/insights/index.ts
index 4f6417d0..be0801d7 100644
--- a/packages/visual-insights/src/insights/index.ts
+++ b/packages/visual-insights/src/insights/index.ts
@@ -1,61 +1 @@
-import { analysisDimensions, getCombination, getDimSetsBasedOnClusterGroups } from './impurity';
-import { TopKPercentField } from './config';
-import { entropy, normalize } from '../impurityMeasure';
-import { memberCount } from '../utils'
-import cluster, { kruskalMST } from './cluster';
-import { DataSource } from '../commonTypes';
-
-function getInsightViews(dataSource: DataSource, originDimensions: string[], measures: string[]) {
- // 1. impurity of measures based on some dimensons (single variable or depth)
- // 2. correlation matrix of measures
- // cluster of measure group
- // rank dimension
- // choose one dimension
- let dimScores: [string, number, number][] = [];
- for (let dim of originDimensions) {
- const members = memberCount(dataSource, dim);
- const frequencyList = members.map(m => m[1]);
- const probabilityList = normalize(frequencyList);
- const fieldEntropy = entropy(probabilityList);
- const maxEntropy = Math.log2(members.length);
- dimScores.push([dim, fieldEntropy, maxEntropy]);
- }
- dimScores.sort((a, b) => a[1] - b[1]);
- const dimAnalysisSize = Math.round(TopKPercentField * dimScores.length);
- const dimensions = dimScores.slice(0, dimAnalysisSize).map(d => d[0]);
- let analysisReports = analysisDimensions(dataSource, dimensions, measures).map(dimReport => {
- let sum = 0;
- for (let key in dimReport[1]) {
- sum += dimReport[1][key];
- }
- return {
- detail: dimReport,
- score: sum
- }
- });
- analysisReports.sort((a, b) => {
- return a.score - b.score;
- });
-
- // let finalReports = analysisReports.slice(0, Math.round(analysisReports.length * 0.2)).map(report => {
- let finalReports = analysisReports.slice(0, Math.min(analysisReports.length, Math.round(Math.log10(analysisReports.length)) + 9)).map(report => {
- // let finalReports = analysisReports.map(report => {
- let matrix = report.detail[2];
- let groups = cluster({ matrix, measures });
- return {
- ...report,
- groups
- };
- });
- return finalReports
-
-}
-
-export default getInsightViews;
-export {
- analysisDimensions,
- getCombination,
- getDimSetsBasedOnClusterGroups,
- cluster as clusterMeasures,
- kruskalMST
-};
\ No newline at end of file
+export { insightExtraction, subspaceSearching } from './impurity';
diff --git a/packages/visual-insights/src/ml/cluster/index.ts b/packages/visual-insights/src/ml/cluster/index.ts
new file mode 100644
index 00000000..9876d45a
--- /dev/null
+++ b/packages/visual-insights/src/ml/cluster/index.ts
@@ -0,0 +1,6 @@
+import cluster, { kruskalMST } from './kruskal';
+
+export {
+ cluster as kruskal,
+ kruskalMST as kruskalWithFullMST
+}
\ No newline at end of file
diff --git a/packages/visual-insights/src/insights/cluster.ts b/packages/visual-insights/src/ml/cluster/kruskal.ts
similarity index 98%
rename from packages/visual-insights/src/insights/cluster.ts
rename to packages/visual-insights/src/ml/cluster/kruskal.ts
index 705c1ad3..1ad24b3c 100644
--- a/packages/visual-insights/src/insights/cluster.ts
+++ b/packages/visual-insights/src/ml/cluster/kruskal.ts
@@ -81,7 +81,7 @@ function kruskal(matrix: number[][], groupNumber: number, threshold: number | un
}
return groups;
}
-
+// todo: delete the size-limited kruskal and use the largest group size as the limit instead (group number becomes uncontrolled, but results are better); needs discussion.
/**
*
* @param matrix
diff --git a/packages/visual-insights/src/ml/index.ts b/packages/visual-insights/src/ml/index.ts
new file mode 100644
index 00000000..6e6a6234
--- /dev/null
+++ b/packages/visual-insights/src/ml/index.ts
@@ -0,0 +1,5 @@
+import * as Cluster from './cluster/index';
+
+export {
+ Cluster
+}
\ No newline at end of file
diff --git a/packages/visual-insights/src/specification.ts b/packages/visual-insights/src/specification.ts
index 60592735..5f05e116 100644
--- a/packages/visual-insights/src/specification.ts
+++ b/packages/visual-insights/src/specification.ts
@@ -3,7 +3,7 @@ import {
// isFieldCategory,
// isFieldContinous,
memberCount
-} from './utils';
+} from './utils/index';
interface VisualElements {
position: number;
color: number;
@@ -93,7 +93,7 @@ function aestheticMapping (dimFields: Field[]) {
return spec
}
-// todo:
+// todo (P1):
// don't use dimScores: FieldImpurity.
// it's a structure with redundency design.
function specification (dimScores: FieldImpurity[], aggData: DataSource, dimensions: string[], measures: string[]): View {
diff --git a/packages/visual-insights/src/dashboard/utils.ts b/packages/visual-insights/src/statistics/correlation.ts
similarity index 82%
rename from packages/visual-insights/src/dashboard/utils.ts
rename to packages/visual-insights/src/statistics/correlation.ts
index c4aab723..b1841513 100644
--- a/packages/visual-insights/src/dashboard/utils.ts
+++ b/packages/visual-insights/src/statistics/correlation.ts
@@ -54,6 +54,29 @@ export function crammersV(dataSource: DataSource, fieldX: string, fieldY: string
const V = Math.sqrt(chis / (dataSource.length * Math.min(xSet.size - 1, ySet.size - 1)))
return V;
}
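+/**
+ * Pearson correlation coefficient between two fields (NaN when the denominator is 0, e.g. a constant field).
+ * @param dataSource records from which both fields are read
+ * @param fieldX key of the first field
+ * @param fieldY key of the second field
+ */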
+export function pearsonCC(dataSource: DataSource, fieldX: string, fieldY: string): number {
+ let r = 0;
+ let xBar = sum(dataSource.map(r => r[fieldX])) / dataSource.length;
+ let yBar = sum(dataSource.map(r => r[fieldY])) / dataSource.length;
+ r = sum(dataSource.map(r => (r[fieldX] - xBar) * (r[fieldY] - yBar))) /
+ Math.sqrt(sum(dataSource.map(r => Math.pow(r[fieldX] - xBar, 2))) * sum(dataSource.map(r => Math.pow(r[fieldY] - yBar, 2))));
+ return r;
+}
+
+function sum(arr: number[]): number {
+ let sum = 0;
+ for (let i = 0, len = arr.length; i < len; i++) {
+ // if (typeof dataSource[i][field])
+ sum += arr[i];
+ }
+ return sum;
+}
// can be used for test
diff --git a/packages/visual-insights/src/statistics/index.ts b/packages/visual-insights/src/statistics/index.ts
new file mode 100644
index 00000000..c4b01922
--- /dev/null
+++ b/packages/visual-insights/src/statistics/index.ts
@@ -0,0 +1,2 @@
+export * from './correlation';
+export * from './utils';
\ No newline at end of file
diff --git a/packages/visual-insights/src/statistics/utils.ts b/packages/visual-insights/src/statistics/utils.ts
new file mode 100644
index 00000000..a3f4d07c
--- /dev/null
+++ b/packages/visual-insights/src/statistics/utils.ts
@@ -0,0 +1,80 @@
+/**
+ * Shift every value so that the smallest one becomes 1, keeping the distances between values.
+ * Returns a new array; an empty input yields an empty array.
+ */
+export function linearMapPositive (arr: number[]): number[] {
+    // Fold instead of Math.min(...arr): spreading a very large array into an
+    // argument list can exceed the engine's argument limit and throw RangeError.
+    let min = Infinity;
+    for (const a of arr) {
+        min = Math.min(min, a);
+    }
+    return arr.map(a => a - min + 1);
+}
+
+/**
+ * Enumerate every combination (subset, input order preserved) of `elements`
+ * whose size lies in [start, end].
+ * @param elements candidate members
+ * @param start smallest subset size to generate (default 1)
+ * @param end largest subset size to generate (default elements.length)
+ * @returns the subsets, emitted in ascending size
+ */
+export function getCombination(elements: string[], start: number = 1, end: number = elements.length): string[][] {
+    let ans: string[][] = [];
+    const combine = (step: number, set: string[], size: number) => {
+        if (set.length === size) {
+            ans.push([...set]);
+            return;
+        }
+        if (step >= elements.length) {
+            return;
+        }
+        combine(step + 1, [...set, elements[step]], size);
+        combine(step + 1, set, size);
+    }
+    // No subset can be larger than the input, so clamp `end` to skip searches that cannot produce results.
+    const maxSize = Math.min(end, elements.length);
+    for (let i = start; i <= maxSize; i++) {
+        combine(0, [], i);
+    }
+    return ans
+}
+
+/** Signature shared by the impurity measures: maps a probability list to a single score. */
+export type ImpurityFC = (probabilityList: number[]) => number;
+
+/**
+ * Scale a frequency list so that its entries sum to 1.
+ * NOTE: a total of 0 is not guarded; the divisions then yield NaN or Infinity.
+ */
+export function normalize(frequencyList: number[]): number[] {
+    let sum = 0;
+    for (let f of frequencyList) {
+        sum += f;
+    }
+    return frequencyList.map(f => f / sum);
+}
+
+/**
+ * Shannon entropy (base 2) of a probability list.
+ * Zero probabilities contribute nothing instead of turning the result into NaN.
+ */
+export const entropy: ImpurityFC = (probabilityList) => {
+    let sum = 0;
+    for (let p of probabilityList) {
+        // 0 * log2(0) evaluates to NaN in JS; by convention that term is 0.
+        if (p === 0) continue;
+        sum += p * Math.log2(p);
+    }
+    return -sum;
+}
+
+/** Gini impurity of a probability list: the sum of p * (1 - p). */
+export const gini: ImpurityFC = (probabilityList) => {
+    let sum = 0;
+    for (let p of probabilityList) {
+        sum += p * (1 - p);
+    }
+    return sum;
+}
diff --git a/packages/visual-insights/src/univariateSummary/index.ts b/packages/visual-insights/src/univariateSummary/index.ts
index 004f5683..50004964 100644
--- a/packages/visual-insights/src/univariateSummary/index.ts
+++ b/packages/visual-insights/src/univariateSummary/index.ts
@@ -1,6 +1,6 @@
-import { DataSource, Field, FieldImpurity, FieldType, Record } from '../commonTypes';
-import { isFieldTime, isFieldContinous, memberCount, isFieldCategory, deepcopy, groupContinousField, groupCategoryField } from '../utils';
-import { normalize, entropy } from '../impurityMeasure';
+import { DataSource, Field, FieldType, Record } from '../commonTypes';
+import { isFieldTime, isFieldContinous, memberCount, isFieldCategory, deepcopy, groupContinousField, groupCategoryField } from '../utils/index';
+import { normalize, entropy } from '../statistics/index';
import { isUniformDistribution } from '../distribution';
const MIN_QUAN_MEMBER_SIZE = 25;
diff --git a/packages/visual-insights/src/utils.ts b/packages/visual-insights/src/utils/common.ts
similarity index 99%
rename from packages/visual-insights/src/utils.ts
rename to packages/visual-insights/src/utils/common.ts
index d5b0f926..4ec91646 100644
--- a/packages/visual-insights/src/utils.ts
+++ b/packages/visual-insights/src/utils/common.ts
@@ -1,4 +1,4 @@
-import { DataSource } from './commonTypes'
+import { DataSource } from '../commonTypes'
const JOIN_SYMBOL = '_';
const MAGIC_NUMBER = 5;
diff --git a/packages/visual-insights/src/utils/index.ts b/packages/visual-insights/src/utils/index.ts
new file mode 100644
index 00000000..a18f8474
--- /dev/null
+++ b/packages/visual-insights/src/utils/index.ts
@@ -0,0 +1 @@
+export * from './common';
\ No newline at end of file
diff --git a/packages/visual-insights/src/visualization/geoms/interval.ts b/packages/visual-insights/src/visualization/geoms/interval.ts
deleted file mode 100644
index 215e49b2..00000000
--- a/packages/visual-insights/src/visualization/geoms/interval.ts
+++ /dev/null
@@ -1,3 +0,0 @@
-export class Interval {
-
-}
\ No newline at end of file
diff --git a/packages/visual-insights/src/visualization/index.ts b/packages/visual-insights/src/visualization/index.ts
deleted file mode 100644
index e69de29b..00000000
diff --git a/packages/visual-insights/test/dashboard.js b/packages/visual-insights/test/dashboard.js
index 4fb45b5c..589cf1c7 100644
--- a/packages/visual-insights/test/dashboard.js
+++ b/packages/visual-insights/test/dashboard.js
@@ -2,7 +2,7 @@ const fs = require('fs');
const assert = require('assert');
const path = require('path');
-const { analysisDimensions, Cleaner, getInsightViews, getCombination, DashBoard } = require('../build/cjs/index');
+const { Insight, Cleaner, Statistics, DashBoard } = require('../build/cjs/index');
const datasetPath = path.resolve(__dirname, './dataset/titanic.json');
const dataset = JSON.parse(fs.readFileSync(datasetPath).toString());
@@ -15,9 +15,9 @@ const {
} = dataset;
let cleanData = Cleaner.dropNull(dataSource, dimensions, measures);
-describe('insights test', function () {
+describe('dashboard test', function () {
it('print(dashboard)', function () {
- const fieldFeastureList = analysisDimensions(cleanData, dimensions, measures);
+ const fieldFeastureList = Insight.insightExtraction(cleanData, dimensions, measures);
// assert.equal(fieldFeastureList.length, dimensions.length);
const dashboardSpace = DashBoard.getDashBoardSubspace(cleanData, dimensions, measures, fieldFeastureList);
console.log(JSON.stringify(dashboardSpace, null, 2))
@@ -26,15 +26,9 @@ describe('insights test', function () {
assert.equal(sampleViewList.length > 0, true);
console.log(sampleViewList)
})
-
- // it('print(getInsightViews)', function () {
- // let result = getInsightViews(cleanData, dimensions, measures);
- // // console.log(result)
- // assert.equal(result.length > 0, true);
- // })
it('print(getCombination)', function () {
- let result = getCombination([1, 2, 3, 4, 5, 6]);
+ let result = Statistics.getCombination([1, 2, 3, 4, 5, 6]);
console.log(result)
assert.equal(result.length, Math.pow(2, 6) - 1)
})
diff --git a/packages/visual-insights/test/fieldAnalysis.js b/packages/visual-insights/test/fieldAnalysis.js
deleted file mode 100644
index 228ff283..00000000
--- a/packages/visual-insights/test/fieldAnalysis.js
+++ /dev/null
@@ -1,24 +0,0 @@
-const fs = require('fs');
-const assert = require('assert');
-const path = require('path');
-
-const { fieldsAnalysis } = require('../build/cjs/index');
-
-const datasetPath = path.resolve(__dirname, './dataset/titanic.json');
-const dataset = JSON.parse(fs.readFileSync(datasetPath).toString());
-const {
- dataSource,
- config: {
- Measures: measures
- }
-} = dataset;
-const dimensions = ['Age', 'Parch', 'Sex', 'Embarked', 'Pclass'];
-
-describe('test with titanic dataset', function () {
- it('[print result]', function () {
- const { dimScores: result } = fieldsAnalysis(dataSource, dimensions, measures);
- console.table(result)
- assert.equal(result.length, dimensions.length + measures.length);
- })
-})
-
diff --git a/packages/visual-insights/test/impurityMeasure.js b/packages/visual-insights/test/impurityMeasure.js
index 68c55565..dbe6df66 100644
--- a/packages/visual-insights/test/impurityMeasure.js
+++ b/packages/visual-insights/test/impurityMeasure.js
@@ -1,51 +1,55 @@
const assert = require('assert');
-const { ImpurityMeasure } = require('../build/cjs/index');
-const { normalize, gini, entropy } = ImpurityMeasure;
+const { Statistics } = require('../build/cjs/index');
+const { normalize, gini, entropy } = Statistics;
+// Absolute-tolerance comparison: true when |n1 - n2| is below 4 * Number.EPSILON (not scaled to the operands' magnitude).
+function floatEqual (n1, n2) {
+ return Math.abs(n1 - n2) < Number.EPSILON * (2 ** 2);
+}
+// Returns `size` integers, each Math.round(Math.random() * 1000); size defaults to 2 + Math.round(Math.random() * 1000).
+function getRandomArray (size = 2 + Math.round(Math.random() * 1000)) {
+ let frequencyList = [];
+ for (let i = 0; i < size; i++) {
+ frequencyList.push(Math.round(Math.random() * 1000)); // NOTE(review): 0 is a possible value here — confirm normalize/entropy/gini tolerate zero frequencies
+ }
+ return frequencyList;
+}
+
describe('Impurity Measure test', function () {
describe('function: normalize', function () {
+ let frequencyList = [1,2,3,4,5];//getRandomArray();
+ const probabilityList = normalize(frequencyList); // computed in the describe body, outside any it(), and shared by both cases below
+ it('values checks', function () {
+ let freSum = 0;
+ frequencyList.forEach(f => freSum += f);
+ probabilityList.forEach((p, i) => {
+ assert.equal(floatEqual(p, frequencyList[i] / freSum), true) // each probability should equal its frequency divided by the total
+ })
+ })
it('sum_{p} = 1', function () {
- const size = 2 + Math.round(Math.random() * 100);
- let frequencyList = [];
- for (let i = 0; i < size; i++) {
- frequencyList.push(Math.round(Math.random() * 1000));
- }
- const probabilityList = normalize(frequencyList);
-
let sum = 0;
- for (let p of probabilityList) {
- sum += p;
- }
- assert.equal(Math.abs(1 - sum) < Number.EPSILON * Math.pow(2, 2) * probabilityList.length, true);
- // assert.equal(Math.abs(1 - sum) < 0.001, true);
+ probabilityList.forEach(p => sum += p);
+ assert.equal(floatEqual(sum, 1), true); // uses floatEqual's fixed 4 * EPSILON tolerance; the removed check scaled the tolerance by list length
})
})
describe('function: entropy', function () {
+ let size = 100 + Math.round(Math.random() * 100); // random list length in [100, 200]
+ let frequencyList = getRandomArray(size);
+ const probabilityList = normalize(frequencyList);
+ let ans = entropy(probabilityList);
+ it('isNumber', function () {
+ assert.notEqual(ans, NaN); // NOTE(review): legacy assert.notEqual compares with !=, and NaN != NaN, so on Node < 14 this likely cannot fail — confirm runtime, or consider Number.isNaN(ans)
+ })
it('value <=log(k)', function () {
- const size = 2 + Math.round(Math.random() * 100);
- let frequencyList = [];
- for (let i = 0; i < size; i++) {
- frequencyList.push(Math.round(Math.random() * 1000));
- }
- const probabilityList = normalize(frequencyList);
-
- let ans = entropy(probabilityList);
- assert.notEqual(ans, NaN);
- assert.equal(Math.log2(size) >= ans, true);
- })
+ assert.equal(Math.log2(size) + Number.EPSILON * (2 ** 3) >= ans - Number.EPSILON * (2 ** 3), true); // upper bound log2(size), with 8 * EPSILON of slack applied on each side
+ })
})
describe('function: gini', function () {
+ let frequencyList = getRandomArray(); // no size argument: getRandomArray falls back to its default random length
+ let probabilityList = normalize(frequencyList);
+ let ans = gini(probabilityList); // computed in the describe body; the case below only checks the upper bound of 1
it('value <= 1', function () {
- const size = 2 + Math.round(Math.random() * 100);
- let frequencyList = [];
- for (let i = 0; i < size; i++) {
- frequencyList.push(Math.round(Math.random() * 1000));
- }
- const probabilityList = normalize(frequencyList);
-
- let ans = gini(probabilityList);
-
assert.equal(ans <= 1, true);
})
})
diff --git a/packages/visual-insights/test/index.js b/packages/visual-insights/test/index.js
index de804e65..97499430 100644
--- a/packages/visual-insights/test/index.js
+++ b/packages/visual-insights/test/index.js
@@ -2,7 +2,7 @@ const fs = require('fs');
const assert = require('assert');
const path = require('path');
-const { specification, Cleaner, getInsightViews, fieldsAnalysis } = require('../build/cjs/index');
+const { specification, Cleaner } = require('../build/cjs/index');
const datasetPath = path.resolve(__dirname, './dataset/titanic.json');
const dataset = JSON.parse(fs.readFileSync(datasetPath).toString());
@@ -17,19 +17,19 @@ let cleanData = Cleaner.dropNull(dataSource, dimensions, measures);
describe('insights test', function () {
- it('print(getInsightViews)', function () {
- const { dimScores } = fieldsAnalysis(dataSource, dimensions, measures);
- let result = getInsightViews(cleanData, dimensions, measures);
- // console.log(result)
- for (let report of result) {
- const dimList = report.detail[0];
- for (let meaList of report.groups) {
- const { schema, aggData } = specification(dimScores, cleanData, dimList, meaList);
- console.log(schema);
- assert.equal(Object.keys(schema).length > 0, true);
- }
- }
- assert.equal(result.length > 0, true);
- })
+ // it('print(getInsightViews)', function () {
+ // const { dimScores } = fieldsAnalysis(dataSource, dimensions, measures);
+ // let result = getInsightViews(cleanData, dimensions, measures);
+ // // console.log(result)
+ // for (let report of result) {
+ // const dimList = report.detail[0];
+ // for (let meaList of report.groups) {
+ // const { schema, aggData } = specification(dimScores, cleanData, dimList, meaList);
+ // console.log(schema);
+ // assert.equal(Object.keys(schema).length > 0, true);
+ // }
+ // }
+ // assert.equal(result.length > 0, true);
+ // })
})
diff --git a/packages/visual-insights/test/insights.js b/packages/visual-insights/test/insights.js
index 50ff7127..c327a36e 100644
--- a/packages/visual-insights/test/insights.js
+++ b/packages/visual-insights/test/insights.js
@@ -2,7 +2,7 @@ const fs = require('fs');
const assert = require('assert');
const path = require('path');
-const { analysisDimensions, Cleaner, getInsightViews, getCombination, getDimSetsBasedOnClusterGroups } = require('../build/cjs/index');
+const { Insight, Cleaner, Statistics, Sampling } = require('../build/cjs/index');
const datasetPath = path.resolve(__dirname, './dataset/airbnb.json');
const dataset = JSON.parse(fs.readFileSync(datasetPath).toString());
@@ -13,32 +13,23 @@ const {
Measures: measures
}
} = dataset;
-let cleanData = Cleaner.dropNull(dataSource, dimensions, measures);
+let cleanData = Sampling.reservoirSampling(Cleaner.dropNull(dataSource, dimensions, measures), 2000);
describe('insights test', function () {
- // it('print(analysisDimensions)', function () {
- // const result = analysisDimensions(cleanData, dimensions, measures);
- // console.table(result.map(r => {
- // return [r[0][0], JSON.stringify(r[1]), JSON.stringify(r[2])];
- // }))
- // assert.equal(result.length, dimensions.length);
- // })
-
- // it('print(getInsightViews)', function () {
- // let result = getInsightViews(cleanData, dimensions, measures);
- // // console.log(result)
- // assert.equal(result.length > 0, true);
- // })
+ it('print(analysisDimensions)', function () {
+ const result = Insight.insightExtraction(cleanData, dimensions, measures); // NOTE(review): the case title still says analysisDimensions, but the call under test is Insight.insightExtraction — consider renaming the case
+ assert.equal(result.length > 0, true); // only checks that at least one entry is returned
+ })
it('print(getCombination)', function () {
- let result = getCombination([1, 2, 3, 4, 5, 6]);
+ let result = Statistics.getCombination([1, 2, 3, 4, 5, 6]);
console.log(result)
assert.equal(result.length, Math.pow(2, 6) - 1)
})
it('print(clusterCombination vs. combination)', function () {
- let result = getDimSetsBasedOnClusterGroups(cleanData, dimensions);
- let unClusterResult = getCombination(dimensions);
+ let result = Insight.subspaceSearching(cleanData, dimensions, true);
+ let unClusterResult = Statistics.getCombination(dimensions);
console.log(result.length, unClusterResult.length, result)
assert.equal(result.length <= unClusterResult.length, true);
})
diff --git a/packages/visual-insights/test/specification.js b/packages/visual-insights/test/specification.js
index ba6b6eb9..62c4fe3c 100644
--- a/packages/visual-insights/test/specification.js
+++ b/packages/visual-insights/test/specification.js
@@ -1,6 +1,6 @@
const assert = require('assert');
const fs = require('fs');
-const { specification, fieldsAnalysis } = require('../build/cjs/index');
+const { specification, UnivariateSummary } = require('../build/cjs/index');
const path = require('path');
const datasetPath = path.resolve(__dirname, './dataset/titanic.json');
@@ -23,7 +23,13 @@ const dimensions = ['Age', 'Survived', 'Parch', 'Sex', 'Embarked', 'Pclass'];
describe('specification test', function () {
it('specification result', function () {
- const { dimScores } = fieldsAnalysis(dataSource, dimensions, measures);
+ const fieldEntropyList = UnivariateSummary.getAllFieldsEntropy(dataSource, dimensions.concat(measures));
+ const dimScores = fieldEntropyList.map(f => {
+ return [f.fieldName, f.entropy, f.maxEntropy, {
+ name: f.fieldName,
+ type: UnivariateSummary.getFieldType(dataSource, f.fieldName)
+ }]
+ })
const { schema, aggData } = specification(dimScores, dataSource, dimensions, measures);
console.log(schema);
assert.equal(Object.keys(schema).length > 0, true);