diff --git a/.gitignore b/.gitignore index f34d715d..e6f89c58 100644 --- a/.gitignore +++ b/.gitignore @@ -27,4 +27,6 @@ yarn-error.log* .vscode safety -dist \ No newline at end of file +dist + +coverage \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index f0d6b261..90950275 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,5 @@ language: node_js node_js: - - "12" \ No newline at end of file + - "12" +after_success: +- npm run coveralls \ No newline at end of file diff --git a/packages/frontend/src/App.tsx b/packages/frontend/src/App.tsx index 3ca5954d..b0191baf 100644 --- a/packages/frontend/src/App.tsx +++ b/packages/frontend/src/App.tsx @@ -1,4 +1,4 @@ -import React, { useState } from "react"; +import React from "react"; import { useGlobalState, GlobalStateProvider } from "./state"; import { Pivot, PivotItem } from "office-ui-fabric-react"; import { useComposeState } from "./utils/index"; @@ -52,7 +52,6 @@ function App() { pivotKey: pivotList[0].itemKey } }); - return (
diff --git a/packages/frontend/src/actions.ts b/packages/frontend/src/actions.ts index 52f45643..2fc818d9 100644 --- a/packages/frontend/src/actions.ts +++ b/packages/frontend/src/actions.ts @@ -6,17 +6,19 @@ import { combineFieldsService, generateDashBoard, ViewSpace, + Subspace, clusterMeasures } from "./service"; import { GlobalState, StateUpdater } from './state'; -type Action = (state: GlobalState, updateState: (updater:StateUpdater) => void, params: T) => any; +type Action = (select: () => GlobalState, updateState: (updater:StateUpdater) => void, params: T) => any; -const univariateSummary: Action<{dataSource: DataSource; fields: BIField[]}> = async (state, updateState, params) => { +const univariateSummary: Action<{dataSource: DataSource; fields: BIField[]}> = async (select, updateState, params) => { const { dataSource, fields } = params; + const state = select(); const dimensions = fields .filter(field => field.type === "dimension") .map(field => field.name); @@ -117,8 +119,9 @@ interface SubspaceSeachParams { measures: string[]; operator: OperatorType } -const subspaceSearch: Action = async (state, updateState, params) => { +const subspaceSearch: Action = async (select, updateState, params) => { const { groupedData: dataSource, summary, dimensions, measures, operator } = params; + const state = select(); updateState(draft => { draft.loading.subspaceSearching = true; }); @@ -165,8 +168,13 @@ const subspaceSearch: Action = async (state, updateState, p } } -const getViewSpaces: Action = async (state, updateState, params) => { - const { cookedDataSource: dataSorce, subspaceList, maxGroupNumber, useServer } = state +interface GetViewSpacesProps { + subspaceList: Subspace[]; + maxGroupNumber: number; + useServer: boolean; +} +const getViewSpaces: Action = async (select, updateState, params) => { + const { subspaceList, maxGroupNumber, useServer } = params; let viewSpaces: ViewSpace[] = []; try { viewSpaces = await clusterMeasures( @@ -217,7 +225,8 @@ const extractInsights: Action<{dataSource: DataSource; fields: BIField[]}> = asy } } -const getDashBoard: Action<{dataSource: DataSource, dimensions: string[], measures: string[]}> = async (state, updateState, params) => { +const getDashBoard: Action<{dataSource: DataSource, dimensions: string[], measures: string[]}> = async (select, updateState, params) => { + const state = select(); const { dataSource, dimensions, measures } = params; updateState(draft => { draft.loading.dashBoard = true diff --git a/packages/frontend/src/components/table.tsx b/packages/frontend/src/components/table.tsx index cb4b3cd9..939394b2 100644 --- a/packages/frontend/src/components/table.tsx +++ b/packages/frontend/src/components/table.tsx @@ -1,6 +1,6 @@ import React from 'react'; import { DetailsList, SelectionMode } from 'office-ui-fabric-react'; -import { DataSource, BIField, BIFieldType } from '../global'; +import { DataSource, BIField } from '../global'; export interface DataTableProps { dataSource: DataSource, fields: BIField[] diff --git a/packages/frontend/src/pages/dashBoard/combinedChart.tsx b/packages/frontend/src/pages/dashBoard/combinedChart.tsx index 02014e6c..f880545a 100644 --- a/packages/frontend/src/pages/dashBoard/combinedChart.tsx +++ b/packages/frontend/src/pages/dashBoard/combinedChart.tsx @@ -1,6 +1,6 @@ import React, { useMemo, useEffect, useState } from "react"; import { DashBoard } from "../../service"; -import { DataSource, Field, FieldType } from "../../global"; +import { DataSource, Field } from "../../global"; import { specification } from "visual-insights"; import { useComposeState } from "../../utils/index"; import { IconButton } from "office-ui-fabric-react"; @@ -27,28 +27,7 @@ const CombinedChart: React.FC = props => { useEffect(() => { setChartStateList(dashBoard.map(() => false)); }, [dashBoard]); - const filedDomains = useMemo(() => { - const fieldList = dimScores.map(f => f[0]); - let domainDict: {[key: string]: any[]} = {}; - let dsLen = dataSource.length; - let fLen = fieldList.length; - for (let i = 0; i < fLen; i++) { - if (dimScores[i][3].type !== 'quantitative') { - continue; - // tmp - } - let fieldName = fieldList[i]; - domainDict[fieldName] = [0, 100]; - let min = Infinity; - let max = -Infinity; - for (let j = 0; j < dsLen; j++) { - min = Math.min(dataSource[j][fieldName], min) - max = Math.max(dataSource[j][fieldName], max) - } - domainDict[fieldName] = [min, max]; - } - return domainDict; - }, [dataSource, dimScores]) + const chartSpecList = useMemo(() => { if (!dashBoard || !dataSource || !dimScores) { return []; @@ -81,7 +60,9 @@ const CombinedChart: React.FC = props => { }); }, [dashBoard, dataSource, dimScores]); - const fieldFeatures = dimScores.map(dim => dim[3]); + const fieldFeatures = useMemo(() => { + return dimScores.map(dim => dim[3]) + }, [dimScores]); const specList = useMemo(() => { return chartSpecList.map((spec, index) => { @@ -125,7 +106,7 @@ const CombinedChart: React.FC = props => { vegaSpec.specIndex = index return vegaSpec }) as any; - }, [chartSpecList, filedDomains]); + }, [chartSpecList, fieldFeatures]); const signalHandler = useMemo(() => { return dashBoard.map((d, index) => { @@ -148,7 +129,7 @@ const CombinedChart: React.FC = props => { } }; }); - }, [dashBoard, chartStateList, dimScores, specList]); + }, [dashBoard, chartStateList, setGlobalFilters]); const vsourceList = useMemo>(() => { let ans = []; @@ -191,8 +172,7 @@ const CombinedChart: React.FC = props => { dashBoard, globalFilters, dataSource, - chartStateList, - dataSource + chartStateList ]); return (
diff --git a/packages/frontend/src/pages/dashBoard/index.tsx b/packages/frontend/src/pages/dashBoard/index.tsx index 08e5eee5..53707dba 100644 --- a/packages/frontend/src/pages/dashBoard/index.tsx +++ b/packages/frontend/src/pages/dashBoard/index.tsx @@ -4,7 +4,7 @@ import { useGlobalState } from "../../state"; import CombinedChart from "./combinedChart"; const DashBoardPage: React.FC = props => { - const [state, updateState, dispatch, getters] = useGlobalState(); + const [state, , dispatch, getters] = useGlobalState(); const [dashBoardIndex, setDashBoardIndex] = useState(0); const { subspaceList, diff --git a/packages/frontend/src/pages/dashBoard/indicatorCard.tsx b/packages/frontend/src/pages/dashBoard/indicatorCard.tsx index 16ac8e9b..4e890901 100644 --- a/packages/frontend/src/pages/dashBoard/indicatorCard.tsx +++ b/packages/frontend/src/pages/dashBoard/indicatorCard.tsx @@ -54,7 +54,7 @@ const IndicatorCard: React.FC = props => { ans = Number(ans.toFixed(2)); } return numberWithCommas(ans); - }, [dataSource, measures]); + }, [dataSource, measures, operator]); return ( diff --git a/packages/frontend/src/pages/dataSource/index.tsx b/packages/frontend/src/pages/dataSource/index.tsx index 047be9e8..1b048542 100644 --- a/packages/frontend/src/pages/dataSource/index.tsx +++ b/packages/frontend/src/pages/dataSource/index.tsx @@ -1,4 +1,4 @@ -import React, { useState, useRef, useCallback } from "react"; +import React, { useState, useRef } from "react"; import { useGlobalState } from "../../state"; import { FileLoader, useComposeState } from '../../utils/index'; import { ComboBox, PrimaryButton, IconButton, Callout, Stack, CommandBar, ChoiceGroup, IChoiceGroupOption, Slider, Label, Checkbox } from 'office-ui-fabric-react'; diff --git a/packages/frontend/src/pages/dataSource/useDataSource.tsx b/packages/frontend/src/pages/dataSource/useDataSource.tsx index 961a5357..ac14f670 100644 --- a/packages/frontend/src/pages/dataSource/useDataSource.tsx +++ b/packages/frontend/src/pages/dataSource/useDataSource.tsx @@ -1,4 +1,4 @@ -import React, { useMemo } from 'react'; +import { useMemo } from 'react'; import { BIField, DataSource, Record } from '../../global'; import { Transform } from '../../utils/index' import { cleanData, CleanMethod } from './clean'; diff --git a/packages/frontend/src/pages/gallery/association/digDimension.tsx b/packages/frontend/src/pages/gallery/association/digDimension.tsx index a235923c..bf2a2143 100644 --- a/packages/frontend/src/pages/gallery/association/digDimension.tsx +++ b/packages/frontend/src/pages/gallery/association/digDimension.tsx @@ -1,7 +1,5 @@ -import React, { useMemo } from 'react'; -import { - ViewSpace -} from '../../../service'; +import { useMemo } from 'react'; +import { ViewSpace } from '../../../service'; import { Field, DataSource } from '../../../global'; import { specification } from 'visual-insights'; import { PreferencePanelConfig } from '../../../components/preference'; @@ -41,7 +39,7 @@ export interface DigDimensionProps { } function useDigDimension(props: DigDimensionProps) { - const { viewSpaces, interestedViewSpace, fieldScores, dataSource, visualConfig } = props; + const { viewSpaces, interestedViewSpace, fieldScores, dataSource } = props; const relatedSpaces = useMemo(() => { let ans: ViewSpace[] = [] for (let space of viewSpaces) { @@ -69,7 +67,7 @@ function useDigDimension(props: DigDimensionProps) { } } return ans.sort((a, b) => a.relatedScore - b.relatedScore); - }, [relatedSpaces]); + }, [relatedSpaces, interestedViewSpace.measures]); const viewList = useMemo(() => { const ans = rankedRelatedSpaces.slice(0, topKRelatedSpace).map(space => { @@ -82,7 +80,7 @@ function useDigDimension(props: DigDimensionProps) { }; }) return ans; - }, [rankedRelatedSpaces, fieldScores]) + }, [rankedRelatedSpaces, fieldScores, dataSource]) return viewList } diff --git a/packages/frontend/src/pages/gallery/index.tsx b/packages/frontend/src/pages/gallery/index.tsx index 7a569b55..096b8752 100644 --- a/packages/frontend/src/pages/gallery/index.tsx +++ b/packages/frontend/src/pages/gallery/index.tsx @@ -122,7 +122,7 @@ const Gallery: React.FC = props => { draft.loading.gallery = false; }); }); - }, [subspaceList, dataSource, state.maxGroupNumber]); + }, [subspaceList, dataSource, state.maxGroupNumber, state.useServer, updateState]); const dimScores = useMemo<[string, number, number, Field][]>(() => { return [...summary.origin, ...summary.grouped].map(field => { @@ -185,12 +185,12 @@ const Gallery: React.FC = props => { console.log(error); } } - }, [viewSpaces, currentPage]); + }, [viewSpaces, currentPage, dataSource, dimScores]); const currentSpace = useMemo(() => { return subspaceList.find(subspace => { return subspace.dimensions.join(",") === dataView.dimensions.join(","); })!; - }, [subspaceList, currentPage, dataView]); + }, [subspaceList, dataView]); useEffect(() => { setShowAssociation(false); }, [currentPage]); diff --git a/packages/frontend/src/pages/gallery/search/index.tsx b/packages/frontend/src/pages/gallery/search/index.tsx index 03cc4793..6a8ab091 100644 --- a/packages/frontend/src/pages/gallery/search/index.tsx +++ b/packages/frontend/src/pages/gallery/search/index.tsx @@ -31,7 +31,7 @@ function usePageController (size: number) { const maxPageNumber = Math.ceil(size / PAGE_SIZE); const gotoPage = (num: number) => { let fixPageNum = (num + maxPageNumber) % maxPageNumber; - setCurrentPage(num) + setCurrentPage(fixPageNum) } const lastPage = () => { @@ -57,13 +57,19 @@ function usePageController (size: number) { } const SearchPage: React.FC = props => { - const [state, updateState, dispatch, getters] = useGlobalState(); + const [state, , dispatch, getters] = useGlobalState(); const [targetViewSpaces, setTargetViewSpaces] = useState([]); - const { subspaceList, viewSpaces } = state; + const { subspaceList, viewSpaces, maxGroupNumber, useServer } = state; const { dimScores } = getters; + useEffect(() => { - dispatch('getViewSpaces', {}) - }, [subspaceList]) + dispatch('getViewSpaces', { + subspaceList, + maxGroupNumber, + useServer + }) + }, [subspaceList, maxGroupNumber, useServer, dispatch]) + const fuse = useMemo(() => { const options: FuseOptions = { keys: [ @@ -100,7 +106,7 @@ const SearchPage: React.FC = props => { measures } }) - }, [state.cookedDataSource, targetViewSpaces, itemRange]) + }, [state.cookedDataSource, targetViewSpaces, itemRange, dimScores]) return (
diff --git a/packages/frontend/src/pages/notebook/cluster.tsx b/packages/frontend/src/pages/notebook/cluster.tsx index 5f78cf3e..8afaed76 100644 --- a/packages/frontend/src/pages/notebook/cluster.tsx +++ b/packages/frontend/src/pages/notebook/cluster.tsx @@ -1,5 +1,5 @@ import React, { useEffect, useMemo, useRef } from 'react'; -import { kruskalMST } from 'visual-insights'; +import { Cluster } from 'visual-insights'; import { useGlobalState } from '../../state'; import embed from 'vega-embed'; // cluster should be used for small graph because the number of measure is limited. @@ -31,7 +31,7 @@ interface TreeData { } const ClusterBoard: React.FC = (props) => { const { adjMatrix, measures, onFocusGroup } = props; - const [state, updateState] = useGlobalState(); + const [state, ] = useGlobalState(); const chart = useRef(null); // const groups = useMemo(() => { // return clusterMeasures({ @@ -44,7 +44,7 @@ const ClusterBoard: React.FC = (props) => { * todo: * maxGroupNumber = the measures length / max visual channel for measure. */ - let { edgesInMST, groups } = kruskalMST(adjMatrix, state.maxGroupNumber); + let { edgesInMST, groups } = Cluster.kruskalWithFullMST(adjMatrix, state.maxGroupNumber); return { edgesInMST, groups } }, [adjMatrix, state.maxGroupNumber]) const treeData = useMemo(() => { @@ -231,7 +231,7 @@ const ClusterBoard: React.FC = (props) => { }) }) } - }, [treeData, measures]) + }, [treeData, measures, clusterResult.groups, onFocusGroup]) return
} diff --git a/packages/frontend/src/pages/notebook/distributionChart.tsx b/packages/frontend/src/pages/notebook/distributionChart.tsx index 7be3a693..dc7f5a4b 100644 --- a/packages/frontend/src/pages/notebook/distributionChart.tsx +++ b/packages/frontend/src/pages/notebook/distributionChart.tsx @@ -56,7 +56,7 @@ const DistributionChart: React.FC = (props) => { actions: false }) } - }, [x, y, dataSource]) + }, [x, y, dataSource, fieldType]) return
} diff --git a/packages/frontend/src/pages/notebook/fieldAnalysis.tsx b/packages/frontend/src/pages/notebook/fieldAnalysis.tsx index 3e788544..96d51d9b 100644 --- a/packages/frontend/src/pages/notebook/fieldAnalysis.tsx +++ b/packages/frontend/src/pages/notebook/fieldAnalysis.tsx @@ -1,6 +1,6 @@ import React, { useMemo } from 'react'; import { DetailsList, SelectionMode, IColumn, Icon, HoverCard, IExpandingCardProps } from 'office-ui-fabric-react'; -import chroma, { Color } from 'chroma-js'; +import chroma from 'chroma-js'; import { FieldSummary } from '../../service'; import DistributionChart from './distributionChart'; import { FieldType, Record } from '../../global'; @@ -130,7 +130,6 @@ const FieldAnalsis: React.FC = (props) => { case 'entropy': case 'maxEntropy': bgColor = getValueColor(item[column.key], entropyRange); - let fontColor = contrastColor(bgColor) let bgColorStr = `rgb(${bgColor.join(',')})` let fontColorStr = `rgb(${contrastColor(bgColor).join(',')})` return ( diff --git a/packages/frontend/src/pages/notebook/index.tsx b/packages/frontend/src/pages/notebook/index.tsx index 86bf2741..487ab99c 100644 --- a/packages/frontend/src/pages/notebook/index.tsx +++ b/packages/frontend/src/pages/notebook/index.tsx @@ -1,4 +1,4 @@ -import React, { useMemo, useState, useEffect } from 'react'; +import React, { useMemo, useState, useEffect, useCallback } from 'react'; import { DataSource, Field } from '../../global'; import FieldAnalysisBoard from './fieldAnalysis'; import Subspaces from './subspaces'; @@ -19,15 +19,16 @@ interface NoteBookProps { }; subspaceList: Subspace[] } +interface ClusterState { + measures: string[]; + dimensions: string[]; + matrix: number[][]; +} const NoteBook: React.FC = (props) => { const { summary, subspaceList, dataSource } = props; const [state, updateState] = useGlobalState(); const [isAggregated, setIsAggregated] = useState(true); - interface ClusterState { - measures: string[]; - dimensions: string[]; - matrix: number[][]; - } + const [clusterState, setClusterState] = useState({ measures: [], dimensions: [], @@ -46,7 +47,7 @@ const NoteBook: React.FC = (props) => { }, [summary.origin, summary.grouped]) const spec = useMemo(() => { - const { dimensions, measures } = clusterState; + const { dimensions } = clusterState; // todo // this condition is not strict enough. dimScores should share same elements with dimensions and measures. // maybe use try catch in future @@ -77,85 +78,155 @@ const NoteBook: React.FC = (props) => { updateState(draft => { draft.maxGroupNumber = Math.round(state.cookedMeasures.length / maxMeasureInView) }) - }, [state.cookedMeasures]) - return
-

Univariate Summary

-

Hover your mouse over the fields and see the distails and entropy reducing strategy.

- {state.loading.univariateSummary && } -
- -
+ }, [state.cookedMeasures, updateState]) + + const onSpaceChange = useCallback((dimensions, measures, matrix) => { + setClusterState({ + dimensions, + measures, + matrix + }); + }, [setClusterState]) + + const onFocusGroup = useCallback(measInView => { + setMeasuresInView(measInView); + }, []) + return ( +
+

Univariate Summary

+

+ Hover your mouse over the fields and see the distails and entropy + reducing strategy. +

+ {state.loading.univariateSummary && ( + + )} +
+ +
+ +

Subspace Searching

+

+ Try to choose one row(combination of dimensions) of the subspace and see + the changes of the processes below. +

+ {state.loading.subspaceSearching && ( + + )} + {!state.loading.univariateSummary && ( + `${value}%`} + showValue={true} + onChange={(value: number) => { + updateState(draft => { + draft.topK.dimensionSize = value / 100; + draft.loading.subspaceSearching = true; + }); + const selectedDimensions = state.cookedDimensions.slice( + 0, + Math.round((state.cookedDimensions.length * value) / 100) + ); + combineFieldsService( + dataSource, + selectedDimensions, + state.cookedMeasures, + "sum", + state.useServer + ).then(subspaces => { + if (subspaces) { + updateState(draft => { + draft.subspaceList = subspaces; + draft.loading.subspaceSearching = false; + }); + } + }); + }} + /> + )} + {!state.loading.univariateSummary && ( + `${value}%`} + showValue={true} + onChange={(value: number) => { + updateState(draft => { + draft.topK.subspacePercentSize = value / 100; + }); + }} + /> + )} +
+ +
-

Subspace Searching

-

Try to choose one row(combination of dimensions) of the subspace and see the changes of the processes below.

- {state.loading.subspaceSearching && } - {!state.loading.univariateSummary && `${value}%`} showValue={true} - onChange={(value: number) => { - updateState(draft => { - draft.topK.dimensionSize = value / 100; - draft.loading.subspaceSearching = true; - }) - const selectedDimensions = state.cookedDimensions.slice(0, Math.round(state.cookedDimensions.length * value / 100)); - combineFieldsService(dataSource, selectedDimensions, state.cookedMeasures, 'sum', state.useServer) - .then(subspaces => { - if (subspaces) { - updateState(draft => { - draft.subspaceList = subspaces; - draft.loading.subspaceSearching = false - }) - } - }) - }}/>} - { - !state.loading.univariateSummary && `${value}%`} showValue={true} +

Measurement Clustering

+

+ Try to choose one group to visualize them. +

+ { updateState(draft => { - draft.topK.subspacePercentSize = value / 100; - }) - }}/> - } -
- { - setClusterState({ - dimensions, - measures, - matrix - }) - }} /> -
+ draft.maxGroupNumber = value; + }); + }} + /> +
+ +
-

Measurement Clustering

-

Try to choose one group to visualize them.

- { updateState(draft => { - draft.maxGroupNumber = value - })}} - /> -
- { setMeasuresInView(measInView); console.log('view in measures', measInView) }} /> +

Visualization

+

+ If there is no result here, try to click one group of measures in{" "} + Clustering process above. +

+ { + setIsAggregated(!!checked); + }} + /> +
+ dim[3])} + dataSource={dataSource} + dimensions={clusterState.dimensions} + measures={measuresInView} + /> +
- -

Visualization

-

- If there is no result here, try to click one group of measures in Clustering process above. -

- {setIsAggregated(!!checked)}} /> -
- dim[3])} - dataSource={dataSource} - dimensions={clusterState.dimensions} measures={measuresInView} /> -
- -
+ ); } export default NoteBook; \ No newline at end of file diff --git a/packages/frontend/src/pages/notebook/subspaces.tsx b/packages/frontend/src/pages/notebook/subspaces.tsx index 916637b6..b0a7a191 100644 --- a/packages/frontend/src/pages/notebook/subspaces.tsx +++ b/packages/frontend/src/pages/notebook/subspaces.tsx @@ -30,18 +30,8 @@ const Subspaces: React.FC = (props) => { onSpaceChange(targetSpace.dimensions, targetSpace.measures.map(m => m.name), targetSpace.correlationMatrix); setCurIndex(index); } - }, [subspaceList]) - const range = useMemo<[number, number]>(() => { - let max = 0; - let min = Infinity; - for (let space of subspaceList) { - for (let { name, value } of space.measures) { - max = Math.max(max, value); - min = Math.min(min, value); - } - } - return [min, max] - }, [subspaceList]) + }, [subspaceList, onSpaceChange]) + const values = useMemo(() => { let ans = []; // todos: @@ -63,7 +53,7 @@ const Subspaces: React.FC = (props) => { } } return ans - }, [subspaceList, range]) + }, [subspaceList]) useEffect(() => { if (spaceChart.current && subspaceList.length > 0) { embed(spaceChart.current, { @@ -113,6 +103,7 @@ const Subspaces: React.FC = (props) => { }) }) } + // eslint-disable-next-line }, [subspaceList, values]) useEffect(() => { let space = subspaceList[curIndex]; diff --git a/packages/frontend/src/plugins/visSummary/description.tsx b/packages/frontend/src/plugins/visSummary/description.tsx index 2064e897..235c7404 100644 --- a/packages/frontend/src/plugins/visSummary/description.tsx +++ b/packages/frontend/src/plugins/visSummary/description.tsx @@ -33,7 +33,7 @@ const VisDescription: React.FC = (props) => { }) } } - }, []) + }, [space, spaceList]) const bestMeasure = useMemo(() => { if (typeof space === 'undefined') return; const measuresInView = space.measures.filter(mea => measures.includes(mea.name)); diff --git a/packages/frontend/src/plugins/visSummary/index.tsx b/packages/frontend/src/plugins/visSummary/index.tsx index f6bdcb40..74ee81dc 100644 --- a/packages/frontend/src/plugins/visSummary/index.tsx +++ b/packages/frontend/src/plugins/visSummary/index.tsx @@ -35,7 +35,7 @@ const StoryTeller: React.FC = (props) => { }) } } - }, []) + }, [space, spaceList]) const bestMeasure = useMemo(() => { if (typeof space === 'undefined') return; const measuresInView = space.measures.filter(mea => measures.includes(mea.name)); diff --git a/packages/frontend/src/state.tsx b/packages/frontend/src/state.tsx index 8444ce62..9a1d220c 100644 --- a/packages/frontend/src/state.tsx +++ b/packages/frontend/src/state.tsx @@ -1,8 +1,8 @@ import produce, { Draft, setAutoFreeze } from 'immer'; -import React, { useState, useMemo, createContext, useContext } from 'react'; +import React, { useState, useMemo, createContext, useContext, useCallback, useRef } from 'react'; import { DataSource, BIField, Field } from './global'; import { Subspace, FieldSummary, ViewSpace, DashBoard } from './service'; -import actions, { Test, Actions } from './actions'; +import actions, { Test } from './actions'; setAutoFreeze(false) @@ -142,25 +142,28 @@ function useGetters(state: GlobalState) { return getters; } export function GlobalStateProvider({ children }: { children: React.ReactNode }) { - const [state, setState] = useState(initState) + const [state, setState] = useState(initState); + const stateHolder = useRef(initState); const getters = useGetters(state); - const updateState = (stateUpdater: StateUpdater) => { + const updateState = useCallback((stateUpdater: StateUpdater) => { setState(state => { const nextState = produce(state, draftState => stateUpdater(draftState)) + stateHolder.current = nextState; return nextState; }) - } - - const dispatch:

(actionName: P['name'], params: P['params']) => void = (actionName, params) => { + }, [setState]) + const dispatch:

(actionName: P['name'], params: P['params']) => void = useCallback((actionName, params) => { if (typeof actions[actionName] === 'function') { - + function select (): GlobalState { + return stateHolder.current + } // todo: fix the any type - actions[actionName](state, updateState, params as any); + actions[actionName](select, updateState, params as any); // actions['subspaceSearch'](state, updateState, params) } - } + }, [updateState]) diff --git a/packages/frontend/src/utils/useComposeState.tsx b/packages/frontend/src/utils/useComposeState.tsx index 9617447b..794ee52b 100644 --- a/packages/frontend/src/utils/useComposeState.tsx +++ b/packages/frontend/src/utils/useComposeState.tsx @@ -1,4 +1,4 @@ -import React, { useState } from 'react'; +import { useState, useCallback } from 'react'; import produce, { Draft } from 'immer'; /** * @param S type of the composed state @@ -19,9 +19,11 @@ export type StateUpdater = (draftState: Draft) => void */ export default function useComposeState(initState: S): [S, (stateUpdater: StateUpdater) => void] { const [state, setState] = useState(initState) - const updateState = (stateUpdater: StateUpdater) => { - const nextState = produce(state, draftState => stateUpdater(draftState)) - setState(nextState) - } + const updateState = useCallback((stateUpdater: StateUpdater) => { + setState(state => { + const nextState = produce(state, draftState => stateUpdater(draftState)) + return nextState + }) + }, [setState]) return [state, updateState] } \ No newline at end of file diff --git a/packages/frontend/src/visBuilder/vegaBase.tsx b/packages/frontend/src/visBuilder/vegaBase.tsx index 217b6282..0a26a55f 100644 --- a/packages/frontend/src/visBuilder/vegaBase.tsx +++ b/packages/frontend/src/visBuilder/vegaBase.tsx @@ -1,7 +1,7 @@ import React, { useEffect, useRef, useMemo } from 'react'; import aggregate from 'cube-core'; import embed from 'vega-embed'; -import { DataSource, Field, FieldType } from '../global' +import { DataSource, Field } from '../global' import { baseVis } from '../queries/index'; export const geomTypeMap: {[key: string]: any} = { interval: 'bar', @@ -53,7 +53,7 @@ const BaseChart: React.FC = (props) => { as: `${mea}_${aggregator}` } }) - }, [measures]) + }, [measures, aggregator]) let table = useMemo(() => { if (!defaultAggregated) { diff --git a/packages/frontend/src/workers/cluster.worker.js b/packages/frontend/src/workers/cluster.worker.js index e5c24776..d7b90a52 100644 --- a/packages/frontend/src/workers/cluster.worker.js +++ b/packages/frontend/src/workers/cluster.worker.js @@ -1,5 +1,7 @@ /* eslint no-restricted-globals: 0 */ -import { kruskalMST } from 'visual-insights'; +import { Cluster } from 'visual-insights'; +import { timer } from './timer'; + const PearsonThreshold = 0.5; function sum (arr) { let ans = 0; @@ -10,12 +12,11 @@ function sum (arr) { return ans; } const cluster = (e) => { - console.log('[cluster measures]') try { const { spaces, maxGroupNumber } = e.data; let result = []; for (let space of spaces) { - const { edgesInMST, groups } = kruskalMST(space.matrix, maxGroupNumber, PearsonThreshold); + const { edgesInMST, groups } = Cluster.kruskalWithFullMST(space.matrix, maxGroupNumber, PearsonThreshold); let measureGroups = new Map(); for (let i = 0; i < groups.length; i++) { if (!measureGroups.has(groups[i])) { @@ -44,4 +45,4 @@ const cluster = (e) => { } } -self.addEventListener('message', cluster, false); +self.addEventListener('message', timer(cluster), false); diff --git a/packages/frontend/src/workers/combineFields.worker.js b/packages/frontend/src/workers/combineFields.worker.js index a0b35192..7e92202b 100644 --- a/packages/frontend/src/workers/combineFields.worker.js +++ b/packages/frontend/src/workers/combineFields.worker.js @@ -1,11 +1,11 @@ /* eslint no-restricted-globals: 0 */ -import { analysisDimensions } from 'visual-insights' +import { Insight } from 'visual-insights' +import { timer } from './timer'; const combineFields = (e) => { - console.log('[combine fields]') try { const { dataSource, dimensions, measures, operator, topKPercent = 1 } = e.data; - let impurityList = analysisDimensions(dataSource, dimensions, measures, operator).map(dimReport => { + let impurityList = Insight.insightExtraction(dataSource, dimensions, measures, operator).map(dimReport => { let sum = 0; for (let key in dimReport[1]) { sum += dimReport[1][key]; @@ -41,4 +41,4 @@ const combineFields = (e) => { } } -self.addEventListener('message', combineFields, false); \ No newline at end of file +self.addEventListener('message', timer(combineFields), false); \ No newline at end of file diff --git a/packages/frontend/src/workers/dashboard.worker.js b/packages/frontend/src/workers/dashboard.worker.js index e82d9f0a..b69bd926 100644 --- a/packages/frontend/src/workers/dashboard.worker.js +++ b/packages/frontend/src/workers/dashboard.worker.js @@ -1,5 +1,6 @@ /* eslint no-restricted-globals: 0 */ import { DashBoard } from 'visual-insights'; +import { timer } from './timer'; function transSubspaces2FieldsFeature(subspaces) { let fieldFeatureList = []; @@ -43,4 +44,4 @@ const generateDashBoard = (e) => { } } -self.addEventListener('message', generateDashBoard, false); \ No newline at end of file +self.addEventListener('message', timer(generateDashBoard), false); \ No newline at end of file diff --git a/packages/frontend/src/workers/fieldsSummary.worker.js b/packages/frontend/src/workers/fieldsSummary.worker.js index 93f7ce13..4e99a9cb 100644 --- a/packages/frontend/src/workers/fieldsSummary.worker.js +++ b/packages/frontend/src/workers/fieldsSummary.worker.js @@ -1,7 +1,8 @@ /* eslint no-restricted-globals: 0 */ import { UnivariateSummary } from 'visual-insights'; -const { getAllFieldsDistribution, getAllFieldTypes, getAllFieldsEntropy } = UnivariateSummary; +import { timer } from './timer'; +const { getAllFieldsDistribution, getAllFieldTypes, getAllFieldsEntropy } = UnivariateSummary; const fieldSummary = (e) => { const { fields, dataSource } = e.data; @@ -43,4 +44,4 @@ const fieldSummary = (e) => { } } -self.addEventListener('message', fieldSummary, false) \ No newline at end of file +self.addEventListener('message', timer(fieldSummary), false) \ No newline at end of file diff --git a/packages/frontend/src/workers/groupFields.worker.js b/packages/frontend/src/workers/groupFields.worker.js index 39cee289..2f97b338 100644 --- a/packages/frontend/src/workers/groupFields.worker.js +++ b/packages/frontend/src/workers/groupFields.worker.js @@ -1,8 +1,7 @@ /* eslint no-restricted-globals: 0 */ import { UnivariateSummary } from 'visual-insights'; - +import { timer } from './timer'; const groupFields = (e) => { - console.log('group fields worker'); try { const { dataSource, fields } = e.data; const result = UnivariateSummary.groupFields(dataSource, fields); @@ -18,4 +17,4 @@ const groupFields = (e) => { } } -self.addEventListener('message', groupFields, false); \ No newline at end of file +self.addEventListener('message', timer(groupFields), false); \ No newline at end of file diff --git a/packages/frontend/src/workers/timer.js b/packages/frontend/src/workers/timer.js new file mode 100644 index 00000000..b3651d63 --- /dev/null +++ b/packages/frontend/src/workers/timer.js @@ -0,0 +1,15 @@ +/** + * + * @param {task} task is a function + */ +export function timer (task) { + return function (e) { + let startTime = new Date().getTime(); + try { + task(e); + } finally { + let cost = new Date().getTime() - startTime; + console.log(`Task [${task.name}] cost ${cost} ms.`) + } + } +} \ No newline at end of file diff --git a/packages/visual-insights/README.md b/packages/visual-insights/README.md index 655ab8b1..a345634b 100644 --- a/packages/visual-insights/README.md +++ b/packages/visual-insights/README.md @@ -1,7 +1,8 @@ # Visual-Insights -![](https://travis-ci.org/ObservedObserver/visual-insights.svg?branch=master) +![](https://travis-ci.org/kanaries/Rath.svg?branch=master) ![](https://img.shields.io/npm/v/visual-insights?color=blue) +[![Coverage Status](https://coveralls.io/repos/github/Kanaries/Rath/badge.svg?branch=master)](https://coveralls.io/github/Kanaries/Rath?branch=dev) ### API diff --git a/packages/visual-insights/package.json b/packages/visual-insights/package.json index d953b6fc..4443b6ae 100644 --- a/packages/visual-insights/package.json +++ b/packages/visual-insights/package.json @@ -10,15 +10,14 @@ "module": "./build/esm/index.js", "types": "./build/esm/index.d.ts", "dependencies": { - "cube-core": "^2.13.0", - "mocha": "^6.2.0", - "typescript": "^3.6.3" + "cube-core": "^2.13.0" }, "scripts": { "build": "npm run buildfront && npm run buildback", "buildback": "tsc -p ./tsconfig.cjs.json", "buildfront": "tsc -p ./tsconfig.esm.json", - "test": "npm run buildback && mocha --no-timeouts" + "test": "npm run buildback && mocha --no-timeouts", + "coverage": "istanbul cover _mocha --report lcovonly -- --no-timeouts -R spec && cat ./coverage/lcov.info | coveralls && rm -rf ./coverage" }, "eslintConfig": { "extends": "react-app" @@ -34,5 +33,12 @@ "last 1 firefox version", "last 1 safari version" ] + }, + "devDependencies": { + "coveralls": "^3.0.9", + "istanbul": "^0.4.5", + "mocha-lcov-reporter": "^1.3.0", + "mocha": "^6.2.0", + "typescript": "^3.6.3" } } diff --git a/packages/visual-insights/src/cleaner/index.ts b/packages/visual-insights/src/cleaner/index.ts index e10081a0..5e3c2e25 100644 --- a/packages/visual-insights/src/cleaner/index.ts +++ b/packages/visual-insights/src/cleaner/index.ts @@ -1,4 +1,4 @@ -import { deepcopy, isFieldNumeric, isFieldTime, memberCount } from '../utils'; +import { deepcopy, isFieldNumeric, memberCount } from '../utils/index'; import { DataSource } from '../commonTypes'; function dropNullColumn (dataSource: DataSource, fields: string[]): { fields: string[]; dataSource: DataSource} { let keepFields = fields.map(() => false); diff --git a/packages/visual-insights/src/constant.ts b/packages/visual-insights/src/constant.ts new file mode 100644 index 00000000..ff71243e --- /dev/null +++ b/packages/visual-insights/src/constant.ts @@ -0,0 +1,4 @@ +export const CHANNEL = { + maxDimensionNumber: 8, + maxMeasureNumber: 6 +} \ No newline at end of file diff --git a/packages/visual-insights/src/dashboard/index.ts b/packages/visual-insights/src/dashboard/index.ts index 75cd286e..e3503d0e 100644 --- a/packages/visual-insights/src/dashboard/index.ts +++ b/packages/visual-insights/src/dashboard/index.ts @@ -1,9 +1,9 @@ -import { FieldsFeature, correlation, linearMapPositive } from "../insights/impurity"; +import { FieldsFeature } from "../insights/impurity"; import { DataSource, OperatorType } from "../commonTypes"; -import cluster from "../insights/cluster"; +import { Cluster } from "../ml/index"; import aggregate from 'cube-core'; -import { normalize, entropy } from "../impurityMeasure"; -import { crammersV } from './utils'; +import { normalize, entropy } from "../statistics/index"; +import { crammersV, pearsonCC, linearMapPositive } from '../statistics/index'; import { CrammersVThreshold, PearsonCorrelation } from '../insights/config'; interface DashBoardSpace { @@ -41,12 +41,12 @@ export function getDashBoardSubspace (dataSource: DataSource, dimensions: string for (let i = 0; i < measures.length; i++) { correlationMatrix[i][i] = 1; for (let j = i + 1; j < measures.length; j++) { - let r = correlation(dataSource, measures[i], measures[j]); + let r = pearsonCC(dataSource, measures[i], measures[j]); correlationMatrix[j][i] = correlationMatrix[i][j] = r; } } - const measureGroups = cluster({ + const measureGroups = Cluster.kruskal({ matrix: correlationMatrix, measures, groupMaxSize: Math.round(measures.length / 6), // todo: make a config: max 6 measures in a dashboard @@ -118,7 +118,7 @@ export function getDashBoardView (dashBoardSpace: DashBoardSpace, dataSource: Da /** * correlation view */ - const measureGroups = cluster({ + const measureGroups = Cluster.kruskal({ matrix: dashBoardSpace.correlationMatrix, measures: measures, groupMaxSize: Math.round(measures.length / 3), // todo: make a config: max 3 measures in a chart @@ -144,7 +144,7 @@ export function getDashBoardView (dashBoardSpace: DashBoardSpace, dataSource: Da * impact views * todo: protentional repeat view or very similiar view */ - const dimensionGroups = cluster({ + const dimensionGroups = Cluster.kruskal({ matrix: dimensionCorrelationMatrix, measures: dimensions, groupMaxSize: 2, // todo: make a config: max 2 dimensions in a chart diff --git a/packages/visual-insights/src/distribution.ts b/packages/visual-insights/src/distribution.ts index 8d6827b4..f7922201 100644 --- a/packages/visual-insights/src/distribution.ts +++ b/packages/visual-insights/src/distribution.ts @@ -1,5 +1,5 @@ import { DataSource } from './commonTypes'; -import { memberCount } from './utils'; +import { memberCount } from './utils/index'; function isUniformDistribution(dataSource: DataSource, field: string): boolean { const members = memberCount(dataSource, field); diff --git a/packages/visual-insights/src/fieldAnalysis.ts b/packages/visual-insights/src/fieldAnalysis.ts index 73675c62..fd57be69 100644 --- a/packages/visual-insights/src/fieldAnalysis.ts +++ b/packages/visual-insights/src/fieldAnalysis.ts @@ -1,3 +1,6 @@ +/** + * todo: delete this file, fieldsAnalysis is an old api. + */ import { DataSource, Field, FieldImpurity } from './commonTypes' import { @@ -10,13 +13,10 @@ import { // isFieldNumeric, isFieldTime, isFieldContinous -} from './utils'; +} from './utils/index'; import { isUniformDistribution } from './distribution'; -import { - normalize, - entropy, -} from './impurityMeasure'; +import { normalize, entropy } from './statistics/index'; const MAGIC_NUMBER = 5; diff --git a/packages/visual-insights/src/impurityMeasure.ts b/packages/visual-insights/src/impurityMeasure.ts deleted file mode 100644 index 1d51c23e..00000000 --- a/packages/visual-insights/src/impurityMeasure.ts +++ /dev/null @@ -1,31 +0,0 @@ -export type ImpurityFC = (probabilityList: number[]) => number; - -function normalize(frequencyList: number[]): number[] { - let sum = 0; - for (let f of frequencyList) { - sum += f; - } - return frequencyList.map(f => f / sum); -} - -const entropy: ImpurityFC = (probabilityList) => { - let sum = 0; - for (let p of probabilityList) { - sum += p * Math.log2(p); - } - return -sum; -} - -const gini: ImpurityFC = (probabilityList) => { - let sum = 0; - for (let p of probabilityList) { - sum += p * (1 - p); - } - return sum; -} - -export { - normalize, - entropy, - gini -} \ No newline at end of file diff --git a/packages/visual-insights/src/index.ts b/packages/visual-insights/src/index.ts index 7e6422c2..150f135d 100644 --- a/packages/visual-insights/src/index.ts +++ b/packages/visual-insights/src/index.ts @@ -1,13 +1,9 @@ -import * as Utils from './utils' - -import fieldsAnalysis from './fieldAnalysis'; +import * as Utils from './utils/index' import specification from './specification'; import * as Distribution from './distribution'; -import * as ImpurityMeasure from './impurityMeasure'; - -import getInsightViews, { analysisDimensions, getCombination, clusterMeasures, kruskalMST, getDimSetsBasedOnClusterGroups } from './insights/index'; +import * as Insight from './insights/index'; import * as Cleaner from './cleaner/index'; import * as UnivariateSummary from './univariateSummary/index' @@ -15,21 +11,18 @@ import * as UnivariateSummary from './univariateSummary/index' import * as DashBoard from './dashboard/index'; import * as Sampling from './sampling/index'; +import * as Statistics from './statistics/index'; +import { Cluster } from './ml/index'; export { DashBoard, Sampling, Utils, + Statistics, UnivariateSummary, - fieldsAnalysis, Distribution, - ImpurityMeasure, specification, - analysisDimensions, Cleaner, - getInsightViews, - getCombination, - getDimSetsBasedOnClusterGroups, - clusterMeasures, - kruskalMST + Insight, + Cluster } \ No newline at end of file diff --git a/packages/visual-insights/src/insights/impurity.ts b/packages/visual-insights/src/insights/impurity.ts index 6c20ff87..8d657dbb 100644 --- a/packages/visual-insights/src/insights/impurity.ts +++ b/packages/visual-insights/src/insights/impurity.ts @@ -1,30 +1,13 @@ // import { aggregate } from '../utils'; import aggregate from 'cube-core'; -import { entropy, normalize } from '../impurityMeasure'; +import { entropy, normalize } from '../statistics/index'; import { DataSource, OperatorType } from '../commonTypes'; -import { crammersV } from '../dashboard/utils'; +import { crammersV, getCombination, pearsonCC, linearMapPositive } from '../statistics/index'; import { CrammersVThreshold } from './config'; -import cluster from './cluster'; +import { Cluster } from '../ml/index'; +import { CHANNEL } from '../constant'; // insights like outlier and trend both request high impurity of dimension. -const maxVisualChannel = 8; -function getCombination(elements: string[], start: number = 1, end: number = elements.length): string[][] { - let ans: string[][] = []; - const combine = (step: number, set: string[], size: number) => { - if (set.length === size) { - ans.push([...set]); - return; - } - if (step >= elements.length) { - return; - } - combine(step + 1, [...set, elements[step]], size); - combine(step + 1, set, size); - } - for (let i = start; i <= Math.min(end, maxVisualChannel); i++) { - combine(0, [], i); - } - return ans -} + function getDimCorrelationMatrix(dataSource: DataSource, dimensions: string[]): number[][] { let matrix: number[][] = dimensions.map(d => dimensions.map(d => 0)); for (let i = 0; i < dimensions.length; i++) { @@ -40,9 +23,8 @@ export function getDimSetsBasedOnClusterGroups(dataSource: DataSource, dimension const maxDimNumberInView = 4; let dimSets: string[][] = []; let dimCorrelationMatrix = getDimCorrelationMatrix(dataSource, dimensions); - console.log(dimCorrelationMatrix) // groupMaxSize here means group number. - let groups: string[][] = cluster({ + let groups: string[][] = Cluster.kruskal({ matrix: dimCorrelationMatrix, measures: dimensions, groupMaxSize: Math.round(dimensions.length / maxDimNumberInView), @@ -50,38 +32,24 @@ export function getDimSetsBasedOnClusterGroups(dataSource: DataSource, dimension }); // todo: maybe a threhold would be better ? for (let group of groups) { - let combineDimSet: string[][] = getCombination(group); + let combineDimSet: string[][] = getCombination(group, 1, CHANNEL.maxDimensionNumber); dimSets.push(...combineDimSet); } return dimSets; } -export function linearMapPositive (arr: number[]): number[] { - let min = Math.min(...arr); - return arr.map(a => a - min + 1); -} - -function sum(arr: number[]): number { - let sum = 0; - for (let i = 0, len = arr.length; i < len; i++) { - // if (typeof dataSource[i][field]) - sum += arr[i]; +export function subspaceSearching(dataSource: DataSource, dimensions: string[], shouldDimensionsCorrelated: boolean | undefined = true): string[][] { + if (shouldDimensionsCorrelated) { + return getDimSetsBasedOnClusterGroups(dataSource, dimensions); + } else { + return getCombination(dimensions) } - return sum; } -export function correlation(dataSource: DataSource, fieldX: string, fieldY: string): number { - let r = 0; - let xBar = sum(dataSource.map(r => r[fieldX])) / dataSource.length; - let yBar = sum(dataSource.map(r => r[fieldY])) / dataSource.length; - r = sum(dataSource.map(r => (r[fieldX] - xBar) * (r[fieldY] - yBar))) / - Math.sqrt(sum(dataSource.map(r => Math.pow(r[fieldX] - xBar, 2))) * sum(dataSource.map(r => Math.pow(r[fieldY] - yBar, 2)))); - return r; -} export type FieldsFeature = [string[], any, number[][]]; -function analysisDimensions(dataSource: DataSource, dimensions: string[], measures: string[], operator: OperatorType | undefined = 'sum'): FieldsFeature[] { +export function insightExtraction(dataSource: DataSource, dimensions: string[], measures: string[], operator: OperatorType | undefined = 'sum'): FieldsFeature[] { let impurityList: FieldsFeature[] = []; - let dimSet = getDimSetsBasedOnClusterGroups(dataSource, dimensions); + let dimSet = subspaceSearching(dataSource, dimensions, true); for (let dset of dimSet) { let impurity = {}; let aggData = aggregate({ @@ -103,7 +71,7 @@ function analysisDimensions(dataSource: DataSource, dimensions: string[], measur for (let i = 0; i < measures.length; i++) { correlationMatrix[i][i] = 1; for (let j = i + 1; j < measures.length; j++) { - let r = correlation(aggData, measures[i], measures[j]); + let r = pearsonCC(aggData, measures[i], measures[j]); correlationMatrix[j][i] = correlationMatrix[i][j] = r; } } @@ -111,5 +79,3 @@ function analysisDimensions(dataSource: DataSource, dimensions: string[], measur } return impurityList } - -export { analysisDimensions, getCombination } \ No newline at end of file diff --git a/packages/visual-insights/src/insights/index.ts b/packages/visual-insights/src/insights/index.ts index 4f6417d0..be0801d7 100644 --- a/packages/visual-insights/src/insights/index.ts +++ b/packages/visual-insights/src/insights/index.ts @@ -1,61 +1 @@ -import { analysisDimensions, getCombination, getDimSetsBasedOnClusterGroups } from './impurity'; -import { TopKPercentField } from './config'; -import { entropy, normalize } from '../impurityMeasure'; -import { memberCount } from '../utils' -import cluster, { kruskalMST } from './cluster'; -import { DataSource } from '../commonTypes'; - -function getInsightViews(dataSource: DataSource, originDimensions: string[], measures: string[]) { - // 1. impurity of measures based on some dimensons (single variable or depth) - // 2. correlation matrix of measures - // cluster of measure group - // rank dimension - // choose one dimension - let dimScores: [string, number, number][] = []; - for (let dim of originDimensions) { - const members = memberCount(dataSource, dim); - const frequencyList = members.map(m => m[1]); - const probabilityList = normalize(frequencyList); - const fieldEntropy = entropy(probabilityList); - const maxEntropy = Math.log2(members.length); - dimScores.push([dim, fieldEntropy, maxEntropy]); - } - dimScores.sort((a, b) => a[1] - b[1]); - const dimAnalysisSize = Math.round(TopKPercentField * dimScores.length); - const dimensions = dimScores.slice(0, dimAnalysisSize).map(d => d[0]); - let analysisReports = analysisDimensions(dataSource, dimensions, measures).map(dimReport => { - let sum = 0; - for (let key in dimReport[1]) { - sum += dimReport[1][key]; - } - return { - detail: dimReport, - score: sum - } - }); - analysisReports.sort((a, b) => { - return a.score - b.score; - }); - - // let finalReports = analysisReports.slice(0, Math.round(analysisReports.length * 0.2)).map(report => { - let finalReports = analysisReports.slice(0, Math.min(analysisReports.length, Math.round(Math.log10(analysisReports.length)) + 9)).map(report => { - // let finalReports = analysisReports.map(report => { - let matrix = report.detail[2]; - let groups = cluster({ matrix, measures }); - return { - ...report, - groups - }; - }); - return finalReports - -} - -export default getInsightViews; -export { - analysisDimensions, - getCombination, - getDimSetsBasedOnClusterGroups, - cluster as clusterMeasures, - kruskalMST -}; \ No newline at end of file +export { insightExtraction, subspaceSearching } from './impurity'; diff --git a/packages/visual-insights/src/ml/cluster/index.ts b/packages/visual-insights/src/ml/cluster/index.ts new file mode 100644 index 00000000..9876d45a --- /dev/null +++ b/packages/visual-insights/src/ml/cluster/index.ts @@ -0,0 +1,6 @@ +import cluster, { kruskalMST } from './kruskal'; + +export { + cluster as kruskal, + kruskalMST as kruskalWithFullMST +} \ No newline at end of file diff --git a/packages/visual-insights/src/insights/cluster.ts b/packages/visual-insights/src/ml/cluster/kruskal.ts similarity index 98% rename from packages/visual-insights/src/insights/cluster.ts rename to packages/visual-insights/src/ml/cluster/kruskal.ts index 705c1ad3..1ad24b3c 100644 --- a/packages/visual-insights/src/insights/cluster.ts +++ b/packages/visual-insights/src/ml/cluster/kruskal.ts @@ -81,7 +81,7 @@ function kruskal(matrix: number[][], groupNumber: number, threshold: number | un } return groups; } - +// todo: delete kruskal with limit size, use largest size to limit it(uncontrolled group number but better), need discuss. /** * * @param matrix diff --git a/packages/visual-insights/src/ml/index.ts b/packages/visual-insights/src/ml/index.ts new file mode 100644 index 00000000..6e6a6234 --- /dev/null +++ b/packages/visual-insights/src/ml/index.ts @@ -0,0 +1,5 @@ +import * as Cluster from './cluster/index'; + +export { + Cluster +} \ No newline at end of file diff --git a/packages/visual-insights/src/specification.ts b/packages/visual-insights/src/specification.ts index 60592735..5f05e116 100644 --- a/packages/visual-insights/src/specification.ts +++ b/packages/visual-insights/src/specification.ts @@ -3,7 +3,7 @@ import { // isFieldCategory, // isFieldContinous, memberCount -} from './utils'; +} from './utils/index'; interface VisualElements { position: number; color: number; @@ -93,7 +93,7 @@ function aestheticMapping (dimFields: Field[]) { return spec } -// todo: +// todo (P1): // don't use dimScores: FieldImpurity. // it's a structure with redundency design. function specification (dimScores: FieldImpurity[], aggData: DataSource, dimensions: string[], measures: string[]): View { diff --git a/packages/visual-insights/src/dashboard/utils.ts b/packages/visual-insights/src/statistics/correlation.ts similarity index 82% rename from packages/visual-insights/src/dashboard/utils.ts rename to packages/visual-insights/src/statistics/correlation.ts index c4aab723..b1841513 100644 --- a/packages/visual-insights/src/dashboard/utils.ts +++ b/packages/visual-insights/src/statistics/correlation.ts @@ -54,6 +54,29 @@ export function crammersV(dataSource: DataSource, fieldX: string, fieldY: string const V = Math.sqrt(chis / (dataSource.length * Math.min(xSet.size - 1, ySet.size - 1))) return V; } +/** + * Pearson correlation coefficient + * @param dataSource + * @param fieldX + * @param fieldY + */ +export function pearsonCC(dataSource: DataSource, fieldX: string, fieldY: string): number { + let r = 0; + let xBar = sum(dataSource.map(r => r[fieldX])) / dataSource.length; + let yBar = sum(dataSource.map(r => r[fieldY])) / dataSource.length; + r = sum(dataSource.map(r => (r[fieldX] - xBar) * (r[fieldY] - yBar))) / + Math.sqrt(sum(dataSource.map(r => Math.pow(r[fieldX] - xBar, 2))) * sum(dataSource.map(r => Math.pow(r[fieldY] - yBar, 2)))); + return r; +} + +function sum(arr: number[]): number { + let sum = 0; + for (let i = 0, len = arr.length; i < len; i++) { + // if (typeof dataSource[i][field]) + sum += arr[i]; + } + return sum; +} // can be used for test diff --git a/packages/visual-insights/src/statistics/index.ts b/packages/visual-insights/src/statistics/index.ts new file mode 100644 index 00000000..c4b01922 --- /dev/null +++ b/packages/visual-insights/src/statistics/index.ts @@ -0,0 +1,2 @@ +export * from './correlation'; +export * from './utils'; \ No newline at end of file diff --git a/packages/visual-insights/src/statistics/utils.ts b/packages/visual-insights/src/statistics/utils.ts new file mode 100644 index 00000000..a3f4d07c --- /dev/null +++ b/packages/visual-insights/src/statistics/utils.ts @@ -0,0 +1,49 @@ +export function linearMapPositive (arr: number[]): number[] { + let min = Math.min(...arr); + return arr.map(a => a - min + 1); +} + +export function getCombination(elements: string[], start: number = 1, end: number = elements.length): string[][] { + let ans: string[][] = []; + const combine = (step: number, set: string[], size: number) => { + if (set.length === size) { + ans.push([...set]); + return; + } + if (step >= elements.length) { + return; + } + combine(step + 1, [...set, elements[step]], size); + combine(step + 1, set, size); + } + for (let i = start; i <= end; i++) { + combine(0, [], i); + } + return ans +} + +export type ImpurityFC = (probabilityList: number[]) => number; + +export function normalize(frequencyList: number[]): number[] { + let sum = 0; + for (let f of frequencyList) { + sum += f; + } + return frequencyList.map(f => f / sum); +} + +export const entropy: ImpurityFC = (probabilityList) => { + let sum = 0; + for (let p of probabilityList) { + sum += p * Math.log2(p); + } + return -sum; +} + +export const gini: ImpurityFC = (probabilityList) => { + let sum = 0; + for (let p of probabilityList) { + sum += p * (1 - p); + } + return sum; +} diff --git a/packages/visual-insights/src/univariateSummary/index.ts b/packages/visual-insights/src/univariateSummary/index.ts index 004f5683..50004964 100644 --- a/packages/visual-insights/src/univariateSummary/index.ts +++ b/packages/visual-insights/src/univariateSummary/index.ts @@ -1,6 +1,6 @@ -import { DataSource, Field, FieldImpurity, FieldType, Record } from '../commonTypes'; -import { isFieldTime, isFieldContinous, memberCount, isFieldCategory, deepcopy, groupContinousField, groupCategoryField } from '../utils'; -import { normalize, entropy } from '../impurityMeasure'; +import { DataSource, Field, FieldType, Record } from '../commonTypes'; +import { isFieldTime, isFieldContinous, memberCount, isFieldCategory, deepcopy, groupContinousField, groupCategoryField } from '../utils/index'; +import { normalize, entropy } from '../statistics/index'; import { isUniformDistribution } from '../distribution'; const MIN_QUAN_MEMBER_SIZE = 25; diff --git a/packages/visual-insights/src/utils.ts b/packages/visual-insights/src/utils/common.ts similarity index 99% rename from packages/visual-insights/src/utils.ts rename to packages/visual-insights/src/utils/common.ts index d5b0f926..4ec91646 100644 --- a/packages/visual-insights/src/utils.ts +++ b/packages/visual-insights/src/utils/common.ts @@ -1,4 +1,4 @@ -import { DataSource } from './commonTypes' +import { DataSource } from '../commonTypes' const JOIN_SYMBOL = '_'; const MAGIC_NUMBER = 5; diff --git a/packages/visual-insights/src/utils/index.ts b/packages/visual-insights/src/utils/index.ts new file mode 100644 index 00000000..a18f8474 --- /dev/null +++ b/packages/visual-insights/src/utils/index.ts @@ -0,0 +1 @@ +export * from './common'; \ No newline at end of file diff --git a/packages/visual-insights/src/visualization/geoms/interval.ts b/packages/visual-insights/src/visualization/geoms/interval.ts deleted file mode 100644 index 215e49b2..00000000 --- a/packages/visual-insights/src/visualization/geoms/interval.ts +++ /dev/null @@ -1,3 +0,0 @@ -export class Interval { - -} \ No newline at end of file diff --git a/packages/visual-insights/src/visualization/index.ts b/packages/visual-insights/src/visualization/index.ts deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/visual-insights/test/dashboard.js b/packages/visual-insights/test/dashboard.js index 4fb45b5c..589cf1c7 100644 --- a/packages/visual-insights/test/dashboard.js +++ b/packages/visual-insights/test/dashboard.js @@ -2,7 +2,7 @@ const fs = require('fs'); const assert = require('assert'); const path = require('path'); -const { analysisDimensions, Cleaner, getInsightViews, getCombination, DashBoard } = require('../build/cjs/index'); +const { Insight, Cleaner, Statistics, DashBoard } = require('../build/cjs/index'); const datasetPath = path.resolve(__dirname, './dataset/titanic.json'); const dataset = JSON.parse(fs.readFileSync(datasetPath).toString()); @@ -15,9 +15,9 @@ const { } = dataset; let cleanData = Cleaner.dropNull(dataSource, dimensions, measures); -describe('insights test', function () { +describe('dashboard test', function () { it('print(dashboard)', function () { - const fieldFeastureList = analysisDimensions(cleanData, dimensions, measures); + const fieldFeastureList = Insight.insightExtraction(cleanData, dimensions, measures); // assert.equal(fieldFeastureList.length, dimensions.length); const dashboardSpace = DashBoard.getDashBoardSubspace(cleanData, dimensions, measures, fieldFeastureList); console.log(JSON.stringify(dashboardSpace, null, 2)) @@ -26,15 +26,9 @@ describe('insights test', function () { assert.equal(sampleViewList.length > 0, true); console.log(sampleViewList) }) - - // it('print(getInsightViews)', function () { - // let result = getInsightViews(cleanData, dimensions, measures); - // // console.log(result) - // assert.equal(result.length > 0, true); - // }) it('print(getCombination)', function () { - let result = getCombination([1, 2, 3, 4, 5, 6]); + let result = Statistics.getCombination([1, 2, 3, 4, 5, 6]); console.log(result) assert.equal(result.length, Math.pow(2, 6) - 1) }) diff --git a/packages/visual-insights/test/fieldAnalysis.js b/packages/visual-insights/test/fieldAnalysis.js deleted file mode 100644 index 228ff283..00000000 --- a/packages/visual-insights/test/fieldAnalysis.js +++ /dev/null @@ -1,24 +0,0 @@ -const fs = require('fs'); -const assert = require('assert'); -const path = require('path'); - -const { fieldsAnalysis } = require('../build/cjs/index'); - -const datasetPath = path.resolve(__dirname, './dataset/titanic.json'); -const dataset = JSON.parse(fs.readFileSync(datasetPath).toString()); -const { - dataSource, - config: { - Measures: measures - } -} = dataset; -const dimensions = ['Age', 'Parch', 'Sex', 'Embarked', 'Pclass']; - -describe('test with titanic dataset', function () { - it('[print result]', function () { - const { dimScores: result } = fieldsAnalysis(dataSource, dimensions, measures); - console.table(result) - assert.equal(result.length, dimensions.length + measures.length); - }) -}) - diff --git a/packages/visual-insights/test/impurityMeasure.js b/packages/visual-insights/test/impurityMeasure.js index 68c55565..dbe6df66 100644 --- a/packages/visual-insights/test/impurityMeasure.js +++ b/packages/visual-insights/test/impurityMeasure.js @@ -1,51 +1,55 @@ const assert = require('assert'); -const { ImpurityMeasure } = require('../build/cjs/index'); -const { normalize, gini, entropy } = ImpurityMeasure; +const { Statistics } = require('../build/cjs/index'); +const { normalize, gini, entropy } = Statistics; + +function floatEqual (n1, n2) { + return Math.abs(n1 - n2) < Number.EPSILON * (2 ** 2); +} + +function getRandomArray (size = 2 + Math.round(Math.random() * 1000)) { + let frequencyList = []; + for (let i = 0; i < size; i++) { + frequencyList.push(Math.round(Math.random() * 1000)); + } + return frequencyList; +} + describe('Impurity Measure test', function () { describe('function: normalize', function () { + let frequencyList = [1,2,3,4,5];//getRandomArray(); + const probabilityList = normalize(frequencyList); + it('values checks', function () { + let freSum = 0; + frequencyList.forEach(f => freSum += f); + probabilityList.forEach((p, i) => { + assert.equal(floatEqual(p, frequencyList[i] / freSum), true) + }) + }) it('sum_{p} = 1', function () { - const size = 2 + Math.round(Math.random() * 100); - let frequencyList = []; - for (let i = 0; i < size; i++) { - frequencyList.push(Math.round(Math.random() * 1000)); - } - const probabilityList = normalize(frequencyList); - let sum = 0; - for (let p of probabilityList) { - sum += p; - } - assert.equal(Math.abs(1 - sum) < Number.EPSILON * Math.pow(2, 2) * probabilityList.length, true); - // assert.equal(Math.abs(1 - sum) < 0.001, true); + probabilityList.forEach(p => sum += p); + assert.equal(floatEqual(sum, 1), true); }) }) describe('function: entropy', function () { + let size = 100 + Math.round(Math.random() * 100); + let frequencyList = getRandomArray(size); + const probabilityList = normalize(frequencyList); + let ans = entropy(probabilityList); + it('isNumber', function () { + assert.notEqual(ans, NaN); + }) it('value <=log(k)', function () { - const size = 2 + Math.round(Math.random() * 100); - let frequencyList = []; - for (let i = 0; i < size; i++) { - frequencyList.push(Math.round(Math.random() * 1000)); - } - const probabilityList = normalize(frequencyList); - - let ans = entropy(probabilityList); - assert.notEqual(ans, NaN); - assert.equal(Math.log2(size) >= ans, true); - }) + assert.equal(Math.log2(size) + Number.EPSILON * (2 ** 3) >= ans - Number.EPSILON * (2 ** 3), true); + }) }) describe('function: gini', function () { + let frequencyList = getRandomArray(); + let probabilityList = normalize(frequencyList); + let ans = gini(probabilityList); it('value <= 1', function () { - const size = 2 + Math.round(Math.random() * 100); - let frequencyList = []; - for (let i = 0; i < size; i++) { - frequencyList.push(Math.round(Math.random() * 1000)); - } - const probabilityList = normalize(frequencyList); - - let ans = gini(probabilityList); - assert.equal(ans <= 1, true); }) }) diff --git a/packages/visual-insights/test/index.js b/packages/visual-insights/test/index.js index de804e65..97499430 100644 --- a/packages/visual-insights/test/index.js +++ b/packages/visual-insights/test/index.js @@ -2,7 +2,7 @@ const fs = require('fs'); const assert = require('assert'); const path = require('path'); -const { specification, Cleaner, getInsightViews, fieldsAnalysis } = require('../build/cjs/index'); +const { specification, Cleaner } = require('../build/cjs/index'); const datasetPath = path.resolve(__dirname, './dataset/titanic.json'); const dataset = JSON.parse(fs.readFileSync(datasetPath).toString()); @@ -17,19 +17,19 @@ let cleanData = Cleaner.dropNull(dataSource, dimensions, measures); describe('insights test', function () { - it('print(getInsightViews)', function () { - const { dimScores } = fieldsAnalysis(dataSource, dimensions, measures); - let result = getInsightViews(cleanData, dimensions, measures); - // console.log(result) - for (let report of result) { - const dimList = report.detail[0]; - for (let meaList of report.groups) { - const { schema, aggData } = specification(dimScores, cleanData, dimList, meaList); - console.log(schema); - assert.equal(Object.keys(schema).length > 0, true); - } - } - assert.equal(result.length > 0, true); - }) + // it('print(getInsightViews)', function () { + // const { dimScores } = fieldsAnalysis(dataSource, dimensions, measures); + // let result = getInsightViews(cleanData, dimensions, measures); + // // console.log(result) + // for (let report of result) { + // const dimList = report.detail[0]; + // for (let meaList of report.groups) { + // const { schema, aggData } = specification(dimScores, cleanData, dimList, meaList); + // console.log(schema); + // assert.equal(Object.keys(schema).length > 0, true); + // } + // } + // assert.equal(result.length > 0, true); + // }) }) diff --git a/packages/visual-insights/test/insights.js b/packages/visual-insights/test/insights.js index 50ff7127..c327a36e 100644 --- a/packages/visual-insights/test/insights.js +++ b/packages/visual-insights/test/insights.js @@ -2,7 +2,7 @@ const fs = require('fs'); const assert = require('assert'); const path = require('path'); -const { analysisDimensions, Cleaner, getInsightViews, getCombination, getDimSetsBasedOnClusterGroups } = require('../build/cjs/index'); +const { Insight, Cleaner, Statistics, Sampling } = require('../build/cjs/index'); const datasetPath = path.resolve(__dirname, './dataset/airbnb.json'); const dataset = JSON.parse(fs.readFileSync(datasetPath).toString()); @@ -13,32 +13,23 @@ const { Measures: measures } } = dataset; -let cleanData = Cleaner.dropNull(dataSource, dimensions, measures); +let cleanData = Sampling.reservoirSampling(Cleaner.dropNull(dataSource, dimensions, measures), 2000); describe('insights test', function () { - // it('print(analysisDimensions)', function () { - // const result = analysisDimensions(cleanData, dimensions, measures); - // console.table(result.map(r => { - // return [r[0][0], JSON.stringify(r[1]), JSON.stringify(r[2])]; - // })) - // assert.equal(result.length, dimensions.length); - // }) - - // it('print(getInsightViews)', function () { - // let result = getInsightViews(cleanData, dimensions, measures); - // // console.log(result) - // assert.equal(result.length > 0, true); - // }) + it('print(analysisDimensions)', function () { + const result = Insight.insightExtraction(cleanData, dimensions, measures); + assert.equal(result.length > 0, true); + }) it('print(getCombination)', function () { - let result = getCombination([1, 2, 3, 4, 5, 6]); + let result = Statistics.getCombination([1, 2, 3, 4, 5, 6]); console.log(result) assert.equal(result.length, Math.pow(2, 6) - 1) }) it('print(clusterCombination vs. combination)', function () { - let result = getDimSetsBasedOnClusterGroups(cleanData, dimensions); - let unClusterResult = getCombination(dimensions); + let result = Insight.subspaceSearching(cleanData, dimensions, true); + let unClusterResult = Statistics.getCombination(dimensions); console.log(result.length, unClusterResult.length, result) assert.equal(result.length <= unClusterResult.length, true); }) diff --git a/packages/visual-insights/test/specification.js b/packages/visual-insights/test/specification.js index ba6b6eb9..62c4fe3c 100644 --- a/packages/visual-insights/test/specification.js +++ b/packages/visual-insights/test/specification.js @@ -1,6 +1,6 @@ const assert = require('assert'); const fs = require('fs'); -const { specification, fieldsAnalysis } = require('../build/cjs/index'); +const { specification, UnivariateSummary } = require('../build/cjs/index'); const path = require('path'); const datasetPath = path.resolve(__dirname, './dataset/titanic.json'); @@ -23,7 +23,13 @@ const dimensions = ['Age', 'Survived', 'Parch', 'Sex', 'Embarked', 'Pclass']; describe('specification test', function () { it('specification result', function () { - const { dimScores } = fieldsAnalysis(dataSource, dimensions, measures); + const fieldEntropyList = UnivariateSummary.getAllFieldsEntropy(dataSource, dimensions.concat(measures)); + const dimScores = fieldEntropyList.map(f => { + return [f.fieldName, f.entropy, f.maxEntropy, { + name: f.fieldName, + type: UnivariateSummary.getFieldType(dataSource, f.fieldName) + }] + }) const { schema, aggData } = specification(dimScores, dataSource, dimensions, measures); console.log(schema); assert.equal(Object.keys(schema).length > 0, true);