Skip to content

Commit

Permalink
[ML] DataFrame Analytics use field caps api to set column type (#54543)
Browse files Browse the repository at this point in the history
* wip: initialize newJobCaps service in parent element

* wip: use jobCaps service to create columns

* add render and types to table columns

* add keyword suffix when constructing query. ensure pagination works

* Ensure search query and sorting works

* wip: update regression table to use jobCaps api

* move shared resources to central location

* ensure 0 and false values show up in table

* add error handling to jobCaps initialization

* ensure outlier detection table can toggle columns

* check for undefined before using moment to create date

* add tests for fix for getNestedProperty
  • Loading branch information
alvarezmelissa87 authored Jan 14, 2020
1 parent 45f8ca9 commit 69730ce
Show file tree
Hide file tree
Showing 15 changed files with 623 additions and 393 deletions.
2 changes: 1 addition & 1 deletion x-pack/legacy/plugins/ml/common/types/fields.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export interface Field {
id: FieldId;
name: string;
type: ES_FIELD_TYPES;
aggregatable: boolean;
aggregatable?: boolean;
aggIds?: AggId[];
aggs?: Aggregation[];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { ml } from '../../services/ml_api_service';
import { Dictionary } from '../../../../common/types/common';
import { getErrorMessage } from '../pages/analytics_management/hooks/use_create_analytics_form';
import { SavedSearchQuery } from '../../contexts/kibana';
import { SortDirection } from '../../components/ml_in_memory_table';

export type IndexName = string;
export type IndexPattern = string;
Expand All @@ -39,6 +40,13 @@ interface ClassificationAnalysis {
};
}

/**
 * Argument object for a single "load exploration data" request.
 *
 * NOTE(review): the function consuming this type is outside this view, so the
 * per-field notes below are inferred from the names and types only; confirm
 * against the loader.
 */
export interface LoadExploreDataArg {
// presumably the name of the field the results are sorted by (confirm)
field: string;
// sort direction, using the SortDirection type of the ML in-memory table
direction: SortDirection;
// search query applied to the request
searchQuery: SavedSearchQuery;
// optional flag; presumably signals that a `.keyword` suffix is needed when
// the query is built (confirm against the loader)
requiresKeyword?: boolean;
}

export const SEARCH_SIZE = 1000;

export const defaultSearchQuery = {
Expand Down Expand Up @@ -182,7 +190,7 @@ export const getPredictedFieldName = (
const defaultPredictionField = `${getDependentVar(analysis)}_prediction`;
const predictedField = `${resultsField}.${
predictionFieldName ? predictionFieldName : defaultPredictionField
}${isClassificationAnalysis(analysis) && !forSort ? '.keyword' : ''}`;
}`;
return predictedField;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,15 @@
*/

import { getNestedProperty } from '../../util/object_utils';
import { DataFrameAnalyticsConfig, getPredictedFieldName, getDependentVar } from './analytics';
import {
DataFrameAnalyticsConfig,
getPredictedFieldName,
getDependentVar,
getPredictionFieldName,
} from './analytics';
import { Field } from '../../../../common/types/fields';
import { ES_FIELD_TYPES } from '../../../../../../../../src/plugins/data/public';
import { newJobCapsService } from '../../services/new_job_capabilities_service';

export type EsId = string;
export type EsDocSource = Record<string, any>;
Expand All @@ -19,8 +27,41 @@ export interface EsDoc extends Record<string, any> {
export const MAX_COLUMNS = 20;
export const DEFAULT_REGRESSION_COLUMNS = 8;

/**
 * ES field types grouped as "basic" numerical types:
 * `LONG`, `INTEGER`, `SHORT` and `BYTE`.
 */
export const BASIC_NUMERICAL_TYPES = new Set([
ES_FIELD_TYPES.LONG,
ES_FIELD_TYPES.INTEGER,
ES_FIELD_TYPES.SHORT,
ES_FIELD_TYPES.BYTE,
]);

/**
 * ES field types grouped as "extended" numerical types:
 * `DOUBLE`, `FLOAT`, `HALF_FLOAT` and `SCALED_FLOAT`.
 */
export const EXTENDED_NUMERICAL_TYPES = new Set([
ES_FIELD_TYPES.DOUBLE,
ES_FIELD_TYPES.FLOAT,
ES_FIELD_TYPES.HALF_FLOAT,
ES_FIELD_TYPES.SCALED_FLOAT,
]);

const ML__ID_COPY = 'ml__id_copy';

/**
 * Reports whether the given field is known to the job caps service under both
 * a `keyword` and a `text` type.
 *
 * - A `keyword` field qualifies when the field whose id is the same name with
 *   a trailing `.keyword` removed is of type `text`.
 * - A `text` field qualifies when the field whose id is `<name>.keyword` is of
 *   type `keyword`.
 *
 * Unknown fields and fields of any other type yield `false`.
 *
 * @param fieldName name of the field to look up in `newJobCapsService.fields`
 * @returns `true` only when both the keyword and the text variant exist
 */
export const isKeywordAndTextType = (fieldName: string): boolean => {
  const { fields } = newJobCapsService;
  const knownType = fields.find(candidate => candidate.name === fieldName)?.type;

  // Field is not known to the caps service: nothing to pair up.
  if (knownType === undefined) {
    return false;
  }

  if (knownType === ES_FIELD_TYPES.KEYWORD) {
    // Keyword field: look for the text counterpart (name without `.keyword`).
    const textCounterpart = newJobCapsService.getFieldById(fieldName.replace(/\.keyword$/, ''));
    return textCounterpart !== null && textCounterpart.type === ES_FIELD_TYPES.TEXT;
  }

  if (knownType === ES_FIELD_TYPES.TEXT) {
    // Text field: look for the keyword counterpart (`<name>.keyword`).
    const keywordCounterpart = newJobCapsService.getFieldById(`${fieldName}.keyword`);
    return keywordCounterpart !== null && keywordCounterpart.type === ES_FIELD_TYPES.KEYWORD;
  }

  return false;
};

// Used to sort columns:
// - string based columns are moved to the left
// - followed by the outlier_score column
Expand Down Expand Up @@ -90,10 +131,10 @@ export const sortRegressionResultsFields = (
if (b === predictedField) {
return 1;
}
if (a === dependentVariable) {
if (a === dependentVariable || a === dependentVariable.replace(/\.keyword$/, '')) {
return -1;
}
if (b === dependentVariable) {
if (b === dependentVariable || b === dependentVariable.replace(/\.keyword$/, '')) {
return 1;
}

Expand Down Expand Up @@ -200,6 +241,50 @@ export function getFlattenedFields(obj: EsDocSource, resultsField: string): EsFi
return flatDocFields.filter(f => f !== ML__ID_COPY);
}

/**
 * Builds the full and the default-selected column lists for a results table
 * from the fields reported by the job caps service.
 *
 * Two synthetic entries are added in front of `fields` before sorting:
 * `<results_field>.is_training` (boolean) and the predicted field, which takes
 * the type the caps service reports for the dependent variable. The combined
 * list is ordered with `sortRegressionResultsFields`.
 *
 * The default selection is taken from the first `DEFAULT_REGRESSION_COLUMNS * 2`
 * sorted entries, dropping every name containing `.keyword` except the
 * predicted field, and is capped at `DEFAULT_REGRESSION_COLUMNS` entries.
 *
 * @param fields fields available for the job; an empty list yields empty results
 * @param jobConfig analytics job config (analysis and destination are read)
 * @returns `docFields` (all sorted entries) and `selectedFields` (default subset)
 */
export const getDefaultFieldsFromJobCaps = (
  fields: Field[],
  jobConfig: DataFrameAnalyticsConfig
): { selectedFields: Field[]; docFields: Field[] } => {
  if (fields.length === 0) {
    return { selectedFields: [], docFields: [] };
  }

  const depVar = getDependentVar(jobConfig.analysis);
  const depVarType = newJobCapsService.getFieldById(depVar)?.type;
  const customPredictionName = getPredictionFieldName(jobConfig.analysis);
  // default is 'ml'
  const resultsField = jobConfig.dest.results_field;

  const fallbackPredictionName = `${depVar}_prediction`;
  const predictedField = `${resultsField}.${customPredictionName || fallbackPredictionName}`;
  const isTrainingField = `${resultsField}.is_training`;

  const docFields: any = [
    { id: isTrainingField, name: isTrainingField, type: ES_FIELD_TYPES.BOOLEAN },
    { id: predictedField, name: predictedField, type: depVarType },
    ...fields,
  ];
  docFields.sort((left: any, right: any) =>
    sortRegressionResultsFields(left.name, right.name, jobConfig)
  );

  // Keep the predicted field even if its name contains `.keyword`.
  const isDefaultCandidate = (candidate: any) =>
    candidate.name === predictedField || !candidate.name.includes('.keyword');

  const selectedFields = docFields
    .slice(0, DEFAULT_REGRESSION_COLUMNS * 2)
    .filter(isDefaultCandidate)
    .slice(0, DEFAULT_REGRESSION_COLUMNS);

  return { selectedFields, docFields };
};

export const getDefaultClassificationFields = (
docs: EsDoc[],
jobConfig: DataFrameAnalyticsConfig
Expand Down Expand Up @@ -290,15 +375,29 @@ export const getDefaultSelectableFields = (docs: EsDoc[], resultsField: string):
.slice(0, MAX_COLUMNS);
};

export const toggleSelectedField = (
/**
 * Toggles a column name in a list of selected field names.
 *
 * The name is appended when absent and removed when present. The passed-in
 * array is modified in place and the same array instance is returned.
 *
 * @param selectedFields currently selected field names (mutated)
 * @param column field name to toggle
 * @returns the same `selectedFields` array after the toggle
 */
export const toggleSelectedFieldSimple = (
  selectedFields: EsFieldName[],
  column: EsFieldName
): EsFieldName[] => {
  const position = selectedFields.indexOf(column);
  const isSelected = position !== -1;

  if (isSelected) {
    selectedFields.splice(position, 1);
    return selectedFields;
  }

  selectedFields.push(column);
  return selectedFields;
};

/**
 * Toggles a column in a list of selected `Field` objects.
 *
 * When a field with the given name is already selected it is removed.
 * Otherwise the field is looked up via `newJobCapsService.getFieldById` and
 * appended if found; an unknown column leaves the list unchanged. The
 * passed-in array is modified in place and the same instance is returned.
 *
 * @param selectedFields currently selected fields (mutated)
 * @param column name of the column to toggle
 * @returns the same `selectedFields` array after the toggle
 */
export const toggleSelectedField = (selectedFields: Field[], column: EsFieldName): Field[] => {
  const position = selectedFields.findIndex(selected => selected.name === column);

  if (position !== -1) {
    selectedFields.splice(position, 1);
    return selectedFields;
  }

  const fieldToAdd = newJobCapsService.getFieldById(column);
  if (fieldToAdd !== null) {
    selectedFields.push(fieldToAdd);
  }
  return selectedFields;
};
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,13 @@ export {
getDefaultSelectableFields,
getDefaultRegressionFields,
getDefaultClassificationFields,
getDefaultFieldsFromJobCaps,
getFlattenedFields,
sortColumns,
sortRegressionResultsColumns,
sortRegressionResultsFields,
toggleSelectedField,
toggleSelectedFieldSimple,
EsId,
EsDoc,
EsDocSource,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ import { ResultsTable } from './results_table';
import { DATA_FRAME_TASK_STATE } from '../../../analytics_management/components/analytics_list/common';
import { ResultsSearchQuery, defaultSearchQuery } from '../../../../common/analytics';
import { LoadingPanel } from '../loading_panel';
import { getIndexPatternIdFromName } from '../../../../../util/index_utils';
import { IIndexPattern } from '../../../../../../../../../../../src/plugins/data/common/index_patterns';
import { newJobCapsService } from '../../../../../services/new_job_capabilities_service';
import { useKibanaContext } from '../../../../../contexts/kibana';

interface GetDataFrameAnalyticsResponse {
count: number;
Expand All @@ -31,6 +35,21 @@ export const ExplorationTitle: React.FC<{ jobId: string }> = ({ jobId }) => (
</EuiTitle>
);

// Translated callout title used when loading the job configuration failed.
const jobConfigErrorTitle = i18n.translate(
'xpack.ml.dataframe.analytics.classificationExploration.jobConfigurationFetchError',
{
defaultMessage:
'Unable to fetch results. An error occurred loading the job configuration data.',
}
);

// Translated callout title used when initializing the job caps service
// (index field data) failed.
const jobCapsErrorTitle = i18n.translate(
'xpack.ml.dataframe.analytics.classificationExploration.jobCapsFetchError',
{
defaultMessage: "Unable to fetch results. An error occurred loading the index's field data.",
}
);

interface Props {
jobId: string;
jobStatus: DATA_FRAME_TASK_STATE;
Expand All @@ -39,8 +58,13 @@ interface Props {
export const ClassificationExploration: FC<Props> = ({ jobId, jobStatus }) => {
const [jobConfig, setJobConfig] = useState<DataFrameAnalyticsConfig | undefined>(undefined);
const [isLoadingJobConfig, setIsLoadingJobConfig] = useState<boolean>(false);
const [isInitialized, setIsInitialized] = useState<boolean>(false);
const [jobConfigErrorMessage, setJobConfigErrorMessage] = useState<undefined | string>(undefined);
const [jobCapsServiceErrorMessage, setJobCapsServiceErrorMessage] = useState<undefined | string>(
undefined
);
const [searchQuery, setSearchQuery] = useState<ResultsSearchQuery>(defaultSearchQuery);
const kibanaContext = useKibanaContext();

const loadJobConfig = async () => {
setIsLoadingJobConfig(true);
Expand Down Expand Up @@ -78,23 +102,41 @@ export const ClassificationExploration: FC<Props> = ({ jobId, jobStatus }) => {
loadJobConfig();
}, []);

if (jobConfigErrorMessage !== undefined) {
// Initializes newJobCapsService from the index pattern of the job's first
// source index. Does nothing until the job config has been loaded. On success
// the component is flagged as initialized; on failure the error text is stored
// so the error callout can be rendered.
const initializeJobCapsService = async () => {
  if (jobConfig === undefined) {
    return;
  }

  try {
    const firstSourceIndex = jobConfig.source.index[0];
    // Fall back to the raw index name when no index pattern id is found for it.
    const patternId = getIndexPatternIdFromName(firstSourceIndex) || firstSourceIndex;
    const indexPattern: IIndexPattern = await kibanaContext.indexPatterns.get(patternId);

    if (indexPattern !== undefined) {
      await newJobCapsService.initializeFromIndexPattern(indexPattern, false, false);
    }
    setIsInitialized(true);
  } catch (e) {
    // Prefer the error's own message; otherwise serialize whatever was thrown.
    setJobCapsServiceErrorMessage(e.message !== undefined ? e.message : JSON.stringify(e));
  }
};

// Run the job caps initialization on mount and again whenever the serialized
// job config changes (the dependency is the JSON string of jobConfig).
useEffect(() => {
initializeJobCapsService();
}, [JSON.stringify(jobConfig)]);

if (jobConfigErrorMessage !== undefined || jobCapsServiceErrorMessage !== undefined) {
return (
<EuiPanel grow={false}>
<ExplorationTitle jobId={jobId} />
<EuiSpacer />
<EuiCallOut
title={i18n.translate(
'xpack.ml.dataframe.analytics.classificationExploration.jobConfigurationFetchError',
{
defaultMessage:
'Unable to fetch results. An error occurred loading the job configuration data.',
}
)}
title={jobConfigErrorMessage ? jobConfigErrorTitle : jobCapsErrorTitle}
color="danger"
iconType="cross"
>
<p>{jobConfigErrorMessage}</p>
<p>{jobConfigErrorMessage ? jobConfigErrorMessage : jobCapsServiceErrorMessage}</p>
</EuiCallOut>
</EuiPanel>
);
Expand All @@ -103,12 +145,12 @@ export const ClassificationExploration: FC<Props> = ({ jobId, jobStatus }) => {
return (
<Fragment>
{isLoadingJobConfig === true && jobConfig === undefined && <LoadingPanel />}
{isLoadingJobConfig === false && jobConfig !== undefined && (
{isLoadingJobConfig === false && jobConfig !== undefined && isInitialized === true && (
<EvaluatePanel jobConfig={jobConfig} jobStatus={jobStatus} searchQuery={searchQuery} />
)}
<EuiSpacer />
{isLoadingJobConfig === true && jobConfig === undefined && <LoadingPanel />}
{isLoadingJobConfig === false && jobConfig !== undefined && (
{isLoadingJobConfig === false && jobConfig !== undefined && isInitialized === true && (
<ResultsTable
jobConfig={jobConfig}
jobStatus={jobStatus}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import {
loadDocsCount,
DataFrameAnalyticsConfig,
} from '../../../../common';
import { isKeywordAndTextType } from '../../../../common/fields';
import { getTaskStateBadge } from '../../../analytics_management/components/analytics_list/columns';
import { DATA_FRAME_TASK_STATE } from '../../../analytics_management/components/analytics_list/common';
import {
Expand All @@ -37,13 +38,8 @@ import {
ResultsSearchQuery,
ANALYSIS_CONFIG_TYPE,
} from '../../../../common/analytics';
import { IIndexPattern } from '../../../../../../../../../../../src/plugins/data/common/index_patterns';
import { ES_FIELD_TYPES } from '../../../../../../../../../../../src/plugins/data/public';
import { LoadingPanel } from '../loading_panel';
import { getColumnData } from './column_data';
import { useKibanaContext } from '../../../../../contexts/kibana';
import { newJobCapsService } from '../../../../../services/new_job_capabilities_service';
import { getIndexPatternIdFromName } from '../../../../../util/index_utils';

const defaultPanelWidth = 500;

Expand All @@ -66,10 +62,8 @@ export const EvaluatePanel: FC<Props> = ({ jobConfig, jobStatus, searchQuery })
const [visibleColumns, setVisibleColumns] = useState(() =>
columns.map(({ id }: { id: string }) => id)
);
const kibanaContext = useKibanaContext();

const index = jobConfig.dest.index;
const sourceIndex = jobConfig.source.index[0];
const dependentVariable = getDependentVar(jobConfig.analysis);
const predictionFieldName = getPredictionFieldName(jobConfig.analysis);
// default is 'ml'
Expand All @@ -86,25 +80,7 @@ export const EvaluatePanel: FC<Props> = ({ jobConfig, jobStatus, searchQuery })
setIsLoading(true);

try {
const indexPatternId = getIndexPatternIdFromName(sourceIndex) || sourceIndex;
const indexPattern: IIndexPattern = await kibanaContext.indexPatterns.get(indexPatternId);

if (indexPattern !== undefined) {
await newJobCapsService.initializeFromIndexPattern(indexPattern, false, false);
// If dependent_variable is of type keyword and text .keyword suffix is required for evaluate endpoint
const { fields } = newJobCapsService;
const depVarFieldType = fields.find(field => field.name === dependentVariable)?.type;

// If it's a keyword type - check if it has a corresponding text type
if (depVarFieldType !== undefined && depVarFieldType === ES_FIELD_TYPES.KEYWORD) {
const field = newJobCapsService.getFieldById(dependentVariable.replace(/\.keyword$/, ''));
requiresKeyword = field !== null && field.type === ES_FIELD_TYPES.TEXT;
} else if (depVarFieldType !== undefined && depVarFieldType === ES_FIELD_TYPES.TEXT) {
// If text, check if has corresponding keyword type
const field = newJobCapsService.getFieldById(`${dependentVariable}.keyword`);
requiresKeyword = field !== null && field.type === ES_FIELD_TYPES.KEYWORD;
}
}
requiresKeyword = isKeywordAndTextType(dependentVariable);
} catch (e) {
// Additional error handling due to missing field type is handled by loadEvalData
console.error('Unable to load new field types', error); // eslint-disable-line no-console
Expand Down Expand Up @@ -359,9 +335,9 @@ export const EvaluatePanel: FC<Props> = ({ jobConfig, jobStatus, searchQuery })
<Fragment />
</EuiFormRow>
</EuiFlexItem>
<EuiFlexItem grow={false}>
<EuiFlexItem grow={false} style={{ width: '90%' }}>
<EuiDataGrid
aria-label="Data grid demo"
aria-label="Classification confusion matrix"
columns={columns}
columnVisibility={{ visibleColumns, setVisibleColumns }}
rowCount={columnsData.length}
Expand Down
Loading

0 comments on commit 69730ce

Please sign in to comment.