-
Notifications
You must be signed in to change notification settings - Fork 8.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ML] Update installation mechanism for Transforms in Fleet according to new specifications #140046
Changes from 18 commits
6a276de
ceca037
66a1886
679d000
79c7127
1f02beb
5aec440
c984cf5
911f676
807db41
822ca03
fe5ab2f
04c7b51
a83c0aa
d0aac73
c5c9643
ff35926
efe1d8c
fa7e8e2
8fa543f
ae38fc9
f2d92ec
5a98445
572e2e5
203b0ab
eb060c0
09a7c08
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -7,26 +7,43 @@ | |||
|
||||
import type { ElasticsearchClient, Logger, SavedObjectsClientContract } from '@kbn/core/server'; | ||||
import { errors } from '@elastic/elasticsearch'; | ||||
import { safeLoad } from 'js-yaml'; | ||||
import { isPopulatedObject } from '@kbn/ml-is-populated-object'; | ||||
|
||||
import { installComponentAndIndexTemplateForDataStream } from '../template/install'; | ||||
import { processFields } from '../../fields/field'; | ||||
import { generateMappings } from '../template/template'; | ||||
import { getESAssetMetadata } from '../meta'; | ||||
import { updateEsAssetReferences } from '../../packages/install'; | ||||
import { getPathParts } from '../../archive'; | ||||
import { ElasticsearchAssetType } from '../../../../../common/types/models'; | ||||
import type { EsAssetReference, InstallablePackage } from '../../../../../common/types/models'; | ||||
import type { | ||||
EsAssetReference, | ||||
InstallablePackage, | ||||
ESAssetMetadata, | ||||
IndexTemplate, | ||||
} from '../../../../../common/types/models'; | ||||
import { getInstallation } from '../../packages'; | ||||
|
||||
import { getESAssetMetadata } from '../meta'; | ||||
|
||||
import { retryTransientEsErrors } from '../retry'; | ||||
|
||||
import { mergeIndexTemplateWithMappings } from './merge_index_template_mappings'; | ||||
import { deleteTransforms } from './remove'; | ||||
import { getAsset } from './common'; | ||||
|
||||
interface TransformInstallation { | ||||
interface TransformModuleBase { | ||||
transformModuleId?: string; | ||||
} | ||||
interface DestinationIndexTemplateInstallation extends TransformModuleBase { | ||||
installationName: string; | ||||
_meta: ESAssetMetadata; | ||||
template: IndexTemplate['template']; | ||||
} | ||||
interface TransformInstallation extends TransformModuleBase { | ||||
installationName: string; | ||||
content: any; | ||||
} | ||||
|
||||
export const installTransform = async ( | ||||
export const installLegacyTransforms = async ( | ||||
installablePackage: InstallablePackage, | ||||
paths: string[], | ||||
esClient: ElasticsearchClient, | ||||
|
@@ -65,7 +82,7 @@ export const installTransform = async ( | |||
if (transformPaths.length > 0) { | ||||
const transformRefs = transformPaths.reduce<EsAssetReference[]>((acc, path) => { | ||||
acc.push({ | ||||
id: getTransformNameForInstallation(installablePackage, path, installNameSuffix), | ||||
id: getLegacyTransformNameForInstallation(installablePackage, path, installNameSuffix), | ||||
type: ElasticsearchAssetType.transform, | ||||
}); | ||||
|
||||
|
@@ -87,7 +104,7 @@ export const installTransform = async ( | |||
content._meta = getESAssetMetadata({ packageName: installablePackage.name }); | ||||
|
||||
return { | ||||
installationName: getTransformNameForInstallation( | ||||
installationName: getLegacyTransformNameForInstallation( | ||||
installablePackage, | ||||
path, | ||||
installNameSuffix | ||||
|
@@ -117,6 +134,218 @@ export const installTransform = async ( | |||
return { installedTransforms, esReferences }; | ||||
}; | ||||
|
||||
export const installTransforms = async ( | ||||
installablePackage: InstallablePackage, | ||||
paths: string[], | ||||
esClient: ElasticsearchClient, | ||||
savedObjectsClient: SavedObjectsClientContract, | ||||
logger: Logger, | ||||
esReferences?: EsAssetReference[] | ||||
) => { | ||||
const transformPaths = paths.filter((path) => isTransform(path)); | ||||
// If package contains legacy transform specifications (i.e. with json instead of yml) | ||||
if (transformPaths.some((p) => p.endsWith('.json')) || transformPaths.length === 0) { | ||||
return await installLegacyTransforms( | ||||
installablePackage, | ||||
paths, | ||||
esClient, | ||||
savedObjectsClient, | ||||
logger, | ||||
esReferences | ||||
); | ||||
} | ||||
|
||||
// Assuming the package will only contain .yml specifications | ||||
const installation = await getInstallation({ | ||||
savedObjectsClient, | ||||
pkgName: installablePackage.name, | ||||
}); | ||||
joshdover marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
|
||||
esReferences = esReferences ?? installation?.installed_es ?? []; | ||||
|
||||
let previousInstalledTransformEsAssets: EsAssetReference[] = []; | ||||
if (installation) { | ||||
previousInstalledTransformEsAssets = installation.installed_es.filter( | ||||
({ type, id }) => type === ElasticsearchAssetType.transform | ||||
); | ||||
if (previousInstalledTransformEsAssets.length) { | ||||
logger.info( | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is debug log level more appropriate for this message? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was previously logger.info but I can definitely change it to debug level if it's more appropriate 👍 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated here fa7e8e2 |
||||
`Found previous transform references:\n ${JSON.stringify( | ||||
previousInstalledTransformEsAssets | ||||
)}` | ||||
); | ||||
} | ||||
} | ||||
|
||||
// delete all previous transform | ||||
await deleteTransforms( | ||||
esClient, | ||||
previousInstalledTransformEsAssets.map((asset) => asset.id) | ||||
); | ||||
|
||||
const installNameSuffix = `${installablePackage.version}`; | ||||
let installedTransforms: EsAssetReference[] = []; | ||||
if (transformPaths.length > 0) { | ||||
const transformsSpecifications = new Map(); | ||||
const destinationIndexTemplates: DestinationIndexTemplateInstallation[] = []; | ||||
const indices = []; | ||||
const transforms: TransformInstallation[] = []; | ||||
|
||||
transformPaths.forEach((path: string) => { | ||||
const { transformModuleId, fileName } = getTransformFolderAndFileNames( | ||||
installablePackage, | ||||
path | ||||
); | ||||
|
||||
// Since there can be multiple assets per transform definition | ||||
// We want to create a unique list of assets/specifications for each transform | ||||
if (transformsSpecifications.get(transformModuleId) === undefined) { | ||||
transformsSpecifications.set(transformModuleId, new Map()); | ||||
} | ||||
const packageAssets = transformsSpecifications.get(transformModuleId); | ||||
|
||||
const content = safeLoad(getAsset(path).toString('utf-8')); | ||||
|
||||
if (fileName === 'fields') { | ||||
const validFields = processFields(content); | ||||
const mappings = generateMappings(validFields); | ||||
packageAssets?.set('mappings', mappings); | ||||
} | ||||
|
||||
if (fileName === 'transform') { | ||||
transformsSpecifications.get(transformModuleId)?.set('destinationIndex', content.dest); | ||||
indices.push(content.dest); | ||||
transformsSpecifications.get(transformModuleId)?.set('transform', content); | ||||
content._meta = getESAssetMetadata({ packageName: installablePackage.name }); | ||||
transforms.push({ | ||||
transformModuleId, | ||||
installationName: getTransformNameForInstallation( | ||||
installablePackage, | ||||
transformModuleId, | ||||
installNameSuffix | ||||
), | ||||
content, | ||||
}); | ||||
indices.push(content.dest); | ||||
} | ||||
|
||||
if (fileName === 'manifest') { | ||||
if (isPopulatedObject(content, ['start']) && content.start === false) { | ||||
transformsSpecifications.get(transformModuleId)?.set('start', false); | ||||
} | ||||
// If manifest.yml contains destination_index_template | ||||
// Combine the mappings and other index template settings from manifest.yml into a single index template | ||||
// Create the index template and track the template in EsAssetReferences | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have we considered using component templates here instead of manually merging? When we create data streams in fleet we have a structure which allows users to customise the data streams if they need to, here is the de doc describing the structure:
The issue with having all of the settings and mappings on the index template is that the user has to modify the index template to make changes, which will then be overwritten on package update. the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is interesting information, but also implies immense complexity, so I think we'll need to take a step back and set some boundaries on the scope of transforms in packages. The complexity comes because if the package data stream, which provides the transform's source data, can be infinitely customised by the user then the transform itself may need to be customised to match. In the case of a For a This might avoid the need to have the ability to change the behaviour of the transform itself. It's also interesting that in the transforms that have been added to packages in the past the namespace has been hardcoded as What's the package with the most complex combination of component templates we have today? I think it would help if we could look at how all the flexibility is currently being used. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @droberts195 Reading the original specification elastic/package-spec#307 again and I think I misunderstood the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
AWS ships probably the most component template of any integration since it has so many policy templates. If you install the AWS integration w/ many of its inputs enabled you'll get quite a lot of component templates installed. I don't know if this is necessarily "complex" as they don't do anything too involved from what I can tell, but maybe this is helpful here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We haven't discussed customization as a requirement for transforms in packages and IMO we should exclude that from the scope for now. I do think it's a good idea to avoid any manual merging on the Kibana side if possible. I'm curious if we actually need to support both There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After some discussion with Mark, I switched to using buildComponentTemplates helper to handle the logic for better consistency with what Fleet is doing. However, if we think it's better to throw error instead of both are defined I can definitely do that as well. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated here 8fa543f |
||||
if ( | ||||
isPopulatedObject(content, ['destination_index_template']) || | ||||
isPopulatedObject(packageAssets.get('mappings')) | ||||
) { | ||||
const destinationIndexTemplate = | ||||
(content.destination_index_template as Record<string, unknown>) ?? {}; | ||||
destinationIndexTemplates.push({ | ||||
transformModuleId, | ||||
_meta: getESAssetMetadata({ packageName: installablePackage.name }), | ||||
installationName: getTransformNameForInstallation( | ||||
installablePackage, | ||||
transformModuleId, | ||||
installNameSuffix, | ||||
'template' | ||||
), | ||||
template: destinationIndexTemplate, | ||||
} as DestinationIndexTemplateInstallation); | ||||
packageAssets.set('destinationIndexTemplate', destinationIndexTemplate); | ||||
} | ||||
} | ||||
}); | ||||
|
||||
const indexTemplatesRefs = destinationIndexTemplates.map((template) => ({ | ||||
id: template.installationName, | ||||
type: ElasticsearchAssetType.indexTemplate, | ||||
})); | ||||
|
||||
const transformRefs = transforms.map((t) => ({ | ||||
id: t.installationName, | ||||
type: ElasticsearchAssetType.transform, | ||||
})); | ||||
|
||||
// get and save refs associated with the transforms before installing | ||||
esReferences = await updateEsAssetReferences( | ||||
savedObjectsClient, | ||||
installablePackage.name, | ||||
esReferences, | ||||
{ | ||||
assetsToAdd: [...indexTemplatesRefs, ...transformRefs], | ||||
assetsToRemove: previousInstalledTransformEsAssets, | ||||
} | ||||
); | ||||
|
||||
await Promise.all( | ||||
destinationIndexTemplates | ||||
.map((destinationIndexTemplate) => { | ||||
const mergedTemplate = mergeIndexTemplateWithMappings( | ||||
destinationIndexTemplate.template, | ||||
transformsSpecifications | ||||
.get(destinationIndexTemplate.transformModuleId) | ||||
?.get('mappings') | ||||
); | ||||
if (mergedTemplate !== undefined) { | ||||
return installComponentAndIndexTemplateForDataStream({ | ||||
esClient, | ||||
logger, | ||||
componentTemplates: {}, | ||||
indexTemplate: { | ||||
templateName: destinationIndexTemplate.installationName, | ||||
// @ts-expect-error Index template here should not contain data_stream property | ||||
// as this template is applied to only an index and not a data stream | ||||
kpollich marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
indexTemplate: { | ||||
template: mergedTemplate, | ||||
priority: 250, | ||||
index_patterns: [ | ||||
transformsSpecifications | ||||
.get(destinationIndexTemplate.transformModuleId) | ||||
?.get('destinationIndex').index, | ||||
], | ||||
_meta: destinationIndexTemplate._meta, | ||||
composed_of: [], | ||||
}, | ||||
}, | ||||
}); | ||||
} | ||||
}) | ||||
.filter((p) => p !== undefined) | ||||
); | ||||
await Promise.all( | ||||
transforms.map(async (transform) => { | ||||
const index = transform.content.dest.index; | ||||
const pipelineId = transform.content.dest.pipeline; | ||||
|
||||
const indexExist = await esClient.indices.exists({ | ||||
index, | ||||
}); | ||||
if (indexExist !== true) { | ||||
return esClient.indices.create({ | ||||
index, | ||||
...(pipelineId ? { settings: { default_pipeline: pipelineId } } : {}), | ||||
}); | ||||
} | ||||
joshdover marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
}) | ||||
); | ||||
|
||||
const transformsPromises = transforms.map(async (transform) => { | ||||
return handleTransformInstall({ | ||||
esClient, | ||||
logger, | ||||
transform, | ||||
startTransform: transformsSpecifications.get(transform.transformModuleId)?.get('start'), | ||||
}); | ||||
}); | ||||
|
||||
installedTransforms = await Promise.all(transformsPromises).then((results) => results.flat()); | ||||
} | ||||
|
||||
return { installedTransforms, esReferences }; | ||||
}; | ||||
|
||||
export const isTransform = (path: string) => { | ||||
const pathParts = getPathParts(path); | ||||
return !path.endsWith('/') && pathParts.type === ElasticsearchAssetType.transform; | ||||
|
@@ -126,10 +355,12 @@ async function handleTransformInstall({ | |||
esClient, | ||||
logger, | ||||
transform, | ||||
startTransform, | ||||
}: { | ||||
esClient: ElasticsearchClient; | ||||
logger: Logger; | ||||
transform: TransformInstallation; | ||||
startTransform?: boolean; | ||||
}): Promise<EsAssetReference> { | ||||
try { | ||||
await retryTransientEsErrors( | ||||
|
@@ -151,15 +382,20 @@ async function handleTransformInstall({ | |||
throw err; | ||||
} | ||||
} | ||||
await esClient.transform.startTransform( | ||||
{ transform_id: transform.installationName }, | ||||
{ ignore: [409] } | ||||
); | ||||
|
||||
// start transform by default if not set in yml file | ||||
// else, respect the setting | ||||
if (startTransform === undefined || startTransform === true) { | ||||
await esClient.transform.startTransform( | ||||
{ transform_id: transform.installationName }, | ||||
{ ignore: [409] } | ||||
); | ||||
} | ||||
joshdover marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
|
||||
return { id: transform.installationName, type: ElasticsearchAssetType.transform }; | ||||
} | ||||
|
||||
const getTransformNameForInstallation = ( | ||||
const getLegacyTransformNameForInstallation = ( | ||||
installablePackage: InstallablePackage, | ||||
path: string, | ||||
suffix: string | ||||
|
@@ -169,3 +405,27 @@ const getTransformNameForInstallation = ( | |||
const folderName = pathPaths?.pop(); | ||||
return `${installablePackage.name}.${folderName}-${filename}-${suffix}`; | ||||
}; | ||||
|
||||
const getTransformNameForInstallation = ( | ||||
installablePackage: InstallablePackage, | ||||
transformModuleId: string, | ||||
suffix: string, | ||||
assetType?: string | ||||
) => { | ||||
return `logs-${installablePackage.name}.${transformModuleId}-${ | ||||
peteharverson marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
assetType === undefined ? 'default' : assetType | ||||
}-${suffix}`; | ||||
}; | ||||
|
||||
const getTransformFolderAndFileNames = (installablePackage: InstallablePackage, path: string) => { | ||||
const pathPaths = path.split('/'); | ||||
const fileName = pathPaths?.pop()?.split('.')[0]; | ||||
let transformModuleId = pathPaths?.pop(); | ||||
|
||||
// If fields.yml is located inside a directory called 'fields' (e.g. {exampleFolder}/fields/fields.yml) | ||||
// We need to go one level up to get the real folder name | ||||
if (transformModuleId === 'fields') { | ||||
transformModuleId = pathPaths?.pop(); | ||||
} | ||||
return { fileName: fileName ?? '', transformModuleId: transformModuleId ?? '' }; | ||||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function is quite long, are there any bits logic that could be pulled out into separate functions and unit tested?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated here fa7e8e2