From 9e5d6b3e0fa731a37374c091cd7284d1c41f116e Mon Sep 17 00:00:00 2001 From: Paul Tavares <56442535+paul-tavares@users.noreply.github.com> Date: Thu, 6 Jul 2023 10:46:55 -0400 Subject: [PATCH] [Fleet] Adjust background File cleanup task to also process `to-host` indexes (#161138) ## Summary - Updates the `fleet:check-deleted-files-task` to include the indexes that store files for delivery to the Host (currently used only by Endpoint integration) --- .../common/services/file_storage.test.ts | 17 ---- .../fleet/common/services/file_storage.ts | 38 --------- .../fleet/server/services/files/index.test.ts | 7 +- .../fleet/server/services/files/index.ts | 33 ++++---- .../fleet/server/services/files/utils.test.ts | 66 ++++++++++++++++ .../fleet/server/services/files/utils.ts | 77 +++++++++++++++++++ .../tasks/check_deleted_files_task.test.ts | 35 ++++++++- 7 files changed, 198 insertions(+), 75 deletions(-) create mode 100644 x-pack/plugins/fleet/server/services/files/utils.test.ts create mode 100644 x-pack/plugins/fleet/server/services/files/utils.ts diff --git a/x-pack/plugins/fleet/common/services/file_storage.test.ts b/x-pack/plugins/fleet/common/services/file_storage.test.ts index dbf5da61dba1d..4e4292a57808a 100644 --- a/x-pack/plugins/fleet/common/services/file_storage.test.ts +++ b/x-pack/plugins/fleet/common/services/file_storage.test.ts @@ -5,12 +5,8 @@ * 2.0. */ -import { FILE_STORAGE_METADATA_INDEX_PATTERN } from '../constants'; - import { getFileDataIndexName, getFileMetadataIndexName } from '..'; -import { getIntegrationNameFromIndexName } from './file_storage'; - describe('File Storage services', () => { describe('File Index Names', () => { it('should generate file metadata index name for files received from host', () => { @@ -29,17 +25,4 @@ describe('File Storage services', () => { expect(getFileDataIndexName('foo', true)).toEqual('.fleet-fileds-tohost-data-foo'); }); }); - - describe('getIntegrationNameFromIndexName()', () => { - it.each([ - ['regular index names', '.fleet-fileds-fromhost-meta-agent'], - ['datastream index names', '.ds-.fleet-fileds-fromhost-data-agent-2023.06.30-00001'], - ])('should handle %s', (_, index) => { - expect(getIntegrationNameFromIndexName(index, FILE_STORAGE_METADATA_INDEX_PATTERN)).toEqual( - 'agent' - ); - }); - - it.todo('should error if index pattern does not include `*`'); - }); }); diff --git a/x-pack/plugins/fleet/common/services/file_storage.ts b/x-pack/plugins/fleet/common/services/file_storage.ts index af909a22aa946..7bbc9dd2485f4 100644 --- a/x-pack/plugins/fleet/common/services/file_storage.ts +++ b/x-pack/plugins/fleet/common/services/file_storage.ts @@ -55,41 +55,3 @@ export const getFileDataIndexName = ( `Unable to define integration file data index. No '*' in index pattern: ${dataIndex}` ); }; - -/** - * Returns back the integration name for a given File Data (chunks) index name. - * - * @example - * // Given a File data index pattern of `.fleet-fileds-fromhost-data-*`: - * - * getIntegrationNameFromFileDataIndexName('.fleet-fileds-fromhost-data-agent'); - * // return 'agent' - * - * getIntegrationNameFromFileDataIndexName('.ds-.fleet-fileds-fromhost-data-agent'); - * // return 'agent' - * - * getIntegrationNameFromFileDataIndexName('.ds-.fleet-fileds-fromhost-data-agent-2023.06.30-00001'); - * // return 'agent' - */ -export const getIntegrationNameFromFileDataIndexName = (indexName: string): string => { - return getIntegrationNameFromIndexName(indexName, FILE_STORAGE_DATA_INDEX_PATTERN); -}; - -export const getIntegrationNameFromIndexName = ( - indexName: string, - indexPattern: string -): string => { - const integrationNameIndexPosition = indexPattern.split('-').indexOf('*'); - - if (integrationNameIndexPosition === -1) { - throw new Error(`Unable to parse index name. No '*' in index pattern: ${indexPattern}`); - } - - const indexPieces = indexName.replace(/^\.ds-/, '').split('-'); - - if (indexPieces[integrationNameIndexPosition]) { - return indexPieces[integrationNameIndexPosition]; - } - - throw new Error(`Index name ${indexName} does not seem to be a File storage index`); -}; diff --git a/x-pack/plugins/fleet/server/services/files/index.test.ts b/x-pack/plugins/fleet/server/services/files/index.test.ts index f37ff95d1ea96..551cded5d66b2 100644 --- a/x-pack/plugins/fleet/server/services/files/index.test.ts +++ b/x-pack/plugins/fleet/server/services/files/index.test.ts @@ -11,6 +11,8 @@ import { elasticsearchServiceMock } from '@kbn/core/server/mocks'; import { FILE_STORAGE_DATA_INDEX_PATTERN, FILE_STORAGE_METADATA_INDEX_PATTERN, + FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN, + FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN, } from '../../../common/constants/file_storage'; import { getFileDataIndexName, getFileMetadataIndexName } from '../../../common/services'; @@ -67,7 +69,7 @@ describe('files service', () => { expect(esClientMock.search).toBeCalledWith( { - index: FILE_STORAGE_METADATA_INDEX_PATTERN, + index: [FILE_STORAGE_METADATA_INDEX_PATTERN, FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN], body: { size: ES_SEARCH_LIMIT, query: { @@ -130,7 +132,8 @@ describe('files service', () => { expect(esClientMock.search).toBeCalledWith( { - index: FILE_STORAGE_DATA_INDEX_PATTERN, + ignore_unavailable: true, + index: [FILE_STORAGE_DATA_INDEX_PATTERN, FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN], body: { size: ES_SEARCH_LIMIT, query: { diff --git a/x-pack/plugins/fleet/server/services/files/index.ts b/x-pack/plugins/fleet/server/services/files/index.ts index 8d6cbdb9fd5a4..48303c3611fc7 100644 --- a/x-pack/plugins/fleet/server/services/files/index.ts +++ b/x-pack/plugins/fleet/server/services/files/index.ts @@ -12,18 +12,19 @@ import type { FileStatus } from '@kbn/files-plugin/common/types'; import { FILE_STORAGE_DATA_INDEX_PATTERN, FILE_STORAGE_METADATA_INDEX_PATTERN, + FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN, + FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN, } from '../../../common/constants'; -import { - getFileMetadataIndexName, - getIntegrationNameFromFileDataIndexName, - getIntegrationNameFromIndexName, -} from '../../../common/services'; +import { getFileMetadataIndexName } from '../../../common/services'; import { ES_SEARCH_LIMIT } from '../../../common/constants'; +import { parseFileStorageIndex } from './utils'; + /** - * Gets files with given status + * Gets files with given status from the files metadata index. Includes both files + * `tohost` and files `fromhost` * * @param esClient * @param abortController @@ -37,7 +38,7 @@ export async function getFilesByStatus( const result = await esClient .search( { - index: FILE_STORAGE_METADATA_INDEX_PATTERN, + index: [FILE_STORAGE_METADATA_INDEX_PATTERN, FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN], body: { size: ES_SEARCH_LIMIT, query: { @@ -79,20 +80,18 @@ export async function fileIdsWithoutChunksByIndex( const noChunkFileIdsByIndex = files.reduce((acc, file) => { allFileIds.add(file._id); - const integration = getIntegrationNameFromIndexName( - file._index, - FILE_STORAGE_METADATA_INDEX_PATTERN - ); - const metadataIndex = getFileMetadataIndexName(integration); + const { index: metadataIndex } = parseFileStorageIndex(file._index); const fileIds = acc[metadataIndex]; + acc[metadataIndex] = fileIds ? fileIds.add(file._id) : new Set([file._id]); + return acc; }, {} as FileIdsByIndex); const chunks = await esClient .search<{ bid: string }>( { - index: FILE_STORAGE_DATA_INDEX_PATTERN, + index: [FILE_STORAGE_DATA_INDEX_PATTERN, FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN], body: { size: ES_SEARCH_LIMIT, query: { @@ -113,6 +112,7 @@ export async function fileIdsWithoutChunksByIndex( }, _source: ['bid'], }, + ignore_unavailable: true, }, { signal: abortController.signal } ) @@ -123,9 +123,12 @@ export async function fileIdsWithoutChunksByIndex( chunks.hits.hits.forEach((hit) => { const fileId = hit._source?.bid; + if (!fileId) return; - const integration = getIntegrationNameFromFileDataIndexName(hit._index); - const metadataIndex = getFileMetadataIndexName(integration); + + const { integration, direction } = parseFileStorageIndex(hit._index); + const metadataIndex = getFileMetadataIndexName(integration, direction === 'to-host'); + if (noChunkFileIdsByIndex[metadataIndex]?.delete(fileId)) { allFileIds.delete(fileId); } diff --git a/x-pack/plugins/fleet/server/services/files/utils.test.ts b/x-pack/plugins/fleet/server/services/files/utils.test.ts new file mode 100644 index 0000000000000..876f2f9143f8f --- /dev/null +++ b/x-pack/plugins/fleet/server/services/files/utils.test.ts @@ -0,0 +1,66 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { getFileDataIndexName, getFileMetadataIndexName } from '../../../common'; + +import { parseFileStorageIndex } from './utils'; + +describe('Files service utils', () => { + describe('parseFileStorageIndex()', () => { + it.each([ + [ + 'tohost meta', + '.ds-.fleet-fileds-tohost-meta-endpoint-2023.07.03-000001', + { + index: getFileMetadataIndexName('endpoint', true), + integration: 'endpoint', + direction: 'to-host', + type: 'meta', + }, + ], + [ + 'tohost data', + '.ds-.fleet-fileds-tohost-data-agent-2023.07.03-000001', + { + index: getFileDataIndexName('agent', true), + integration: 'agent', + direction: 'to-host', + type: 'data', + }, + ], + [ + 'fromhost meta', + '.ds-.fleet-fileds-fromhost-meta-agent-2023.07.03-000001', + { + index: getFileMetadataIndexName('agent'), + integration: 'agent', + direction: 'from-host', + type: 'meta', + }, + ], + [ + 'fromhost data', + '.ds-.fleet-fileds-fromhost-data-endpoint-2023.07.03-000001', + { + index: getFileDataIndexName('endpoint'), + integration: 'endpoint', + direction: 'from-host', + type: 'data', + }, + ], + ])('should parse index %s', (_, index, result) => { + expect(parseFileStorageIndex(index)).toEqual(result); + }); + + it('should error if index does not match a known pattern', () => { + expect(() => parseFileStorageIndex('foo')).toThrow( + 'Unable to parse index [foo]. Does not match a known index pattern: [.fleet-fileds-fromhost-meta-* | ' + + '.fleet-fileds-fromhost-data-* | .fleet-fileds-tohost-meta-* | .fleet-fileds-tohost-data-*]' + ); + }); + }); +}); diff --git a/x-pack/plugins/fleet/server/services/files/utils.ts b/x-pack/plugins/fleet/server/services/files/utils.ts new file mode 100644 index 0000000000000..281f14cd7e30b --- /dev/null +++ b/x-pack/plugins/fleet/server/services/files/utils.ts @@ -0,0 +1,77 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { getFileDataIndexName, getFileMetadataIndexName } from '../../../common/services'; + +import { + FILE_STORAGE_DATA_INDEX_PATTERN, + FILE_STORAGE_METADATA_INDEX_PATTERN, + FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN, + FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN, +} from '../../../common/constants'; + +interface ParsedFileStorageIndex { + index: string; + integration: string; + type: 'meta' | 'data'; + direction: 'to-host' | 'from-host'; +} + +/** + * Given a document index (from either a file's metadata doc or a file's chunk doc), utility will + * parse it and return information about that index + * @param index + */ +export const parseFileStorageIndex = (index: string): ParsedFileStorageIndex => { + const response: ParsedFileStorageIndex = { + index: '', + integration: '', + type: 'meta', + direction: 'from-host', + }; + + const fileStorageIndexPatterns = [ + FILE_STORAGE_METADATA_INDEX_PATTERN, + FILE_STORAGE_DATA_INDEX_PATTERN, + + FILE_STORAGE_TO_HOST_METADATA_INDEX_PATTERN, + FILE_STORAGE_TO_HOST_DATA_INDEX_PATTERN, + ]; + + for (const indexPattern of fileStorageIndexPatterns) { + const indexPrefix = indexPattern.substring(0, indexPattern.indexOf('*')); + + if (index.includes(indexPrefix)) { + const isDeliveryToHost = index.includes('-tohost-'); + const isDataIndex = index.includes('host-data-'); + const integrationPosition = indexPattern.split('-').indexOf('*'); + const integration = index + .replace(/^\.ds-/, '') + .split('-') + .at(integrationPosition); + + if (!integration) { + throw new Error(`Index name ${index} does not seem to be a File storage index`); + } + + response.direction = isDeliveryToHost ? 'to-host' : 'from-host'; + response.type = isDataIndex ? 'data' : 'meta'; + response.integration = integration; + response.index = isDataIndex + ? getFileDataIndexName(response.integration, isDeliveryToHost) + : getFileMetadataIndexName(response.integration, isDeliveryToHost); + + return response; + } + } + + throw new Error( + `Unable to parse index [${index}]. Does not match a known index pattern: [${fileStorageIndexPatterns.join( + ' | ' + )}]` + ); +}; diff --git a/x-pack/plugins/fleet/server/tasks/check_deleted_files_task.test.ts b/x-pack/plugins/fleet/server/tasks/check_deleted_files_task.test.ts index 353b52acc8c13..909a78f74ae34 100644 --- a/x-pack/plugins/fleet/server/tasks/check_deleted_files_task.test.ts +++ b/x-pack/plugins/fleet/server/tasks/check_deleted_files_task.test.ts @@ -21,8 +21,8 @@ import { appContextService } from '../services'; import { CheckDeletedFilesTask, TYPE, VERSION } from './check_deleted_files_task'; -const MOCK_FILE_METADATA_INDEX = getFileMetadataIndexName('mock'); -const MOCK_FILE_DATA_INDEX = getFileDataIndexName('mock'); +const MOCK_FILE_METADATA_INDEX = '.ds-' + getFileMetadataIndexName('mock'); +const MOCK_FILE_DATA_INDEX = '.ds-' + getFileDataIndexName('mock'); const MOCK_TASK_INSTANCE = { id: `${TYPE}:${VERSION}`, @@ -100,6 +100,35 @@ describe('check deleted files task', () => { return taskRunner.run(); }; + it('should search both metadata indexes', async () => { + esClient.search.mockResolvedValue({ + took: 5, + timed_out: false, + _shards: { + total: 1, + successful: 1, + skipped: 0, + failed: 0, + }, + hits: { + total: { + value: 0, + relation: 'eq', + }, + hits: [], + }, + }); + + await runTask(); + + expect(esClient.search).toHaveBeenCalledWith( + expect.objectContaining({ + index: ['.fleet-fileds-fromhost-meta-*', '.fleet-fileds-tohost-meta-*'], + }), + expect.anything() + ); + }); + it('should attempt to update deleted files', async () => { // mock getReadyFiles search esClient.search @@ -162,7 +191,7 @@ describe('check deleted files task', () => { expect(esClient.updateByQuery).toHaveBeenCalledWith( { - index: MOCK_FILE_METADATA_INDEX, + index: MOCK_FILE_METADATA_INDEX.replace('.ds-', ''), query: { ids: { values: ['metadata-testid2'],