-
Notifications
You must be signed in to change notification settings - Fork 8.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6844 from Bargs/ingest/bulkAPI
[API] Add CSV bulk indexing support to Kibana API
- Loading branch information
Showing
13 changed files
with
2,309 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
99 changes: 99 additions & 0 deletions
99
src/plugins/kibana/server/routes/api/ingest/register_data.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import { Promise } from 'bluebird'; | ||
import parse from 'csv-parse'; | ||
import _ from 'lodash'; | ||
import hi from 'highland'; | ||
import { patternToIngest } from '../../../../common/lib/convert_pattern_and_ingest_name'; | ||
import { PassThrough } from 'stream'; | ||
import JSONStream from 'JSONStream'; | ||
|
||
const ONE_GIGABYTE = 1024 * 1024 * 1024;

/**
 * Condenses a single Elasticsearch bulk response into a "result object".
 *
 * Every successfully indexed item increments `created`. Items that carry an
 * `error` are collected (their `_id` and `error` only) under `errors.index`.
 *
 * @param {Object} response - Bulk response; its `items` array is inspected.
 * @returns {Object} `{created: Number}` plus `errors.index` when at least one
 *   item failed.
 */
function summarizeBulkResponse(response) {
  return _.reduce(response.items, (summary, docResponse) => {
    const indexResult = docResponse.index;

    if (!indexResult.error) {
      summary.created++;
      return summary;
    }

    // Create the error list lazily so fully successful batches carry no `errors` key.
    const isFirstIndexingError = _.isUndefined(_.get(summary, 'errors.index'));
    if (isFirstIndexingError) {
      _.set(summary, 'errors.index', []);
    }
    summary.errors.index.push(_.pick(indexResult, ['_id', 'error']));

    return summary;
  }, {created: 0});
}

/**
 * Registers the `POST /api/kibana/{id}/_data` route, which parses an uploaded
 * CSV and bulk indexes one document per row into the index named by `{id}`.
 *
 * The CSV may be sent as a multipart form field named `csv` or as the raw
 * request payload. Supported query string parameters:
 *   - `pipeline=true`  adds the pipeline derived from the index pattern (via
 *                      `patternToIngest`) to each bulk request.
 *   - `csv_delimiter`  column separator handed to the CSV parser (default `,`).
 *
 * The reply is a streamed JSON array with one result object per bulk request:
 * `{created, errors?: {index?: [...], other?: [...]}}`.
 *
 * @param {Object} server - Server object exposing `route()` and
 *   `plugins.elasticsearch.callWithRequest`.
 */
export function registerData(server) {
  server.route({
    path: '/api/kibana/{id}/_data',
    method: 'POST',
    config: {
      payload: {
        output: 'stream',
        maxBytes: ONE_GIGABYTE
      }
    },
    handler: function (req, reply) {
      const boundCallWithRequest = _.partial(server.plugins.elasticsearch.callWithRequest, req);
      const indexPattern = req.params.id;
      const usePipeline = req.query.pipeline === 'true';
      // `_.get` only applies its default for `undefined`, so `?csv_delimiter=` used to hand
      // an empty string to the parser. An empty string cannot separate columns, so treat it
      // like a missing parameter.
      const delimiter = _.get(req.query, 'csv_delimiter') || ',';
      const responseStream = new PassThrough();
      const parser = parse({
        columns: true,
        auto_parse: true,
        delimiter: delimiter,
        skip_empty_lines: true
      });

      // Multipart uploads expose the file under `payload.csv`; otherwise the payload itself
      // is the CSV stream and there is no file name to build document ids from.
      const csv = req.payload.csv ? req.payload.csv : req.payload;
      const fileName = req.payload.csv ? csv.hapi.filename : '';

      let currentLine = 2; // Starts at 2 since we parse the header separately

      csv.pipe(parser);

      hi(parser)
      .consume((err, doc, push, next) => {
        if (err) {
          push(err, null);
          next();
        }
        else if (doc === hi.nil) {
          // pass nil (end event) along the stream
          push(null, doc);
        }
        else {
          // Each row becomes two bulk items: the action line, then the document source.
          // Named uploads get an id of "<file name>:<line number>".
          push(null, {index: _.isEmpty(fileName) ? {} : {_id: `${fileName}:${currentLine}`}});
          push(null, doc);
          currentLine++;
          next();
        }
      })
      // 200 stream items per bulk request, i.e. up to 100 documents (action + source each)
      .batch(200)
      .map((bulkBody) => {
        const bulkParams = {
          index: indexPattern,
          type: 'default',
          body: bulkBody
        };

        if (usePipeline) {
          bulkParams.pipeline = patternToIngest(indexPattern);
        }

        return hi(boundCallWithRequest('bulk', bulkParams));
      })
      // Keep at most two bulk requests in flight
      .parallel(2)
      .map(summarizeBulkResponse)
      // The first parse/request error ends the stream with a final result describing it
      .stopOnError((err, push) => {
        push(null, {created: 0, errors: {other: [err.message]}});
      })
      .pipe(JSONStream.stringify())
      .pipe(responseStream);

      reply(responseStream).type('application/json');
    }
  });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
define(function (require) {
  const Promise = require('bluebird');
  const _ = require('intern/dojo/node!lodash');
  const expect = require('intern/dojo/node!expect.js');
  const fakeNamesIndexTemplate = require('intern/dojo/node!../../fixtures/fake_names_index_template.json');
  const fs = require('intern/dojo/node!fs');

  // Functional tests for the CSV upload endpoint (`/kibana/names/_data`).
  // `request` issues HTTP calls against the API under test and
  // `scenarioManager.client` is used to inspect and clean up Elasticsearch.
  return function (bdd, scenarioManager, request) {
    const es = scenarioManager.client;

    // Begins a multipart POST to `url` with `fixturePath` attached as the "csv" field.
    function attachCsv(url, fixturePath) {
      return request.post(url).attach('csv', fixturePath);
    }

    // Picks the first result object out of a response body.
    function firstResult(apiResponse) {
      return apiResponse.body[0];
    }

    bdd.describe('_data', function () {

      // Each test starts with the "names" template installed ...
      bdd.beforeEach(function () {
        return es.indices.putTemplate({
          name: 'names',
          body: fakeNamesIndexTemplate
        });
      });

      // ... and ends with both the index (if it was created) and the template removed.
      bdd.afterEach(function () {
        const dropIndex = es.indices.delete({
          index: 'names',
          ignore: 404
        });

        return dropIndex.then(() => es.indices.deleteTemplate({name: 'names'}));
      });

      bdd.it('should accept a multipart/form-data request with a csv file attached', function () {
        return attachCsv('/kibana/names/_data', 'test/unit/fixtures/fake_names.csv')
        .expect(200);
      });

      bdd.it('should also accept the raw csv data in the payload body', function () {
        const rawCsv = fs.readFileSync('test/unit/fixtures/fake_names_big.csv', {encoding: 'utf8'});

        return request.post('/kibana/names/_data')
        .send(rawCsv)
        .expect(200);
      });

      bdd.it('should return JSON results', function () {
        return attachCsv('/kibana/names/_data', 'test/unit/fixtures/fake_names.csv')
        .expect('Content-Type', /json/)
        .expect(200);
      });

      bdd.it('should index one document per row in the csv', function () {
        return attachCsv('/kibana/names/_data', 'test/unit/fixtures/fake_names.csv')
        .expect(200)
        .then(() => es.indices.refresh())
        .then(() => es.count({ index: 'names' }))
        .then((countResponse) => {
          expect(countResponse.count).to.be(100);
        });
      });

      bdd.it('should stream a chunked response', function () {
        return attachCsv('/kibana/names/_data', 'test/unit/fixtures/fake_names.csv')
        .expect('Transfer-Encoding', 'chunked')
        .expect(200);
      });

      bdd.it('should respond with an array of one or more "result objects"', function () {
        return attachCsv('/kibana/names/_data', 'test/unit/fixtures/fake_names_big.csv')
        .expect(200)
        .then((apiResponse) => {
          expect(apiResponse.body.length).to.be(14);
        });
      });

      bdd.describe('result objects', function () {

        bdd.it('should include a count of created documents', function () {
          return attachCsv('/kibana/names/_data', 'test/unit/fixtures/fake_names.csv')
          .expect(200)
          .then((apiResponse) => {
            const result = firstResult(apiResponse);
            expect(result).to.have.property('created');
            expect(result.created).to.be(100);
          });
        });

        bdd.it('should report any indexing errors per document under an "errors.index" key', function () {
          return attachCsv('/kibana/names/_data', 'test/unit/fixtures/fake_names_with_mapping_errors.csv')
          .expect(200)
          .then((apiResponse) => {
            const result = firstResult(apiResponse);
            expect(result).to.have.property('created');
            expect(result.created).to.be(98);
            expect(result).to.have.property('errors');
            expect(result.errors).to.have.property('index');
            expect(result.errors.index.length).to.be(2);
          });
        });

        bdd.it('should report any csv parsing errors under an "errors.other" key', function () {
          return attachCsv('/kibana/names/_data', 'test/unit/fixtures/fake_names_with_parse_errors.csv')
          .expect(200)
          .then((apiResponse) => {
            const result = firstResult(apiResponse);

            // parse errors immediately abort indexing
            expect(result).to.have.property('created');
            expect(result.created).to.be(0);

            expect(result).to.have.property('errors');
            expect(result.errors).to.have.property('other');
            expect(result.errors.other.length).to.be(1);
          });
        });

      });

      bdd.describe('optional parameters', function () {
        bdd.it('should accept a custom csv_delimiter query string param for parsing the CSV', function () {
          return attachCsv('/kibana/names/_data?csv_delimiter=|', 'test/unit/fixtures/fake_names_pipe_delimited.csv')
          .expect(200)
          .then((apiResponse) => {
            const result = firstResult(apiResponse);
            expect(result).to.have.property('created');
            expect(result.created).to.be(2);
            expect(result).to.not.have.property('errors');

            return es.indices.refresh();
          })
          .then(() => es.search({
            index: 'names'
          }))
          .then((searchResponse) => {
            // The indexed document should contain exactly the pipe-separated columns
            const source = _.get(searchResponse, 'hits.hits[0]._source');
            expect(source).to.only.have.keys('Number', 'Gender', 'NameSet');
          });
        });

        bdd.it('should accept a boolean pipeline query string parameter enabling use of the index pattern\'s associated pipeline',
        function () {
          // A pipeline that stamps every document with foo=bar
          const createPipeline = {
            path: '_ingest/pipeline/kibana-names',
            method: 'put',
            body: {
              processors: [
                {
                  set: {
                    field: 'foo',
                    value: 'bar'
                  }
                }
              ]
            }
          };
          const removePipeline = {
            path: '_ingest/pipeline/kibana-names',
            method: 'delete'
          };

          return es.transport.request(createPipeline)
          .then(() => {
            return attachCsv('/kibana/names/_data?pipeline=true', 'test/unit/fixtures/fake_names.csv')
            .expect(200);
          })
          .then(() => es.indices.refresh())
          .then(() => es.search({
            index: 'names'
          }))
          .then((searchResponse) => {
            _.forEach(searchResponse.hits.hits, (hit) => {
              expect(hit._source).to.have.property('foo');
              expect(hit._source.foo).to.be('bar');
            });
            return searchResponse;
          })
          // Remove the pipeline whether or not the assertions passed
          .finally(() => es.transport.request(removePipeline));
        });
      });

    });
  };
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.