Skip to content

Commit

Permalink
Fingerprint processor (elastic#14205)
Browse files Browse the repository at this point in the history
* WIP: fingerprint processor

* Implementing SHA256 fingerprinter

* Sort source fields

* Refactoring

* Add TODO

* Convert time fields to UTC

* Removing unnecessary function

* Adding SHA1

* WIP: add encoding

* Cleanup

* Running mage fmt

* More methods + consolidating tests

* Fleshing out tests

* Adding test for target field

* Adding documentation

* Adding CHANGELOG entry

* Fixing test

* Converting tests to map

* Isolating tests

* Use io.Writer to stream in fields

* Implement ignore_missing setting

* Replace table with definition list

* Adding `ignore_missing` to doc

* using io.Fprintf

* Use common.StringSet

* Adding typed errors

* Adding more typed errors

* Adding license header
  • Loading branch information
ycombinator authored and jorgemarey committed Dec 13, 2019
1 parent d2fb45a commit 6d1c122
Show file tree
Hide file tree
Showing 8 changed files with 722 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Add `keep_null` setting to allow Beats to publish null values in events. {issue}5522[5522] {pull}13928[13928]
- Add shared_credential_file option in aws related config for specifying credential file directory. {issue}14157[14157] {pull}14178[14178]
- GA the `script` processor. {pull}14325[14325]
- Add `fingerprint` processor. {issue}11173[11173] {pull}14205[14205]

*Auditbeat*

Expand Down
21 changes: 21 additions & 0 deletions libbeat/docs/processors-using.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,27 @@ The following settings are supported:
empty array (`[]`) or an empty object (`{}`) are considered
empty values. Default is `false`.

[[fingerprint]]
=== Generate a fingerprint of an event

The `fingerprint` processor generates a fingerprint of an event based on a
specified subset of its fields.

[source,yaml]
-----------------------------------------------------
processors:
- fingerprint:
fields: ["field1", "field2", ...]
-----------------------------------------------------

The following settings are supported:

`fields`:: List of fields to use as the source for the fingerprint.
`ignore_missing`:: (Optional) Whether to ignore missing fields. Default is `false`.
`target_field`:: (Optional) Field in which the generated fingerprint should be stored. Default is `fingerprint`.
`method`:: (Optional) Algorithm to use for computing the fingerprint. Must be one of: `md5`, `sha1`, `sha256`, `sha384`, `sha512`. Default is `sha256`.
`encoding`:: (Optional) Encoding to use on the fingerprint value. Must be one of `hex`, `base32`, or `base64`. Default is `hex`.

[[include-fields]]
=== Keep fields from events

Expand Down
36 changes: 36 additions & 0 deletions libbeat/processors/fingerprint/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package fingerprint

// Config for fingerprint processor.
type Config struct {
Method hashMethod `config:"method"` // Hash function to use for fingerprinting
Fields []string `config:"fields" validate:"required"` // Source fields to compute fingerprint from
TargetField string `config:"target_field"` // Target field for the fingerprint
Encoding encodingMethod `config:"encoding"` // Encoding to use for target field value
IgnoreMissing bool `config:"ignore_missing"` // Ignore missing fields?
}

func defaultConfig() Config {
return Config{
Method: hashes["sha256"],
TargetField: "fingerprint",
Encoding: encodings["hex"],
IgnoreMissing: false,
}
}
46 changes: 46 additions & 0 deletions libbeat/processors/fingerprint/encode.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package fingerprint

import (
"encoding/base32"
"encoding/base64"
"encoding/hex"
"strings"
)

type encodingMethod func([]byte) string

var encodings = map[string]encodingMethod{
"hex": hex.EncodeToString,
"base32": base32.StdEncoding.EncodeToString,
"base64": base64.StdEncoding.EncodeToString,
}

// Unpack creates the encodingMethod from the given string
func (e *encodingMethod) Unpack(str string) error {
str = strings.ToLower(str)

m, found := encodings[str]
if !found {
return makeErrUnknownEncoding(str)
}

*e = m
return nil
}
79 changes: 79 additions & 0 deletions libbeat/processors/fingerprint/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package fingerprint

import (
"errors"
"fmt"
)

var errNoFields = errors.New("must specify at least one field")

type (
errUnknownEncoding struct{ encoding string }
errUnknownMethod struct{ method string }
errConfigUnpack struct{ cause error }
errComputeFingerprint struct{ cause error }
errMissingField struct {
field string
cause error
}
errNonScalarField struct{ field string }
)

func makeErrUnknownEncoding(encoding string) errUnknownEncoding {
return errUnknownEncoding{encoding}
}
func (e errUnknownEncoding) Error() string {
return fmt.Sprintf("invalid encoding [%s]", e.encoding)
}

func makeErrUnknownMethod(method string) errUnknownMethod {
return errUnknownMethod{method}
}
func (e errUnknownMethod) Error() string {
return fmt.Sprintf("invalid fingerprinting method [%s]", e.method)
}

func makeErrConfigUnpack(cause error) errConfigUnpack {
return errConfigUnpack{cause}
}
func (e errConfigUnpack) Error() string {
return fmt.Sprintf("failed to unpack %v processor configuration: %v", processorName, e.cause)
}

func makeErrComputeFingerprint(cause error) errComputeFingerprint {
return errComputeFingerprint{cause}
}
func (e errComputeFingerprint) Error() string {
return fmt.Sprintf("failed to compute fingerprint: %v", e.cause)
}

func makeErrMissingField(field string, cause error) errMissingField {
return errMissingField{field, cause}
}
func (e errMissingField) Error() string {
return fmt.Sprintf("failed to find field [%v] in event: %v", e.field, e.cause)
}

func makeErrNonScalarField(field string) errNonScalarField {
return errNonScalarField{field}
}
func (e errNonScalarField) Error() string {
return fmt.Sprintf("cannot compute fingerprint using non-scalar field [%v]", e.field)
}
111 changes: 111 additions & 0 deletions libbeat/processors/fingerprint/fingerprint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package fingerprint

import (
"fmt"
"hash"
"io"
"time"

"github.com/elastic/beats/libbeat/beat"
"github.com/elastic/beats/libbeat/common"
"github.com/elastic/beats/libbeat/processors"
jsprocessor "github.com/elastic/beats/libbeat/processors/script/javascript/module/processor"
)

func init() {
processors.RegisterPlugin("fingerprint", New)
jsprocessor.RegisterPlugin("Fingerprint", New)
}

const processorName = "fingerprint"

type fingerprint struct {
config Config
fields []string
hash hash.Hash
}

// New constructs a new fingerprint processor.
func New(cfg *common.Config) (processors.Processor, error) {
config := defaultConfig()
if err := cfg.Unpack(&config); err != nil {
return nil, makeErrConfigUnpack(err)
}

fields := common.MakeStringSet(config.Fields...)

p := &fingerprint{
config: config,
hash: config.Method(),
fields: fields.ToSlice(),
}

return p, nil
}

// Run enriches the given event with fingerprint information
func (p *fingerprint) Run(event *beat.Event) (*beat.Event, error) {
hashFn := p.hash
hashFn.Reset()

err := p.writeFields(hashFn, event.Fields)
if err != nil {
return nil, makeErrComputeFingerprint(err)
}

hash := hashFn.Sum(nil)
encodedHash := p.config.Encoding(hash)

if _, err = event.PutValue(p.config.TargetField, encodedHash); err != nil {
return nil, makeErrComputeFingerprint(err)
}

return event, nil
}

func (p *fingerprint) String() string {
return fmt.Sprintf("%v=[method=[%v]]", processorName, p.config.Method)
}

func (p *fingerprint) writeFields(to io.Writer, eventFields common.MapStr) error {
for _, k := range p.fields {
v, err := eventFields.GetValue(k)
if err != nil {
if p.config.IgnoreMissing {
continue
}
return makeErrMissingField(k, err)
}

i := v
switch vv := v.(type) {
case map[string]interface{}, []interface{}, common.MapStr:
return makeErrNonScalarField(k)
case time.Time:
// Ensure we consistently hash times in UTC.
i = vv.UTC()
}

fmt.Fprintf(to, "|%v|%v", k, i)
}

io.WriteString(to, "|")
return nil
}
Loading

0 comments on commit 6d1c122

Please sign in to comment.