diff --git a/internal/docs/exported_fields.go b/internal/docs/exported_fields.go index d78998bdd..59babdae9 100644 --- a/internal/docs/exported_fields.go +++ b/internal/docs/exported_fields.go @@ -147,6 +147,14 @@ func visitFields(namePrefix string, f fields.FieldDefinition, records []fieldsTa unit: f.Unit, metricType: f.MetricType, }) + + for _, multiField := range f.MultiFields { + records = append(records, fieldsTableRecord{ + name: name + "." + multiField.Name, + description: fmt.Sprintf("Multi-field of %#q.", name), + aType: multiField.Type, + }) + } return records, nil } diff --git a/internal/fields/dependency_manager.go b/internal/fields/dependency_manager.go index a4c76e935..3e5c11f11 100644 --- a/internal/fields/dependency_manager.go +++ b/internal/fields/dependency_manager.go @@ -208,9 +208,21 @@ func buildFieldPath(root string, field common.MapStr) string { func transformImportedField(fd FieldDefinition) common.MapStr { m := common.MapStr{ - "name": fd.Name, - "description": fd.Description, - "type": fd.Type, + "name": fd.Name, + "type": fd.Type, + } + + // Multi-fields don't have descriptions. 
+ if fd.Description != "" { + m["description"] = fd.Description + } + + if fd.Index != nil { + m["index"] = *fd.Index + } + + if fd.DocValues != nil { + m["doc_values"] = *fd.DocValues } if len(fd.Fields) > 0 { @@ -221,5 +233,14 @@ func transformImportedField(fd FieldDefinition) common.MapStr { } m.Put("fields", t) } + + if len(fd.MultiFields) > 0 { + var t []common.MapStr + for _, f := range fd.MultiFields { + i := transformImportedField(f) + t = append(t, i) + } + m.Put("multi_fields", t) + } return m } diff --git a/internal/fields/dependency_manager_test.go b/internal/fields/dependency_manager_test.go index eca32a47b..10b4e6347 100644 --- a/internal/fields/dependency_manager_test.go +++ b/internal/fields/dependency_manager_test.go @@ -116,6 +116,71 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) { changed: true, valid: true, }, + { + title: "multi fields", + defs: []common.MapStr{ + { + "name": "process.command_line", + "external": "test", + }, + }, + result: []common.MapStr{ + { + "name": "process.command_line", + "type": "wildcard", + "description": "Full command line that started the process.", + "multi_fields": []common.MapStr{ + { + "name": "text", + "type": "match_only_text", + }, + }, + }, + }, + changed: true, + valid: true, + }, + { + title: "not indexed external", + defs: []common.MapStr{ + { + "name": "event.original", + "external": "test", + }, + }, + result: []common.MapStr{ + { + "name": "event.original", + "type": "text", + "description": "Original event.", + "index": false, + "doc_values": false, + }, + }, + changed: true, + valid: true, + }, + { + title: "override not indexed external", + defs: []common.MapStr{ + { + "name": "event.original", + "index": true, + "external": "test", + }, + }, + result: []common.MapStr{ + { + "name": "event.original", + "type": "text", + "description": "Original event.", + "index": true, + "doc_values": false, + }, + }, + changed: true, + valid: true, + }, { title: "unknown field", defs: 
[]common.MapStr{ @@ -128,6 +193,7 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) { }, } + indexFalse := false schema := map[string][]FieldDefinition{"test": []FieldDefinition{ { Name: "container.id", @@ -144,6 +210,24 @@ func TestDependencyManagerInjectExternalFields(t *testing.T) { Description: "Data stream dataset.", Type: "constant_keyword", }, + { + Name: "process.command_line", + Description: "Full command line that started the process.", + Type: "wildcard", + MultiFields: []FieldDefinition{ + { + Name: "text", + Type: "match_only_text", + }, + }, + }, + { + Name: "event.original", + Description: "Original event.", + Type: "text", + Index: &indexFalse, + DocValues: &indexFalse, + }, }} dm := &DependencyManager{schema: schema} diff --git a/internal/fields/model.go b/internal/fields/model.go index 19cff7d71..7ac71e754 100644 --- a/internal/fields/model.go +++ b/internal/fields/model.go @@ -14,7 +14,10 @@ type FieldDefinition struct { Unit string `yaml:"unit"` MetricType string `yaml:"metric_type"` External string `yaml:"external"` - Fields []FieldDefinition `yaml:"fields"` + Index *bool `yaml:"index"` + DocValues *bool `yaml:"doc_values"` + Fields []FieldDefinition `yaml:"fields,omitempty"` + MultiFields []FieldDefinition `yaml:"multi_fields,omitempty"` } func (orig *FieldDefinition) Update(fd FieldDefinition) { @@ -42,26 +45,40 @@ func (orig *FieldDefinition) Update(fd FieldDefinition) { if fd.External != "" { orig.External = fd.External } + if fd.Index != nil { + orig.Index = fd.Index + } + if fd.DocValues != nil { + orig.DocValues = fd.DocValues + } if len(fd.Fields) > 0 { - // When a subfield the same name exists, update it. When not, append it. 
- updatedFields := make([]FieldDefinition, len(orig.Fields)) - copy(updatedFields, orig.Fields) - for _, newField := range fd.Fields { - found := false - for i, origField := range orig.Fields { - if origField.Name != newField.Name { - continue - } + orig.Fields = updateFields(orig.Fields, fd.Fields) + } - found = true - updatedFields[i].Update(newField) - break - } - if !found { - updatedFields = append(updatedFields, newField) + if len(fd.MultiFields) > 0 { + orig.MultiFields = updateFields(orig.MultiFields, fd.MultiFields) + } +} + +func updateFields(origFields, fields []FieldDefinition) []FieldDefinition { + // When a subfield with the same name exists, update it. When not, append it. + updatedFields := make([]FieldDefinition, len(origFields)) + copy(updatedFields, origFields) + for _, newField := range fields { + found := false + for i, origField := range origFields { + if origField.Name != newField.Name { + continue } + + found = true + updatedFields[i].Update(newField) + break + } + if !found { + updatedFields = append(updatedFields, newField) } - orig.Fields = updatedFields } + return updatedFields } diff --git a/test/packages/parallel/apache/data_stream/access/fields/ecs.yml b/test/packages/parallel/apache/data_stream/access/fields/ecs.yml index 12993b026..f05ee491d 100644 --- a/test/packages/parallel/apache/data_stream/access/fields/ecs.yml +++ b/test/packages/parallel/apache/data_stream/access/fields/ecs.yml @@ -10,6 +10,8 @@ name: event.created - external: ecs name: event.kind +- external: ecs + name: event.original - external: ecs name: event.outcome - external: ecs @@ -30,6 +32,8 @@ name: log.level - external: ecs name: message +- external: ecs + name: process.command_line - external: ecs name: process.pid - external: ecs diff --git a/test/packages/parallel/apache/docs/README.md b/test/packages/parallel/apache/docs/README.md index d00e556d0..f94910d4d 100644 --- a/test/packages/parallel/apache/docs/README.md +++ 
b/test/packages/parallel/apache/docs/README.md @@ -45,8 +45,10 @@ Access logs collects the Apache access logs. | event.dataset | Event dataset | constant_keyword | | event.kind | This is one of four ECS Categorization Fields, and indicates the highest level in the ECS category hierarchy. `event.kind` gives high-level information about what type of information the event contains, without being specific to the contents of the event. For example, values of this field distinguish alert events from metric events. The value of this field can be used to inform how these kinds of events should be handled. They may warrant different retention, different access control, it may also help understand whether the data coming in at a regular interval or not. | keyword | | event.module | Event module | constant_keyword | +| event.original | Raw text message of entire event. Used to demonstrate log integrity or where the full log message (before splitting it up in multiple parts) may be required, e.g. for reindex. This field is not indexed and doc_values are disabled. It cannot be searched, but it can be retrieved from `_source`. If users wish to override this and index this field, please see `Field data types` in the `Elasticsearch Reference`. | keyword | | event.outcome | This is one of four ECS Categorization Fields, and indicates the lowest level in the ECS category hierarchy. `event.outcome` simply denotes whether the event represents a success or a failure from the perspective of the entity that produced the event. Note that when a single transaction is described in multiple events, each event may populate different values of `event.outcome`, according to their perspective. Also note that in the case of a compound event (a single event that contains multiple logical events), this field should be populated with the value that best captures the overall success or failure from the perspective of the event producer. 
Further note that not all events will have an associated outcome. For example, this field is generally not populated for metric events, events with `event.type:info`, or any events for which an outcome does not make logical sense. | keyword | | file.path | Full path to the file, including the file name. It should include the drive letter, when appropriate. | keyword | +| file.path.text | Multi-field of `file.path`. | match_only_text | | host.architecture | Operating system architecture. | keyword | | host.containerized | If the host is a container. | boolean | | host.domain | Name of the domain of which the host is a member. For example, on Windows this could be the host's Active Directory domain or NetBIOS domain name. For Linux this could be the domain of the host's LDAP provider. | keyword | @@ -60,6 +62,7 @@ Access logs collects the Apache access logs. | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | @@ -73,11 +76,14 @@ Access logs collects the Apache access logs. | log.level | Original log level of the log event. If the source of the event provides a log level or textual severity, this is the one that goes in `log.level`. If your source doesn't specify one, you may put your event transport's severity here (e.g. Syslog severity). Some examples are `warn`, `err`, `i`, `informational`. 
| keyword | | log.offset | Log offset | long | | message | For log events the message field contains the log message, optimized for viewing in a log viewer. For structured logs without an original message field, other fields can be concatenated to form a human-readable summary of the event. If multiple messages exist, they can be combined into one message. | match_only_text | +| process.command_line | Full command line that started the process, including the absolute path to the executable, and all arguments. Some arguments may be filtered to protect sensitive information. | wildcard | +| process.command_line.text | Multi-field of `process.command_line`. | match_only_text | | process.pid | Process id. | long | | process.thread.id | Thread ID. | long | | source.address | Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store the raw address in the `.address` field. Then it should be duplicated to `.ip` or `.domain`, depending on which one it is. | keyword | | source.as.number | Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. | long | | source.as.organization.name | Organization name. | keyword | +| source.as.organization.name.text | Multi-field of `source.as.organization.name`. | match_only_text | | source.domain | Source domain. | keyword | | source.geo.city_name | City name. | keyword | | source.geo.continent_name | Name of the continent. | keyword | @@ -94,14 +100,19 @@ Access logs collects the Apache access logs. | url.domain | Domain of the url, such as "www.elastic.co". In some cases a URL may refer to an IP and/or port directly, without a domain name. In this case, the IP address would go to the `domain` field. If the URL contains a literal IPv6 address enclosed by `[` and `]` (IETF RFC 2732), the `[` and `]` characters should also be captured in the `domain` field. 
| keyword | | url.extension | The field contains the file extension from the original request url, excluding the leading dot. The file extension is only set if it exists, as not every url has a file extension. The leading period must not be included. For example, the value must be "png", not ".png". Note that when the file name has multiple extensions (example.tar.gz), only the last one should be captured ("gz", not "tar.gz"). | keyword | | url.original | Unmodified original url as seen in the event source. Note that in network monitoring, the observed URL may be a full URL, whereas in access logs, the URL is often just represented as a path. This field is meant to represent the URL as it was observed, complete or not. | wildcard | +| url.original.text | Multi-field of `url.original`. | match_only_text | | url.path | Path of the request, such as "/search". | wildcard | | url.query | The query field describes the query string of the request, such as "q=elasticsearch". The `?` is excluded from the query string. If a URL contains no `?`, there is no query field. If there is a `?` but no query, the query field exists with an empty string. The `exists` query can be used to differentiate between the two cases. | keyword | | user.name | Short name or login of the user. | keyword | +| user.name.text | Multi-field of `user.name`. | match_only_text | | user_agent.device.name | Name of the device. | keyword | | user_agent.name | Name of the user agent. | keyword | | user_agent.original | Unparsed user_agent string. | keyword | +| user_agent.original.text | Multi-field of `user_agent.original`. | match_only_text | | user_agent.os.full | Operating system name, including the version or code name. | keyword | +| user_agent.os.full.text | Multi-field of `user_agent.os.full`. | match_only_text | | user_agent.os.name | Operating system name, without the version. | keyword | +| user_agent.os.name.text | Multi-field of `user_agent.os.name`. 
| match_only_text | | user_agent.os.version | Operating system version as a raw string. | keyword | | user_agent.version | Version of the user agent. | keyword | @@ -141,6 +152,7 @@ Error logs collects the Apache error logs. | event.timezone | This field should be populated when the event's timestamp does not include timezone information already (e.g. default Syslog timestamps). It's optional otherwise. Acceptable timezone formats are: a canonical ID (e.g. "Europe/Amsterdam"), abbreviated (e.g. "EST") or an HH:mm differential (e.g. "-05:00"). | keyword | | event.type | This is one of four ECS Categorization Fields, and indicates the third level in the ECS category hierarchy. `event.type` represents a categorization "sub-bucket" that, when used along with the `event.category` field values, enables filtering events down to a level appropriate for single visualization. This field is an array. This will allow proper categorization of some events that fall in multiple event types. | keyword | | file.path | Full path to the file, including the file name. It should include the drive letter, when appropriate. | keyword | +| file.path.text | Multi-field of `file.path`. | match_only_text | | host.architecture | Operating system architecture. | keyword | | host.containerized | If the host is a container. | boolean | | host.domain | Name of the domain of which the host is a member. For example, on Windows this could be the host's Active Directory domain or NetBIOS domain name. For Linux this could be the domain of the host's LDAP provider. | keyword | @@ -154,6 +166,7 @@ Error logs collects the Apache error logs. | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. 
| text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | @@ -172,6 +185,7 @@ Error logs collects the Apache error logs. | source.address | Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store the raw address in the `.address` field. Then it should be duplicated to `.ip` or `.domain`, depending on which one it is. | keyword | | source.as.number | Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. | long | | source.as.organization.name | Organization name. | keyword | +| source.as.organization.name.text | Multi-field of `source.as.organization.name`. | match_only_text | | source.geo.city_name | City name. | keyword | | source.geo.continent_name | Name of the continent. | keyword | | source.geo.country_iso_code | Country ISO code. | keyword | @@ -185,13 +199,17 @@ Error logs collects the Apache error logs. | url.domain | Domain of the url, such as "www.elastic.co". In some cases a URL may refer to an IP and/or port directly, without a domain name. In this case, the IP address would go to the `domain` field. If the URL contains a literal IPv6 address enclosed by `[` and `]` (IETF RFC 2732), the `[` and `]` characters should also be captured in the `domain` field. | keyword | | url.extension | The field contains the file extension from the original request url, excluding the leading dot. The file extension is only set if it exists, as not every url has a file extension. The leading period must not be included. For example, the value must be "png", not ".png". 
Note that when the file name has multiple extensions (example.tar.gz), only the last one should be captured ("gz", not "tar.gz"). | keyword | | url.original | Unmodified original url as seen in the event source. Note that in network monitoring, the observed URL may be a full URL, whereas in access logs, the URL is often just represented as a path. This field is meant to represent the URL as it was observed, complete or not. | wildcard | +| url.original.text | Multi-field of `url.original`. | match_only_text | | url.path | Path of the request, such as "/search". | wildcard | | url.query | The query field describes the query string of the request, such as "q=elasticsearch". The `?` is excluded from the query string. If a URL contains no `?`, there is no query field. If there is a `?` but no query, the query field exists with an empty string. The `exists` query can be used to differentiate between the two cases. | keyword | | user.name | Short name or login of the user. | keyword | +| user.name.text | Multi-field of `user.name`. | match_only_text | | user_agent.device.name | Name of the device. | keyword | | user_agent.name | Name of the user agent. | keyword | | user_agent.original | Unparsed user_agent string. | keyword | +| user_agent.original.text | Multi-field of `user_agent.original`. | match_only_text | | user_agent.os.name | Operating system name, without the version. | keyword | +| user_agent.os.name.text | Multi-field of `user_agent.os.name`. | match_only_text | ## Metrics @@ -378,6 +396,7 @@ An example event for `status` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | | | host.os.name | Operating system name, without the version. | keyword | | | +| host.os.name.text | Multi-field of `host.os.name`. | text | | | | host.os.platform | Operating system platform (such centos, ubuntu, windows). 
| keyword | | | | host.os.version | Operating system version as a raw string. | keyword | | | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | | | diff --git a/test/packages/parallel/gcp/docs/compute.md b/test/packages/parallel/gcp/docs/compute.md index 9b301167e..c3c4b42f5 100644 --- a/test/packages/parallel/gcp/docs/compute.md +++ b/test/packages/parallel/gcp/docs/compute.md @@ -159,6 +159,7 @@ An example event for `compute` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | diff --git a/test/packages/parallel/nginx/docs/README.md b/test/packages/parallel/nginx/docs/README.md index 53ebb724c..b23e8ec4b 100644 --- a/test/packages/parallel/nginx/docs/README.md +++ b/test/packages/parallel/nginx/docs/README.md @@ -178,6 +178,7 @@ An example event for `access` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). 
| keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | @@ -194,6 +195,7 @@ An example event for `access` looks as following: | source.address | Some event source addresses are defined ambiguously. The event will sometimes list an IP, a domain or a unix socket. You should always store the raw address in the `.address` field. Then it should be duplicated to `.ip` or `.domain`, depending on which one it is. | keyword | | source.as.number | Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. | long | | source.as.organization.name | Organization name. | keyword | +| source.as.organization.name.text | Multi-field of `source.as.organization.name`. | match_only_text | | source.geo.city_name | City name. | keyword | | source.geo.continent_name | Name of the continent. | keyword | | source.geo.country_iso_code | Country ISO code. | keyword | @@ -207,14 +209,19 @@ An example event for `access` looks as following: | url.extension | The field contains the file extension from the original request url, excluding the leading dot. The file extension is only set if it exists, as not every url has a file extension. The leading period must not be included. For example, the value must be "png", not ".png". Note that when the file name has multiple extensions (example.tar.gz), only the last one should be captured ("gz", not "tar.gz"). | keyword | | url.fragment | Portion of the url after the `#`, such as "top". The `#` is not part of the fragment. | keyword | | url.original | Unmodified original url as seen in the event source. Note that in network monitoring, the observed URL may be a full URL, whereas in access logs, the URL is often just represented as a path. 
This field is meant to represent the URL as it was observed, complete or not. | wildcard | +| url.original.text | Multi-field of `url.original`. | match_only_text | | url.path | Path of the request, such as "/search". | wildcard | | url.scheme | Scheme of the request, such as "https". Note: The `:` is not part of the scheme. | keyword | | user.name | Short name or login of the user. | keyword | +| user.name.text | Multi-field of `user.name`. | match_only_text | | user_agent.device.name | Name of the device. | keyword | | user_agent.name | Name of the user agent. | keyword | | user_agent.original | Unparsed user_agent string. | keyword | +| user_agent.original.text | Multi-field of `user_agent.original`. | match_only_text | | user_agent.os.full | Operating system name, including the version or code name. | keyword | +| user_agent.os.full.text | Multi-field of `user_agent.os.full`. | match_only_text | | user_agent.os.name | Operating system name, without the version. | keyword | +| user_agent.os.name.text | Multi-field of `user_agent.os.name`. | match_only_text | | user_agent.os.version | Operating system version as a raw string. | keyword | | user_agent.version | Version of the user agent. | keyword | @@ -343,6 +350,7 @@ An example event for `error` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. 
| keyword | @@ -489,6 +497,7 @@ An example event for `stubstatus` looks as following: | host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | | host.os.kernel | Operating system kernel version as a raw string. | keyword | | host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | text | | host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | | host.os.version | Operating system version as a raw string. | keyword | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword |