Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Registered domain processor #67611

Merged
merged 14 commits into from
Apr 14, 2021
16 changes: 6 additions & 10 deletions modules/ingest-common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ dependencies {
compileOnly project(':modules:lang-painless')
api project(':libs:elasticsearch-grok')
api project(':libs:elasticsearch-dissect')
implementation "org.apache.httpcomponents:httpclient:${versions.httpclient}"
implementation "org.apache.httpcomponents:httpcore:${versions.httpcore}"
}

restResources {
Expand All @@ -35,16 +37,10 @@ testClusters.all {

tasks.named("thirdPartyAudit").configure {
ignoreMissingClasses(
// from log4j
'org.osgi.framework.AdaptPermission',
'org.osgi.framework.AdminPermission',
'org.osgi.framework.Bundle',
'org.osgi.framework.BundleActivator',
'org.osgi.framework.BundleContext',
'org.osgi.framework.BundleEvent',
'org.osgi.framework.SynchronousBundleListener',
'org.osgi.framework.wiring.BundleWire',
'org.osgi.framework.wiring.BundleWiring'
// commons-codec and commons-logging
'org.apache.commons.codec.binary.Base64',
'org.apache.commons.logging.Log',
'org.apache.commons.logging.LogFactory',
)
}

Expand Down
1 change: 1 addition & 0 deletions modules/ingest-common/licenses/httpclient-4.5.10.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7ca2e4276f4ef95e4db725a8cd4a1d1e7585b9e5
558 changes: 558 additions & 0 deletions modules/ingest-common/licenses/httpclient-LICENSE.txt

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions modules/ingest-common/licenses/httpclient-NOTICE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Apache HttpComponents Client
Copyright 1999-2016 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

1 change: 1 addition & 0 deletions modules/ingest-common/licenses/httpcore-4.4.12.jar.sha1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
21ebaf6d532bc350ba95bd81938fa5f0e511c132
241 changes: 241 additions & 0 deletions modules/ingest-common/licenses/httpcore-LICENSE.txt

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions modules/ingest-common/licenses/httpcore-NOTICE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Apache HttpComponents Core
Copyright 2005-2014 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

This project contains annotations derived from JCIP-ANNOTATIONS
Copyright (c) 2005 Brian Goetz and Tim Peierls. See http://www.jcip.net
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ public Map<String, Processor.Factory> getProcessors(Processor.Parameters paramet
entry(UriPartsProcessor.TYPE, new UriPartsProcessor.Factory()),
entry(NetworkDirectionProcessor.TYPE, new NetworkDirectionProcessor.Factory()),
entry(CommunityIdProcessor.TYPE, new CommunityIdProcessor.Factory()),
entry(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory())
entry(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory()),
entry(RegisteredDomainProcessor.TYPE, new RegisteredDomainProcessor.Factory())
);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.ingest.common;

import org.apache.http.conn.util.PublicSuffixMatcher;
import org.apache.http.conn.util.PublicSuffixMatcherLoader;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.ConfigurationUtils;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;

import java.util.Map;

/**
 * Ingest processor that reads a domain name from a document field and, using the
 * default {@link PublicSuffixMatcher}, writes up to four derived values under the
 * configured target prefix: {@code domain}, {@code registered_domain},
 * {@code top_level_domain} and {@code subdomain}.
 */
public class RegisteredDomainProcessor extends AbstractProcessor {
    private static final PublicSuffixMatcher SUFFIX_MATCHER = PublicSuffixMatcherLoader.getDefault();

    public static final String TYPE = "registered_domain";

    private final String field;
    private final String targetField;
    private final boolean ignoreMissing;

    /**
     * @param tag           processor tag, passed to {@link AbstractProcessor}
     * @param description   processor description, passed to {@link AbstractProcessor}
     * @param field         name of the document field that holds the domain name to analyse
     * @param targetField   prefix for the output fields; an empty string writes them without a prefix
     * @param ignoreMissing forwarded to the field lookup; when {@code true} the document is also
     *                      returned unchanged if no domain information could be derived
     */
    RegisteredDomainProcessor(
        String tag,
        String description,
        String field,
        String targetField,
        boolean ignoreMissing
    ) {
        super(tag, description);
        this.field = field;
        this.targetField = targetField;
        this.ignoreMissing = ignoreMissing;
    }

    /** @return the name of the source field */
    public String getField() {
        return field;
    }

    /** @return the prefix under which the output fields are written (may be empty) */
    public String getTargetField() {
        return targetField;
    }

    /** @return the configured {@code ignore_missing} flag */
    public boolean getIgnoreMissing() {
        return ignoreMissing;
    }

    /**
     * Derives the domain parts from the source field and stores every non-null part in the document.
     *
     * @param ingestDocument the document to enrich
     * @return the same document instance
     * @throws IllegalArgumentException if no domain information could be derived and
     *                                  {@code ignoreMissing} is {@code false}
     */
    @Override
    public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
        DomainInfo info = getRegisteredDomain(ingestDocument);
        if (info == null) {
            if (ignoreMissing) {
                return ingestDocument;
            } else {
                throw new IllegalArgumentException("unable to set domain information for document");
            }
        }

        // An empty target field means "write at the document root", so no separator is added.
        String fieldPrefix = targetField;
        if (fieldPrefix.isEmpty() == false) {
            fieldPrefix += ".";
        }
        String domainTarget = fieldPrefix + "domain";
        String registeredDomainTarget = fieldPrefix + "registered_domain";
        String subdomainTarget = fieldPrefix + "subdomain";
        String topLevelDomainTarget = fieldPrefix + "top_level_domain";

        // Only parts that could actually be determined are written.
        if (info.getDomain() != null) {
            ingestDocument.setFieldValue(domainTarget, info.getDomain());
        }
        if (info.getRegisteredDomain() != null) {
            ingestDocument.setFieldValue(registeredDomainTarget, info.getRegisteredDomain());
        }
        if (info.getETLD() != null) {
            ingestDocument.setFieldValue(topLevelDomainTarget, info.getETLD());
        }
        if (info.getSubdomain() != null) {
            ingestDocument.setFieldValue(subdomainTarget, info.getSubdomain());
        }
        return ingestDocument;
    }

    /**
     * Reads the source field and resolves its domain parts.
     *
     * @return the resolved parts, or {@code null} when the field value is {@code null}, when the
     *         matcher finds no domain root and the value is not itself a public suffix, or when the
     *         domain root contains no {@code "."}
     */
    private DomainInfo getRegisteredDomain(IngestDocument d) {
        String fieldString = d.getFieldValue(field, String.class, ignoreMissing);
        if (fieldString == null) {
            return null;
        }
        String registeredDomain = SUFFIX_MATCHER.getDomainRoot(fieldString);
        if (registeredDomain == null) {
            // No domain root: the value may itself be a public suffix, in which case it is
            // reported as both the domain and the top level domain.
            if (SUFFIX_MATCHER.matches(fieldString)) {
                return new DomainInfo(fieldString);
            }
            return null;
        }
        if (registeredDomain.indexOf('.') == -1) {
            // the domain root has no "." in it, so it cannot be split into a name and a suffix
            return null;
        }
        return new DomainInfo(registeredDomain, fieldString);
    }

    @Override
    public String getType() {
        return TYPE;
    }

    /**
     * Immutable holder for the parts derived from a domain name. Any part that could not be
     * determined is {@code null}. Declared static because it needs no state from the processor.
     */
    private static final class DomainInfo {
        private final String domain;
        private final String registeredDomain;
        private final String eTLD;
        private final String subdomain;

        /** Builds the info for a value that is itself a public suffix. */
        private DomainInfo(String eTLD) {
            this.domain = eTLD;
            this.eTLD = eTLD;
            this.registeredDomain = null;
            this.subdomain = null;
        }

        /**
         * Builds the info from a registered domain and the full domain it was derived from.
         * All parts are left {@code null} when {@code registeredDomain} has no {@code "."}
         * or ends with one.
         */
        private DomainInfo(String registeredDomain, String domain) {
            // Position of the first character after the first "."; 0 when there is no ".".
            int index = registeredDomain.indexOf('.') + 1;
            if (index > 0 && index < registeredDomain.length()) {
                this.domain = domain;
                this.eTLD = registeredDomain.substring(index);
                this.registeredDomain = registeredDomain;
                // Everything in front of ".<registeredDomain>" is the subdomain, if present.
                int subdomainIndex = domain.lastIndexOf("." + registeredDomain);
                if (subdomainIndex > 0) {
                    this.subdomain = domain.substring(0, subdomainIndex);
                } else {
                    this.subdomain = null;
                }
            } else {
                this.domain = null;
                this.eTLD = null;
                this.registeredDomain = null;
                this.subdomain = null;
            }
        }

        public String getDomain() {
            return domain;
        }

        public String getSubdomain() {
            return subdomain;
        }

        public String getRegisteredDomain() {
            return registeredDomain;
        }

        public String getETLD() {
            return eTLD;
        }
    }

    /**
     * Creates {@link RegisteredDomainProcessor} instances from pipeline configuration. Reads
     * {@code field}, {@code target_field} (default {@link #DEFAULT_TARGET_FIELD}) and
     * {@code ignore_missing} (default {@code true}).
     */
    public static final class Factory implements Processor.Factory {

        static final String DEFAULT_TARGET_FIELD = "";

        @Override
        public RegisteredDomainProcessor create(
            Map<String, Processor.Factory> registry,
            String processorTag,
            String description,
            Map<String, Object> config
        ) throws Exception {
            String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
            String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", DEFAULT_TARGET_FIELD);
            boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", true);

            return new RegisteredDomainProcessor(
                processorTag,
                description,
                field,
                targetField,
                ignoreMissing
            );
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.ingest.common;

import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.test.ESTestCase;
import org.junit.Before;

import java.util.HashMap;
import java.util.Map;

import static org.hamcrest.CoreMatchers.equalTo;

/**
 * Unit tests for {@link RegisteredDomainProcessor.Factory}: explicit configuration,
 * default values, and the mandatory {@code field} property.
 */
public class RegisteredDomainProcessorFactoryTests extends ESTestCase {

    private RegisteredDomainProcessor.Factory factory;

    @Before
    public void init() {
        factory = new RegisteredDomainProcessor.Factory();
    }

    /** Every explicitly configured property must be carried over to the created processor. */
    public void testCreate() throws Exception {
        Map<String, Object> config = new HashMap<>();

        String field = randomAlphaOfLength(6);
        config.put("field", field);
        String targetField = randomAlphaOfLength(6);
        config.put("target_field", targetField);
        boolean ignoreMissing = randomBoolean();
        config.put("ignore_missing", ignoreMissing);

        String processorTag = randomAlphaOfLength(10);
        RegisteredDomainProcessor processor = factory.create(null, processorTag, null, config);
        assertThat(processor.getTag(), equalTo(processorTag));
        // The source field was configured above, so verify it is wired through as well.
        assertThat(processor.getField(), equalTo(field));
        assertThat(processor.getTargetField(), equalTo(targetField));
        assertThat(processor.getIgnoreMissing(), equalTo(ignoreMissing));
    }

    /** With only {@code field} configured, the factory defaults must apply. */
    public void testCreateDefaults() throws Exception {
        Map<String, Object> config = new HashMap<>();

        String field = randomAlphaOfLength(6);
        config.put("field", field);

        String processorTag = randomAlphaOfLength(10);
        RegisteredDomainProcessor processor = factory.create(null, processorTag, null, config);
        assertThat(processor.getField(), equalTo(field));
        assertThat(processor.getTargetField(), equalTo(RegisteredDomainProcessor.Factory.DEFAULT_TARGET_FIELD));
        // The factory reads ignore_missing with a default of true.
        assertThat(processor.getIgnoreMissing(), equalTo(true));
    }

    /** Omitting {@code field} must be rejected with a parse exception. */
    public void testFieldRequired() throws Exception {
        Map<String, Object> config = new HashMap<>();
        String processorTag = randomAlphaOfLength(10);
        try {
            factory.create(null, processorTag, null, config);
            fail("factory create should have failed");
        } catch (ElasticsearchParseException e) {
            assertThat(e.getMessage(), equalTo("[field] required property is missing"));
        }
    }
}
Loading