diff --git a/x-pack/plugin/ingest/build.gradle b/x-pack/plugin/ingest/build.gradle new file mode 100644 index 0000000000000..f09c832728880 --- /dev/null +++ b/x-pack/plugin/ingest/build.gradle @@ -0,0 +1,41 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +apply plugin: 'elasticsearch.esplugin' +apply plugin: 'elasticsearch.internal-cluster-test' +esplugin { + name 'x-pack-ingest' + description 'Elasticsearch Expanded Pack Plugin - Ingest' + classname 'org.elasticsearch.xpack.ingest.IngestPlugin' + extendedPlugins = ['x-pack-core'] +} +archivesBaseName = 'x-pack-ingest' + +dependencies { + compileOnly project(path: xpackModule('core'), configuration: 'default') + testImplementation project(path: xpackModule('core'), configuration: 'testArtifacts') + testImplementation project(path: ':modules:ingest-common') + testImplementation project(path: ':modules:lang-mustache') + testImplementation project(path: ':modules:geo') + testImplementation project(path: xpackModule('monitoring'), configuration: 'testArtifacts') +} + +addQaCheckDependencies() + +testingConventions.enabled = false diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java new file mode 100644 index 0000000000000..9f673b34d5119 --- /dev/null +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.ingest.Processor; +import org.elasticsearch.plugins.Plugin; + +import java.util.Map; + +public class IngestPlugin extends Plugin implements org.elasticsearch.plugins.IngestPlugin { + + @Override + public Map getProcessors(Processor.Parameters parameters) { + return Map.of(UriPartsProcessor.TYPE, new UriPartsProcessor.Factory()); + } +} diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UriPartsProcessor.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UriPartsProcessor.java new file mode 100644 index 0000000000000..2a8d5cd2e8969 --- /dev/null +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UriPartsProcessor.java @@ -0,0 +1,123 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.ingest.AbstractProcessor; +import org.elasticsearch.ingest.ConfigurationUtils; +import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.ingest.Processor; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.HashMap; +import java.util.Map; + +public class UriPartsProcessor extends AbstractProcessor { + + public static final String TYPE = "uri_parts"; + + private final String field; + private final String targetField; + private final boolean removeIfSuccessful; + private final boolean keepOriginal; + + UriPartsProcessor(String tag, String description, String field, String targetField, boolean removeIfSuccessful, boolean keepOriginal) { + super(tag, description); + this.field = field; + this.targetField = targetField; + this.removeIfSuccessful = removeIfSuccessful; + this.keepOriginal = keepOriginal; + } + + public String getField() { + return field; + } + + public String getTargetField() { + return targetField; + } + + public boolean getRemoveIfSuccessful() { + return removeIfSuccessful; + } + + public boolean getKeepOriginal() { + return keepOriginal; + } + + @Override + public IngestDocument execute(IngestDocument ingestDocument) throws Exception { + String value = ingestDocument.getFieldValue(field, String.class); + + URI uri; + try { + uri = new URI(value); + } catch (URISyntaxException e) { + throw new IllegalArgumentException("unable to parse URI [" + value + "]"); + } + var uriParts = new HashMap(); + uriParts.put("domain", uri.getHost()); + if (uri.getFragment() != null) { + uriParts.put("fragment", uri.getFragment()); + } + if (keepOriginal) { + uriParts.put("original", value); + } + final String path = uri.getPath(); + if (path != null) { + uriParts.put("path", path); + if (path.contains(".")) { + int periodIndex = path.lastIndexOf('.'); + uriParts.put("extension", periodIndex < path.length() ? path.substring(periodIndex + 1) : ""); + } + } + if (uri.getPort() != -1) { + uriParts.put("port", uri.getPort()); + } + if (uri.getQuery() != null) { + uriParts.put("query", uri.getQuery()); + } + uriParts.put("scheme", uri.getScheme()); + final String userInfo = uri.getUserInfo(); + if (userInfo != null) { + uriParts.put("user_info", userInfo); + if (userInfo.contains(":")) { + int colonIndex = userInfo.indexOf(":"); + uriParts.put("username", userInfo.substring(0, colonIndex)); + uriParts.put("password", colonIndex < userInfo.length() ? userInfo.substring(colonIndex + 1) : ""); + } + } + + if (removeIfSuccessful && targetField.equals(field) == false) { + ingestDocument.removeField(field); + } + ingestDocument.setFieldValue(targetField, uriParts); + return ingestDocument; + } + + @Override + public String getType() { + return TYPE; + } + + public static final class Factory implements Processor.Factory { + + @Override + public UriPartsProcessor create( + Map registry, + String processorTag, + String description, + Map config + ) throws Exception { + String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); + String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", "url"); + boolean removeIfSuccessful = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "remove_if_successful", false); + boolean keepOriginal = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "keep_original", true); + return new UriPartsProcessor(processorTag, description, field, targetField, removeIfSuccessful, keepOriginal); + } + } +} diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorFactoryTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorFactoryTests.java new file mode 100644 index 0000000000000..c4d142b37946c --- /dev/null +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorFactoryTests.java @@ -0,0 +1,71 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.CoreMatchers.equalTo; + +public class UriPartsProcessorFactoryTests extends ESTestCase { + + private UriPartsProcessor.Factory factory; + + @Before + public void init() { + factory = new UriPartsProcessor.Factory(); + } + + public void testCreate() throws Exception { + Map config = new HashMap<>(); + String field = randomAlphaOfLength(6); + config.put("field", field); + String targetField = "url"; + if (randomBoolean()) { + targetField = randomAlphaOfLength(6); + config.put("target_field", targetField); + } + boolean removeIfSuccessful = randomBoolean(); + config.put("remove_if_successful", removeIfSuccessful); + boolean keepOriginal = randomBoolean(); + config.put("keep_original", keepOriginal); + + String processorTag = randomAlphaOfLength(10); + UriPartsProcessor uriPartsProcessor = factory.create(null, processorTag, null, config); + assertThat(uriPartsProcessor.getTag(), equalTo(processorTag)); + assertThat(uriPartsProcessor.getField(), equalTo(field)); + assertThat(uriPartsProcessor.getTargetField(), equalTo(targetField)); + assertThat(uriPartsProcessor.getRemoveIfSuccessful(), equalTo(removeIfSuccessful)); + assertThat(uriPartsProcessor.getKeepOriginal(), equalTo(keepOriginal)); + } + + public void testCreateNoFieldPresent() throws Exception { + Map config = new HashMap<>(); + config.put("value", "value1"); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (ElasticsearchParseException e) { + assertThat(e.getMessage(), equalTo("[field] required property is missing")); + } + } + + public void testCreateNullField() throws Exception { + Map config = new HashMap<>(); + config.put("field", null); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (ElasticsearchParseException e) { + assertThat(e.getMessage(), equalTo("[field] required property is missing")); + } + } +} diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorTests.java new file mode 100644 index 0000000000000..2153e34db2ac9 --- /dev/null +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorTests.java @@ -0,0 +1,206 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.test.ESTestCase; + +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.containsString; + +public class UriPartsProcessorTests extends ESTestCase { + + public void testUriParts() throws Exception { + + // simple URI + testUriParsing("http://www.google.com", Map.of("scheme", "http", "domain", "www.google.com", "path", "")); + + // custom port + testUriParsing("http://www.google.com:88", Map.of("scheme", "http", "domain", "www.google.com", "path", "", "port", 88)); + + // file + testUriParsing( + "http://www.google.com:88/google.png", + Map.of("scheme", "http", "domain", "www.google.com", "extension", "png", "path", "/google.png", "port", 88) + ); + + // fragment + testUriParsing( + "https://www.google.com:88/foo#bar", + Map.of("scheme", "https", "domain", "www.google.com", "fragment", "bar", "path", "/foo", "port", 88) + ); + + // path, extension + testUriParsing( + "https://www.google.com:88/foo.jpg", + Map.of("scheme", "https", "domain", "www.google.com", "path", "/foo.jpg", "extension", "jpg", "port", 88) + ); + + // query + testUriParsing( + "https://www.google.com:88/foo?key=val", + Map.of("scheme", "https", "domain", "www.google.com", "path", "/foo", "query", "key=val", "port", 88) + ); + + // user_info + testUriParsing( + "https://user:pw@www.google.com:88/foo", + Map.of( + "scheme", + "https", + "domain", + "www.google.com", + "path", + "/foo", + "port", + 88, + "user_info", + "user:pw", + "username", + "user", + "password", + "pw" + ) + ); + + // user_info without password + testUriParsing( + "https://user:@www.google.com:88/foo", + Map.of( + "scheme", + "https", + "domain", + "www.google.com", + "path", + "/foo", + "port", + 88, + "user_info", + "user:", + "username", + "user", + "password", + "" + ) + ); + + // everything! + testUriParsing( + "https://user:pw@testing.google.com:8080/foo/bar?foo1=bar1&foo2=bar2#anchorVal", + Map.of( + "scheme", + "https", + "domain", + "testing.google.com", + "fragment", + "anchorVal", + "path", + "/foo/bar", + "port", + 8080, + "username", + "user", + "password", + "pw", + "user_info", + "user:pw", + "query", + "foo1=bar1&foo2=bar2" + ) + ); + + // non-http schemes + testUriParsing( + "ftp://ftp.is.co.za/rfc/rfc1808.txt", + Map.of("scheme", "ftp", "path", "/rfc/rfc1808.txt", "extension", "txt", "domain", "ftp.is.co.za") + ); + + testUriParsing("telnet://192.0.2.16:80/", Map.of("scheme", "telnet", "path", "/", "port", 80, "domain", "192.0.2.16")); + + testUriParsing( + "ldap://[2001:db8::7]/c=GB?objectClass?one", + Map.of("scheme", "ldap", "path", "/c=GB", "query", "objectClass?one", "domain", "[2001:db8::7]") + ); + + // keep original + testUriParsing( + true, + false, + "http://www.google.com:88/foo#bar", + Map.of("scheme", "http", "domain", "www.google.com", "fragment", "bar", "path", "/foo", "port", 88) + ); + + // remove if successful + testUriParsing( + false, + true, + "http://www.google.com:88/foo#bar", + Map.of("scheme", "http", "domain", "www.google.com", "fragment", "bar", "path", "/foo", "port", 88) + ); + } + + public void testRemoveIfSuccessfulDoesNotRemoveTargetField() throws Exception { + String field = "field"; + UriPartsProcessor processor = new UriPartsProcessor(null, null, field, field, true, false); + + Map source = new HashMap<>(); + source.put(field, "http://www.google.com"); + IngestDocument input = new IngestDocument(source, Map.of()); + IngestDocument output = processor.execute(input); + + Map expectedSourceAndMetadata = new HashMap<>(); + expectedSourceAndMetadata.put(field, Map.of("scheme", "http", "domain", "www.google.com", "path", "")); + assertThat(output.getSourceAndMetadata().entrySet(), containsInAnyOrder(expectedSourceAndMetadata.entrySet().toArray())); + } + + public void testInvalidUri() { + String uri = "not:\\/_a_valid_uri"; + UriPartsProcessor processor = new UriPartsProcessor(null, null, "field", "url", true, false); + + Map source = new HashMap<>(); + source.put("field", uri); + IngestDocument input = new IngestDocument(source, Map.of()); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> processor.execute(input)); + assertThat(e.getMessage(), containsString("unable to parse URI [" + uri + "]")); + } + + private void testUriParsing(String uri, Map expectedValues) throws Exception { + testUriParsing(false, false, uri, expectedValues); + } + + private void testUriParsing(boolean keepOriginal, boolean removeIfSuccessful, String uri, Map expectedValues) + throws Exception { + UriPartsProcessor processor = new UriPartsProcessor(null, null, "field", "url", removeIfSuccessful, keepOriginal); + + Map source = new HashMap<>(); + source.put("field", uri); + IngestDocument input = new IngestDocument(source, Map.of()); + IngestDocument output = processor.execute(input); + + Map expectedSourceAndMetadata = new HashMap<>(); + + if (removeIfSuccessful == false) { + expectedSourceAndMetadata.put("field", uri); + } + + Map values; + if (keepOriginal) { + values = new HashMap<>(expectedValues); + values.put("original", uri); + } else { + values = expectedValues; + } + expectedSourceAndMetadata.put("url", values); + + assertThat(output.getSourceAndMetadata().entrySet(), containsInAnyOrder(expectedSourceAndMetadata.entrySet().toArray())); + } + +}