From df786f8cebc52c07e4eb3819b7e40bc3327875af Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Tue, 17 Nov 2020 10:03:04 -0600 Subject: [PATCH 1/9] URL parts processor with new ingest module --- x-pack/plugin/ingest/build.gradle | 39 +++++ .../xpack/ingest/IngestPlugin.java | 20 +++ .../xpack/ingest/UrlPartsProcessor.java | 120 +++++++++++++ .../ingest/UrlPartsProcessorFactoryTests.java | 71 ++++++++ .../xpack/ingest/UrlPartsProcessorTests.java | 161 ++++++++++++++++++ 5 files changed, 411 insertions(+) create mode 100644 x-pack/plugin/ingest/build.gradle create mode 100644 x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java create mode 100644 x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java create mode 100644 x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java create mode 100644 x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java diff --git a/x-pack/plugin/ingest/build.gradle b/x-pack/plugin/ingest/build.gradle new file mode 100644 index 0000000000000..2e571538d64bb --- /dev/null +++ b/x-pack/plugin/ingest/build.gradle @@ -0,0 +1,39 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +apply plugin: 'elasticsearch.esplugin' +apply plugin: 'elasticsearch.internal-cluster-test' +esplugin { + name 'x-pack-ingest' + description 'Elasticsearch Expanded Pack Plugin - Ingest' + classname 'org.elasticsearch.xpack.ingest.IngestPlugin' + extendedPlugins = ['x-pack-core'] +} +archivesBaseName = 'x-pack-ingest' + +dependencies { + compileOnly project(path: xpackModule('core'), configuration: 'default') + testImplementation project(path: xpackModule('core'), configuration: 'testArtifacts') + testImplementation project(path: ':modules:ingest-common') + testImplementation project(path: ':modules:lang-mustache') + testImplementation project(path: ':modules:geo') + testImplementation project(path: xpackModule('monitoring'), configuration: 'testArtifacts') +} + +addQaCheckDependencies() diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java new file mode 100644 index 0000000000000..b3b5d44cb3e70 --- /dev/null +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.ingest.Processor; +import org.elasticsearch.plugins.Plugin; + +import java.util.Map; + +public class IngestPlugin extends Plugin implements org.elasticsearch.plugins.IngestPlugin { + + @Override + public Map getProcessors(Processor.Parameters parameters) { + return Map.of(UrlPartsProcessor.TYPE, new UrlPartsProcessor.Factory()); + } +} diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java new file mode 100644 index 0000000000000..0db4da8b17e43 --- /dev/null +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java @@ -0,0 +1,120 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.ingest.AbstractProcessor; +import org.elasticsearch.ingest.ConfigurationUtils; +import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.ingest.Processor; + +import java.net.MalformedURLException; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; + +public class UrlPartsProcessor extends AbstractProcessor { + + public static final String TYPE = "url_parts"; + + private final String field; + private final String targetField; + private final boolean removeIfSuccessful; + private final boolean keepOriginal; + + UrlPartsProcessor(String tag, String description, String field, String targetField, boolean removeIfSuccessful, boolean keepOriginal) { + super(tag, description); + this.field = field; + this.targetField = targetField; + this.removeIfSuccessful = removeIfSuccessful; + this.keepOriginal = keepOriginal; + } + + public String getField() { + return field; + } + + public String getTargetField() { + return targetField; + } + + public boolean getRemoveIfSuccessful() { + return removeIfSuccessful; + } + + public boolean getKeepOriginal() { + return keepOriginal; + } + + @Override + public IngestDocument execute(IngestDocument ingestDocument) throws Exception { + String value = ingestDocument.getFieldValue(field, String.class); + + URL url; + try { + url = new URL(value); + } catch (MalformedURLException e) { + throw new IllegalArgumentException("unable to parse URL [" + value + "]"); + } + var urlParts = new HashMap(); + urlParts.put("domain", url.getHost()); + if (url.getRef() != null) { + urlParts.put("fragment", url.getRef()); + } + if (keepOriginal) { + urlParts.put("original", value); + } + final String path = url.getPath(); + if (path != null) { + urlParts.put("path", path); + if (path.contains(".")) { + int periodIndex = path.lastIndexOf('.'); + urlParts.put("extension", periodIndex < path.length() ? path.substring(periodIndex + 1) : ""); + } + } + if (url.getPort() != -1) { + urlParts.put("port", url.getPort()); + } + if (url.getQuery() != null) { + urlParts.put("query", url.getQuery()); + } + urlParts.put("scheme", url.getProtocol()); + final String userInfo = url.getUserInfo(); + if (userInfo != null) { + urlParts.put("user_info", userInfo); + if (userInfo.contains(":")) { + int colonIndex = userInfo.indexOf(":"); + urlParts.put("username", userInfo.substring(0, colonIndex)); + urlParts.put("password", colonIndex < userInfo.length() ? userInfo.substring(colonIndex + 1) : ""); + } + } + + if (removeIfSuccessful) { + ingestDocument.removeField(field); + } + ingestDocument.setFieldValue(targetField, urlParts); + return ingestDocument; + } + + @Override + public String getType() { + return TYPE; + } + + public static final class Factory implements Processor.Factory { + + @Override + public UrlPartsProcessor create( + Map registry, String processorTag, + String description, Map config) throws Exception { + String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); + String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", "url"); + boolean removeIfSuccessful = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "remove_if_successful", false); + boolean keepOriginal = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "keep_original", true); + return new UrlPartsProcessor(processorTag, description, field, targetField, removeIfSuccessful, keepOriginal); + } + } +} diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java new file mode 100644 index 0000000000000..d157798b12388 --- /dev/null +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java @@ -0,0 +1,71 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.CoreMatchers.equalTo; + +public class UrlPartsProcessorFactoryTests extends ESTestCase { + + private UrlPartsProcessor.Factory factory; + + @Before + public void init() { + factory = new UrlPartsProcessor.Factory(); + } + + public void testCreate() throws Exception { + Map config = new HashMap<>(); + String field = randomAlphaOfLength(6); + config.put("field", field); + String targetField = "url"; + if (randomBoolean()) { + targetField = randomAlphaOfLength(6); + config.put("target_field", targetField); + } + boolean removeIfSuccessful = randomBoolean(); + config.put("remove_if_successful", removeIfSuccessful); + boolean keepOriginal = randomBoolean(); + config.put("keep_original", keepOriginal); + + String processorTag = randomAlphaOfLength(10); + UrlPartsProcessor urlPartsProcessor = factory.create(null, processorTag, null, config); + assertThat(urlPartsProcessor.getTag(), equalTo(processorTag)); + assertThat(urlPartsProcessor.getField(), equalTo(field)); + assertThat(urlPartsProcessor.getTargetField(), equalTo(targetField)); + assertThat(urlPartsProcessor.getRemoveIfSuccessful(), equalTo(removeIfSuccessful)); + assertThat(urlPartsProcessor.getKeepOriginal(), equalTo(keepOriginal)); + } + + public void testCreateNoFieldPresent() throws Exception { + Map config = new HashMap<>(); + config.put("value", "value1"); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch(ElasticsearchParseException e) { + assertThat(e.getMessage(), equalTo("[field] required property is missing")); + } + } + + public void testCreateNullField() throws Exception { + Map config = new HashMap<>(); + config.put("field", null); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch(ElasticsearchParseException e) { + assertThat(e.getMessage(), equalTo("[field] required property is missing")); + } + } +} diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java new file mode 100644 index 0000000000000..12ae3fcc024be --- /dev/null +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java @@ -0,0 +1,161 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.test.ESTestCase; + +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.Matchers.containsInAnyOrder; + +public class UrlPartsProcessorTests extends ESTestCase { + + + public void testUrlParts() throws Exception { + + // simple URL + testUrlParsing("http://www.google.com", + Map.of( + "scheme", "http", + "domain", "www.google.com", + "path", "" + )); + + // custom port + testUrlParsing("http://www.google.com:88", + Map.of( + "scheme", "http", + "domain", "www.google.com", + "path", "", + "port", 88 + )); + + // file + testUrlParsing("http://www.google.com:88/google.png", + Map.of( + "scheme", "http", + "domain", "www.google.com", + "extension", "png", + "path", "/google.png", + "port", 88 + )); + + // fragment + testUrlParsing("https://www.google.com:88/foo#bar", + Map.of( + "scheme", "https", + "domain", "www.google.com", + "fragment", "bar", + "path", "/foo", + "port", 88 + )); + + // path, extension + testUrlParsing("https://www.google.com:88/foo.jpg", + Map.of( + "scheme", "https", + "domain", "www.google.com", + "path", "/foo.jpg", + "extension", "jpg", + "port", 88 + )); + + // query + testUrlParsing("https://www.google.com:88/foo?key=val", + Map.of( + "scheme", "https", + "domain", "www.google.com", + "path", "/foo", + "query", "key=val", + "port", 88 + )); + + // user_info + testUrlParsing("https://user:pw@www.google.com:88/foo", + Map.of( + "scheme", "https", + "domain", "www.google.com", + "path", "/foo", + "port", 88, + "user_info", "user:pw", + "username", "user", + "password", "pw" + )); + + // everything! + testUrlParsing("https://user:pw@testing.google.com:8080/foo/bar?foo1=bar1&foo2=bar2#anchorVal", + Map.of( + "scheme", "https", + "domain", "testing.google.com", + "fragment", "anchorVal", + "path", "/foo/bar", + "port", 8080, + "username", "user", + "password", "pw", + "user_info", "user:pw", + "query", "foo1=bar1&foo2=bar2" + )); + + // keep original + testUrlParsing(true, false, "http://www.google.com:88/foo#bar", + Map.of( + "scheme", "http", + "domain", "www.google.com", + "fragment", "bar", + "path", "/foo", + "port", 88 + )); + + // remove if successful + testUrlParsing(false, true, "http://www.google.com:88/foo#bar", + Map.of( + "scheme", "http", + "domain", "www.google.com", + "fragment", "bar", + "path", "/foo", + "port", 88 + )); + } + + private void testUrlParsing(String url, Map expectedValues) throws Exception { + testUrlParsing(false, false, url, expectedValues); + } + + private void testUrlParsing( + boolean keepOriginal, + boolean removeIfSuccessful, + String url, + Map expectedValues) throws Exception { + UrlPartsProcessor processor = new UrlPartsProcessor(null, null, "field", "url", removeIfSuccessful, keepOriginal); + + Map source = new HashMap<>(); + source.put("field", url); + IngestDocument input = new IngestDocument(source, Map.of()); + IngestDocument output = processor.execute(input); + + Map expectedSourceAndMetadata = new HashMap<>(); + + if (removeIfSuccessful == false) { + expectedSourceAndMetadata.put("field", url); + } + + Map values; + if (keepOriginal) { + values = new HashMap<>(expectedValues); + values.put("original", url); + } else { + values = expectedValues; + } + expectedSourceAndMetadata.put("url", values); + + assertThat(output.getSourceAndMetadata().entrySet(), containsInAnyOrder(expectedSourceAndMetadata.entrySet().toArray())); + } + + +} From 3d7650216f62b61f4a625887937406787557694c Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Tue, 17 Nov 2020 10:36:28 -0600 Subject: [PATCH 2/9] spotless making code less readable :( --- .../xpack/ingest/UrlPartsProcessor.java | 7 +- .../ingest/UrlPartsProcessorFactoryTests.java | 4 +- .../xpack/ingest/UrlPartsProcessorTests.java | 160 ++++++++---------- 3 files changed, 79 insertions(+), 92 deletions(-) diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java index 0db4da8b17e43..9c26ea7234f90 100644 --- a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java @@ -108,8 +108,11 @@ public static final class Factory implements Processor.Factory { @Override public UrlPartsProcessor create( - Map registry, String processorTag, - String description, Map config) throws Exception { + Map registry, + String processorTag, + String description, + Map config + ) throws Exception { String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", "url"); boolean removeIfSuccessful = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "remove_if_successful", false); diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java index d157798b12388..99cf6e7831f3d 100644 --- a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java @@ -53,7 +53,7 @@ public void testCreateNoFieldPresent() throws Exception { try { factory.create(null, null, null, config); fail("factory create should have failed"); - } catch(ElasticsearchParseException e) { + } catch (ElasticsearchParseException e) { assertThat(e.getMessage(), equalTo("[field] required property is missing")); } } @@ -64,7 +64,7 @@ public void testCreateNullField() throws Exception { try { factory.create(null, null, null, config); fail("factory create should have failed"); - } catch(ElasticsearchParseException e) { + } catch (ElasticsearchParseException e) { assertThat(e.getMessage(), equalTo("[field] required property is missing")); } } diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java index 12ae3fcc024be..b9f9f4de2ee54 100644 --- a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java @@ -16,122 +16,107 @@ public class UrlPartsProcessorTests extends ESTestCase { - public void testUrlParts() throws Exception { // simple URL - testUrlParsing("http://www.google.com", - Map.of( - "scheme", "http", - "domain", "www.google.com", - "path", "" - )); + testUrlParsing("http://www.google.com", Map.of("scheme", "http", "domain", "www.google.com", "path", "")); // custom port - testUrlParsing("http://www.google.com:88", - Map.of( - "scheme", "http", - "domain", "www.google.com", - "path", "", - "port", 88 - )); + testUrlParsing("http://www.google.com:88", Map.of("scheme", "http", "domain", "www.google.com", "path", "", "port", 88)); // file - testUrlParsing("http://www.google.com:88/google.png", - Map.of( - "scheme", "http", - "domain", "www.google.com", - "extension", "png", - "path", "/google.png", - "port", 88 - )); + testUrlParsing( + "http://www.google.com:88/google.png", + Map.of("scheme", "http", "domain", "www.google.com", "extension", "png", "path", "/google.png", "port", 88) + ); // fragment - testUrlParsing("https://www.google.com:88/foo#bar", - Map.of( - "scheme", "https", - "domain", "www.google.com", - "fragment", "bar", - "path", "/foo", - "port", 88 - )); + testUrlParsing( + "https://www.google.com:88/foo#bar", + Map.of("scheme", "https", "domain", "www.google.com", "fragment", "bar", "path", "/foo", "port", 88) + ); // path, extension - testUrlParsing("https://www.google.com:88/foo.jpg", - Map.of( - "scheme", "https", - "domain", "www.google.com", - "path", "/foo.jpg", - "extension", "jpg", - "port", 88 - )); + testUrlParsing( + "https://www.google.com:88/foo.jpg", + Map.of("scheme", "https", "domain", "www.google.com", "path", "/foo.jpg", "extension", "jpg", "port", 88) + ); // query - testUrlParsing("https://www.google.com:88/foo?key=val", - Map.of( - "scheme", "https", - "domain", "www.google.com", - "path", "/foo", - "query", "key=val", - "port", 88 - )); + testUrlParsing( + "https://www.google.com:88/foo?key=val", + Map.of("scheme", "https", "domain", "www.google.com", "path", "/foo", "query", "key=val", "port", 88) + ); // user_info - testUrlParsing("https://user:pw@www.google.com:88/foo", + testUrlParsing( + "https://user:pw@www.google.com:88/foo", Map.of( - "scheme", "https", - "domain", "www.google.com", - "path", "/foo", - "port", 88, - "user_info", "user:pw", - "username", "user", - "password", "pw" - )); + "scheme", + "https", + "domain", + "www.google.com", + "path", + "/foo", + "port", + 88, + "user_info", + "user:pw", + "username", + "user", + "password", + "pw" + ) + ); // everything! - testUrlParsing("https://user:pw@testing.google.com:8080/foo/bar?foo1=bar1&foo2=bar2#anchorVal", + testUrlParsing( + "https://user:pw@testing.google.com:8080/foo/bar?foo1=bar1&foo2=bar2#anchorVal", Map.of( - "scheme", "https", - "domain", "testing.google.com", - "fragment", "anchorVal", - "path", "/foo/bar", - "port", 8080, - "username", "user", - "password", "pw", - "user_info", "user:pw", - "query", "foo1=bar1&foo2=bar2" - )); + "scheme", + "https", + "domain", + "testing.google.com", + "fragment", + "anchorVal", + "path", + "/foo/bar", + "port", + 8080, + "username", + "user", + "password", + "pw", + "user_info", + "user:pw", + "query", + "foo1=bar1&foo2=bar2" + ) + ); // keep original - testUrlParsing(true, false, "http://www.google.com:88/foo#bar", - Map.of( - "scheme", "http", - "domain", "www.google.com", - "fragment", "bar", - "path", "/foo", - "port", 88 - )); + testUrlParsing( + true, + false, + "http://www.google.com:88/foo#bar", + Map.of("scheme", "http", "domain", "www.google.com", "fragment", "bar", "path", "/foo", "port", 88) + ); // remove if successful - testUrlParsing(false, true, "http://www.google.com:88/foo#bar", - Map.of( - "scheme", "http", - "domain", "www.google.com", - "fragment", "bar", - "path", "/foo", - "port", 88 - )); + testUrlParsing( + false, + true, + "http://www.google.com:88/foo#bar", + Map.of("scheme", "http", "domain", "www.google.com", "fragment", "bar", "path", "/foo", "port", 88) + ); } private void testUrlParsing(String url, Map expectedValues) throws Exception { testUrlParsing(false, false, url, expectedValues); } - private void testUrlParsing( - boolean keepOriginal, - boolean removeIfSuccessful, - String url, - Map expectedValues) throws Exception { + private void testUrlParsing(boolean keepOriginal, boolean removeIfSuccessful, String url, Map expectedValues) + throws Exception { UrlPartsProcessor processor = new UrlPartsProcessor(null, null, "field", "url", removeIfSuccessful, keepOriginal); Map source = new HashMap<>(); @@ -157,5 +142,4 @@ private void testUrlParsing( assertThat(output.getSourceAndMetadata().entrySet(), containsInAnyOrder(expectedSourceAndMetadata.entrySet().toArray())); } - } From 828914e249183e31c86cadf6041936c5ae3af7e3 Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Tue, 17 Nov 2020 10:49:30 -0600 Subject: [PATCH 3/9] forbidden APIs --- .../java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java index 9c26ea7234f90..da4f2adb61d8f 100644 --- a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java @@ -67,7 +67,7 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { if (keepOriginal) { urlParts.put("original", value); } - final String path = url.getPath(); + final String path = url.toURI().getPath(); if (path != null) { urlParts.put("path", path); if (path.contains(".")) { From caf2715201ea493f835e8e013e86dd2cc58de363 Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Tue, 17 Nov 2020 16:32:45 -0600 Subject: [PATCH 4/9] add tests, don't remove target field --- .../xpack/ingest/UrlPartsProcessor.java | 2 +- .../xpack/ingest/UrlPartsProcessorTests.java | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java index da4f2adb61d8f..fada7e1015124 100644 --- a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java @@ -92,7 +92,7 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { } } - if (removeIfSuccessful) { + if (removeIfSuccessful && targetField.equals(field) == false) { ingestDocument.removeField(field); } ingestDocument.setFieldValue(targetField, urlParts); diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java index b9f9f4de2ee54..a472d73a0e0d3 100644 --- a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java @@ -13,6 +13,7 @@ import java.util.Map; import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.containsString; public class UrlPartsProcessorTests extends ESTestCase { @@ -111,6 +112,32 @@ public void testUrlParts() throws Exception { ); } + public void testRemoveIfSuccessfulDoesNotRemoveTargetField() throws Exception { + String field = "field"; + UrlPartsProcessor processor = new UrlPartsProcessor(null, null, field, field, true, false); + + Map source = new HashMap<>(); + source.put(field, "http://www.google.com"); + IngestDocument input = new IngestDocument(source, Map.of()); + IngestDocument output = processor.execute(input); + + Map expectedSourceAndMetadata = new HashMap<>(); + expectedSourceAndMetadata.put(field, Map.of("scheme", "http", "domain", "www.google.com", "path", "")); + assertThat(output.getSourceAndMetadata().entrySet(), containsInAnyOrder(expectedSourceAndMetadata.entrySet().toArray())); + } + + public void testInvalidUrl() { + String url = "not_a_valid_url"; + UrlPartsProcessor processor = new UrlPartsProcessor(null, null, "field", "url", true, false); + + Map source = new HashMap<>(); + source.put("field", url); + IngestDocument input = new IngestDocument(source, Map.of()); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> processor.execute(input)); + assertThat(e.getMessage(), containsString("unable to parse URL [" + url + "]")); + } + private void testUrlParsing(String url, Map expectedValues) throws Exception { testUrlParsing(false, false, url, expectedValues); } From 0c9163fabd8d91ce375138d4c649307f311a9ea6 Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Wed, 18 Nov 2020 11:27:20 -0600 Subject: [PATCH 5/9] switch to java.net.URI and add additional test cases --- .../xpack/ingest/UrlPartsProcessor.java | 30 +++++++++---------- .../xpack/ingest/UrlPartsProcessorTests.java | 17 ++++++++++- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java index fada7e1015124..a3cecc9c1b881 100644 --- a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java @@ -11,8 +11,8 @@ import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; -import java.net.MalformedURLException; -import java.net.URL; +import java.net.URI; +import java.net.URISyntaxException; import java.util.HashMap; import java.util.Map; @@ -53,21 +53,21 @@ public boolean getKeepOriginal() { public IngestDocument execute(IngestDocument ingestDocument) throws Exception { String value = ingestDocument.getFieldValue(field, String.class); - URL url; + URI uri; try { - url = new URL(value); - } catch (MalformedURLException e) { + uri = new URI(value); + } catch (URISyntaxException e) { throw new IllegalArgumentException("unable to parse URL [" + value + "]"); } var urlParts = new HashMap(); - urlParts.put("domain", url.getHost()); - if (url.getRef() != null) { - urlParts.put("fragment", url.getRef()); + urlParts.put("domain", uri.getHost()); + if (uri.getFragment() != null) { + urlParts.put("fragment", uri.getFragment()); } if (keepOriginal) { urlParts.put("original", value); } - final String path = url.toURI().getPath(); + final String path = uri.getPath(); if (path != null) { urlParts.put("path", path); if (path.contains(".")) { @@ -75,14 +75,14 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { urlParts.put("extension", periodIndex < path.length() ? path.substring(periodIndex + 1) : ""); } } - if (url.getPort() != -1) { - urlParts.put("port", url.getPort()); + if (uri.getPort() != -1) { + urlParts.put("port", uri.getPort()); } - if (url.getQuery() != null) { - urlParts.put("query", url.getQuery()); + if (uri.getQuery() != null) { + urlParts.put("query", uri.getQuery()); } - urlParts.put("scheme", url.getProtocol()); - final String userInfo = url.getUserInfo(); + urlParts.put("scheme", uri.getScheme()); + final String userInfo = uri.getUserInfo(); if (userInfo != null) { urlParts.put("user_info", userInfo); if (userInfo.contains(":")) { diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java index a472d73a0e0d3..07141acb72548 100644 --- a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java @@ -95,6 +95,21 @@ public void testUrlParts() throws Exception { ) ); + testUrlParsing( + "ftp://ftp.is.co.za/rfc/rfc1808.txt", + Map.of("scheme", "ftp", "path", "/rfc/rfc1808.txt", "extension", "txt", "domain", "ftp.is.co.za") + ); + + testUrlParsing( + "telnet://192.0.2.16:80/", + Map.of("scheme", "telnet", "path", "/", "port", 80, "domain", "192.0.2.16") + ); + + testUrlParsing( + "ldap://[2001:db8::7]/c=GB?objectClass?one", + Map.of("scheme", "ldap", "path", "/c=GB", "query", "objectClass?one", "domain", "[2001:db8::7]") + ); + // keep original testUrlParsing( true, @@ -127,7 +142,7 @@ public void testRemoveIfSuccessfulDoesNotRemoveTargetField() throws Exception { } public void testInvalidUrl() { - String url = "not_a_valid_url"; + String url = "not:\\/_a_valid_url"; UrlPartsProcessor processor = new UrlPartsProcessor(null, null, "field", "url", true, false); Map source = new HashMap<>(); From 30b9effef3197e36bec1e8aba515f75bdb0262cf Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Wed, 18 Nov 2020 14:11:10 -0600 Subject: [PATCH 6/9] disable testing conventions check --- x-pack/plugin/ingest/build.gradle | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/ingest/build.gradle b/x-pack/plugin/ingest/build.gradle index 2e571538d64bb..f09c832728880 100644 --- a/x-pack/plugin/ingest/build.gradle +++ b/x-pack/plugin/ingest/build.gradle @@ -37,3 +37,5 @@ dependencies { } addQaCheckDependencies() + +testingConventions.enabled = false From 44bb39448e815e65dc9e51bd8e7f8ca1a2e897a4 Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Wed, 18 Nov 2020 14:17:06 -0600 Subject: [PATCH 7/9] spotless. again. --- .../elasticsearch/xpack/ingest/UrlPartsProcessorTests.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java index 07141acb72548..f2992e96f02c6 100644 --- a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java @@ -100,10 +100,7 @@ public void testUrlParts() throws Exception { Map.of("scheme", "ftp", "path", "/rfc/rfc1808.txt", "extension", "txt", "domain", "ftp.is.co.za") ); - testUrlParsing( - "telnet://192.0.2.16:80/", - Map.of("scheme", "telnet", "path", "/", "port", 80, "domain", "192.0.2.16") - ); + testUrlParsing("telnet://192.0.2.16:80/", Map.of("scheme", "telnet", "path", "/", "port", 80, "domain", "192.0.2.16")); testUrlParsing( "ldap://[2001:db8::7]/c=GB?objectClass?one", From 6b052cb443611b0d00552408350ad7192562ea62 Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Wed, 18 Nov 2020 15:04:54 -0600 Subject: [PATCH 8/9] rename URL to URI --- .../xpack/ingest/IngestPlugin.java | 2 +- ...sProcessor.java => UriPartsProcessor.java} | 38 ++++++------ ...ava => UriPartsProcessorFactoryTests.java} | 18 +++--- ...Tests.java => UriPartsProcessorTests.java} | 58 +++++++++---------- 4 files changed, 58 insertions(+), 58 deletions(-) rename x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/{UrlPartsProcessor.java => UriPartsProcessor.java} (77%) rename x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/{UrlPartsProcessorFactoryTests.java => UriPartsProcessorFactoryTests.java} (79%) rename x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/{UrlPartsProcessorTests.java => UriPartsProcessorTests.java} (80%) diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java index b3b5d44cb3e70..9f673b34d5119 100644 --- a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java @@ -15,6 +15,6 @@ public class IngestPlugin extends Plugin implements org.elasticsearch.plugins.In @Override public Map getProcessors(Processor.Parameters parameters) { - return Map.of(UrlPartsProcessor.TYPE, new UrlPartsProcessor.Factory()); + return Map.of(UriPartsProcessor.TYPE, new UriPartsProcessor.Factory()); } } diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UriPartsProcessor.java similarity index 77% rename from x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java rename to x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UriPartsProcessor.java index a3cecc9c1b881..2a8d5cd2e8969 100644 --- a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UrlPartsProcessor.java +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/UriPartsProcessor.java @@ -16,16 +16,16 @@ import java.util.HashMap; import java.util.Map; -public class UrlPartsProcessor extends AbstractProcessor { +public class UriPartsProcessor extends AbstractProcessor { - public static final String TYPE = "url_parts"; + public static final String TYPE = "uri_parts"; private final String field; private final String targetField; private final boolean removeIfSuccessful; private final boolean keepOriginal; - UrlPartsProcessor(String tag, String description, String field, String targetField, boolean removeIfSuccessful, boolean keepOriginal) { + UriPartsProcessor(String tag, String description, String field, String targetField, boolean removeIfSuccessful, boolean keepOriginal) { super(tag, description); this.field = field; this.targetField = targetField; @@ -57,45 +57,45 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception { try { uri = new URI(value); } catch (URISyntaxException e) { - throw new IllegalArgumentException("unable to parse URL [" + value + "]"); + throw new IllegalArgumentException("unable to parse URI [" + value + "]"); } - var urlParts = new HashMap(); - urlParts.put("domain", uri.getHost()); + var uriParts = new HashMap(); + uriParts.put("domain", uri.getHost()); if (uri.getFragment() != null) { - urlParts.put("fragment", uri.getFragment()); + uriParts.put("fragment", uri.getFragment()); } if (keepOriginal) { - urlParts.put("original", value); + uriParts.put("original", value); } final String path = uri.getPath(); if (path != null) { - urlParts.put("path", path); + uriParts.put("path", path); if (path.contains(".")) { int periodIndex = path.lastIndexOf('.'); - urlParts.put("extension", periodIndex < path.length() ? path.substring(periodIndex + 1) : ""); + uriParts.put("extension", periodIndex < path.length() ? path.substring(periodIndex + 1) : ""); } } if (uri.getPort() != -1) { - urlParts.put("port", uri.getPort()); + uriParts.put("port", uri.getPort()); } if (uri.getQuery() != null) { - urlParts.put("query", uri.getQuery()); + uriParts.put("query", uri.getQuery()); } - urlParts.put("scheme", uri.getScheme()); + uriParts.put("scheme", uri.getScheme()); final String userInfo = uri.getUserInfo(); if (userInfo != null) { - urlParts.put("user_info", userInfo); + uriParts.put("user_info", userInfo); if (userInfo.contains(":")) { int colonIndex = userInfo.indexOf(":"); - urlParts.put("username", userInfo.substring(0, colonIndex)); - urlParts.put("password", colonIndex < userInfo.length() ? userInfo.substring(colonIndex + 1) : ""); + uriParts.put("username", userInfo.substring(0, colonIndex)); + uriParts.put("password", colonIndex < userInfo.length() ? userInfo.substring(colonIndex + 1) : ""); } } if (removeIfSuccessful && targetField.equals(field) == false) { ingestDocument.removeField(field); } - ingestDocument.setFieldValue(targetField, urlParts); + ingestDocument.setFieldValue(targetField, uriParts); return ingestDocument; } @@ -107,7 +107,7 @@ public String getType() { public static final class Factory implements Processor.Factory { @Override - public UrlPartsProcessor create( + public UriPartsProcessor create( Map registry, String processorTag, String description, @@ -117,7 +117,7 @@ public UrlPartsProcessor create( String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", "url"); boolean removeIfSuccessful = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "remove_if_successful", false); boolean keepOriginal = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "keep_original", true); - return new UrlPartsProcessor(processorTag, description, field, targetField, removeIfSuccessful, keepOriginal); + return new UriPartsProcessor(processorTag, description, field, targetField, removeIfSuccessful, keepOriginal); } } } diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorFactoryTests.java similarity index 79% rename from x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java rename to x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorFactoryTests.java index 99cf6e7831f3d..c4d142b37946c 100644 --- a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorFactoryTests.java +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorFactoryTests.java @@ -15,13 +15,13 @@ import static org.hamcrest.CoreMatchers.equalTo; -public class UrlPartsProcessorFactoryTests extends ESTestCase { +public class UriPartsProcessorFactoryTests extends ESTestCase { - private UrlPartsProcessor.Factory factory; + private UriPartsProcessor.Factory factory; @Before public void init() { - factory = new UrlPartsProcessor.Factory(); + factory = new UriPartsProcessor.Factory(); } public void testCreate() throws Exception { @@ -39,12 +39,12 @@ public void testCreate() throws Exception { config.put("keep_original", keepOriginal); String processorTag = randomAlphaOfLength(10); - UrlPartsProcessor urlPartsProcessor = factory.create(null, processorTag, null, config); - assertThat(urlPartsProcessor.getTag(), equalTo(processorTag)); - assertThat(urlPartsProcessor.getField(), equalTo(field)); - assertThat(urlPartsProcessor.getTargetField(), equalTo(targetField)); - assertThat(urlPartsProcessor.getRemoveIfSuccessful(), equalTo(removeIfSuccessful)); - assertThat(urlPartsProcessor.getKeepOriginal(), equalTo(keepOriginal)); + UriPartsProcessor uriPartsProcessor = factory.create(null, processorTag, null, config); + assertThat(uriPartsProcessor.getTag(), equalTo(processorTag)); + assertThat(uriPartsProcessor.getField(), equalTo(field)); + assertThat(uriPartsProcessor.getTargetField(), equalTo(targetField)); + assertThat(uriPartsProcessor.getRemoveIfSuccessful(), equalTo(removeIfSuccessful)); + assertThat(uriPartsProcessor.getKeepOriginal(), equalTo(keepOriginal)); } public void testCreateNoFieldPresent() throws Exception { diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorTests.java similarity index 80% rename from x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java rename to x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorTests.java index f2992e96f02c6..8d09d27b21ba5 100644 --- a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UrlPartsProcessorTests.java +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorTests.java @@ -15,42 +15,42 @@ import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; -public class UrlPartsProcessorTests extends ESTestCase { +public class UriPartsProcessorTests extends ESTestCase { - public void testUrlParts() throws Exception { + public void testUriParts() throws Exception { - // simple URL - testUrlParsing("http://www.google.com", Map.of("scheme", "http", "domain", "www.google.com", "path", "")); + // simple URI + testUriParsing("http://www.google.com", Map.of("scheme", "http", "domain", "www.google.com", "path", "")); // custom port - testUrlParsing("http://www.google.com:88", Map.of("scheme", "http", "domain", "www.google.com", "path", "", "port", 88)); + testUriParsing("http://www.google.com:88", Map.of("scheme", "http", "domain", "www.google.com", "path", "", "port", 88)); // file - testUrlParsing( + testUriParsing( "http://www.google.com:88/google.png", Map.of("scheme", "http", "domain", "www.google.com", "extension", "png", "path", "/google.png", "port", 88) ); // fragment - testUrlParsing( + testUriParsing( "https://www.google.com:88/foo#bar", Map.of("scheme", "https", "domain", "www.google.com", "fragment", "bar", "path", "/foo", "port", 88) ); // path, extension - testUrlParsing( + testUriParsing( "https://www.google.com:88/foo.jpg", Map.of("scheme", "https", "domain", "www.google.com", "path", "/foo.jpg", "extension", "jpg", "port", 88) ); // query - testUrlParsing( + testUriParsing( "https://www.google.com:88/foo?key=val", Map.of("scheme", "https", "domain", "www.google.com", "path", "/foo", "query", "key=val", "port", 88) ); // user_info - testUrlParsing( + testUriParsing( "https://user:pw@www.google.com:88/foo", Map.of( "scheme", @@ -71,7 +71,7 @@ public void testUrlParts() throws Exception { ); // everything! - testUrlParsing( + testUriParsing( "https://user:pw@testing.google.com:8080/foo/bar?foo1=bar1&foo2=bar2#anchorVal", Map.of( "scheme", @@ -95,20 +95,20 @@ public void testUrlParts() throws Exception { ) ); - testUrlParsing( + testUriParsing( "ftp://ftp.is.co.za/rfc/rfc1808.txt", Map.of("scheme", "ftp", "path", "/rfc/rfc1808.txt", "extension", "txt", "domain", "ftp.is.co.za") ); - testUrlParsing("telnet://192.0.2.16:80/", Map.of("scheme", "telnet", "path", "/", "port", 80, "domain", "192.0.2.16")); + testUriParsing("telnet://192.0.2.16:80/", Map.of("scheme", "telnet", "path", "/", "port", 80, "domain", "192.0.2.16")); - testUrlParsing( + testUriParsing( "ldap://[2001:db8::7]/c=GB?objectClass?one", Map.of("scheme", "ldap", "path", "/c=GB", "query", "objectClass?one", "domain", "[2001:db8::7]") ); // keep original - testUrlParsing( + testUriParsing( true, false, "http://www.google.com:88/foo#bar", @@ -116,7 +116,7 @@ public void testUrlParts() throws Exception { ); // remove if successful - testUrlParsing( + testUriParsing( false, true, "http://www.google.com:88/foo#bar", @@ -126,7 +126,7 @@ public void testUrlParts() throws Exception { public void testRemoveIfSuccessfulDoesNotRemoveTargetField() throws Exception { String field = "field"; - UrlPartsProcessor processor = new UrlPartsProcessor(null, null, field, field, true, false); + UriPartsProcessor processor = new UriPartsProcessor(null, null, field, field, true, false); Map source = new HashMap<>(); source.put(field, "http://www.google.com"); @@ -138,41 +138,41 @@ public void testRemoveIfSuccessfulDoesNotRemoveTargetField() throws Exception { assertThat(output.getSourceAndMetadata().entrySet(), containsInAnyOrder(expectedSourceAndMetadata.entrySet().toArray())); } - public void testInvalidUrl() { - String url = "not:\\/_a_valid_url"; - UrlPartsProcessor processor = new UrlPartsProcessor(null, null, "field", "url", true, false); + public void testInvalidUri() { + String uri = "not:\\/_a_valid_uri"; + UriPartsProcessor processor = new UriPartsProcessor(null, null, "field", "url", true, false); Map source = new HashMap<>(); - source.put("field", url); + source.put("field", uri); IngestDocument input = new IngestDocument(source, Map.of()); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> processor.execute(input)); - assertThat(e.getMessage(), containsString("unable to parse URL [" + url + "]")); + assertThat(e.getMessage(), containsString("unable to parse URI [" + uri + "]")); } - private void testUrlParsing(String url, Map expectedValues) throws Exception { - testUrlParsing(false, false, url, expectedValues); + private void testUriParsing(String uri, Map expectedValues) throws Exception { + testUriParsing(false, false, uri, expectedValues); } - private void testUrlParsing(boolean keepOriginal, boolean removeIfSuccessful, String url, Map expectedValues) + private void testUriParsing(boolean keepOriginal, boolean removeIfSuccessful, String uri, Map expectedValues) throws Exception { - UrlPartsProcessor processor = new UrlPartsProcessor(null, null, "field", "url", removeIfSuccessful, keepOriginal); + UriPartsProcessor processor = new UriPartsProcessor(null, null, "field", "url", removeIfSuccessful, keepOriginal); Map source = new HashMap<>(); - source.put("field", url); + source.put("field", uri); IngestDocument input = new IngestDocument(source, Map.of()); IngestDocument output = processor.execute(input); Map expectedSourceAndMetadata = new HashMap<>(); if (removeIfSuccessful == false) { - expectedSourceAndMetadata.put("field", url); + expectedSourceAndMetadata.put("field", uri); } Map values; if (keepOriginal) { values = new HashMap<>(expectedValues); - values.put("original", url); + values.put("original", uri); } else { values = expectedValues; } From c128522e4afef02dd24f5279d9fee483ba921b8a Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Fri, 20 Nov 2020 07:54:48 -0600 Subject: [PATCH 9/9] test case for blank password in user info --- .../xpack/ingest/UriPartsProcessorTests.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorTests.java index 8d09d27b21ba5..2153e34db2ac9 100644 --- a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorTests.java +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/UriPartsProcessorTests.java @@ -70,6 +70,27 @@ public void testUriParts() throws Exception { ) ); + // user_info without password + testUriParsing( + "https://user:@www.google.com:88/foo", + Map.of( + "scheme", + "https", + "domain", + "www.google.com", + "path", + "/foo", + "port", + 88, + "user_info", + "user:", + "username", + "user", + "password", + "" + ) + ); + // everything! testUriParsing( "https://user:pw@testing.google.com:8080/foo/bar?foo1=bar1&foo2=bar2#anchorVal", @@ -95,6 +116,7 @@ public void testUriParts() throws Exception { ) ); + // non-http schemes testUriParsing( "ftp://ftp.is.co.za/rfc/rfc1808.txt", Map.of("scheme", "ftp", "path", "/rfc/rfc1808.txt", "extension", "txt", "domain", "ftp.is.co.za")