diff --git a/Data/src/main/java/org/tribuo/data/columnar/RowProcessor.java b/Data/src/main/java/org/tribuo/data/columnar/RowProcessor.java index f82dea47f..92d6f2d45 100644 --- a/Data/src/main/java/org/tribuo/data/columnar/RowProcessor.java +++ b/Data/src/main/java/org/tribuo/data/columnar/RowProcessor.java @@ -118,6 +118,21 @@ public RowProcessor(ResponseProcessor responseProcessor, Map + * Additionally this processor can extract and populate metadata fields on the generated examples + * (e.g., the row number, date stamps). + * @param metadataExtractors The metadata extractors to run per example. If two metadata extractors emit + * the same metadata name then the constructor throws a PropertyException. + * @param responseProcessor The response processor to use. + * @param fieldProcessorMap The keys are the field names and the values are the field processors to apply to those fields. + */ + public RowProcessor(List> metadataExtractors, ResponseProcessor responseProcessor, Map fieldProcessorMap) { + this(metadataExtractors,null,responseProcessor,fieldProcessorMap,Collections.emptySet()); + } + /** * Constructs a RowProcessor using the supplied responseProcessor to extract the response variable, * and the supplied fieldProcessorMap to control which fields are parsed and how they are parsed. diff --git a/Data/src/main/java/org/tribuo/data/columnar/processors/response/EmptyResponseProcessor.java b/Data/src/main/java/org/tribuo/data/columnar/processors/response/EmptyResponseProcessor.java new file mode 100644 index 000000000..153e10e73 --- /dev/null +++ b/Data/src/main/java/org/tribuo/data/columnar/processors/response/EmptyResponseProcessor.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.tribuo.data.columnar.processors.response; + +import com.oracle.labs.mlrg.olcut.config.Config; +import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance; +import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl; +import org.tribuo.Output; +import org.tribuo.OutputFactory; +import org.tribuo.data.columnar.ResponseProcessor; + +import java.util.Optional; + +/** + * A {@link ResponseProcessor} that always emits an empty optional. + *

+ * This class is designed to be used when loading columnar datasets + * which will never have a response (e.g., for clustering or anomaly detection). + *

+ * It still requires an output factory, even though it's never used to generate + * an output, because the output factory provides the type for the columnar infrastructure. + */ +public final class EmptyResponseProcessor<T extends Output<T>> implements ResponseProcessor<T> { + + public static final String FIELD_NAME = "TRIBUO##NULL_RESPONSE_PROCESSOR"; + + @Config(mandatory = true,description="Output factory to type the columnar loader.") + private OutputFactory<T> outputFactory; + + /** + * For OLCUT. + */ + private EmptyResponseProcessor() {} + + /** + * Constructs a response processor which never emits a response. + *

+ * It contains an output factory as this types the whole columnar infrastructure. + * @param outputFactory The output factory to use. + */ + public EmptyResponseProcessor(OutputFactory<T> outputFactory) { + this.outputFactory = outputFactory; + } + + @Override + public OutputFactory<T> getOutputFactory() { + return outputFactory; + } + + @Override + public String getFieldName() { + return FIELD_NAME; + } + + /** + * This is a no-op as the empty response processor doesn't inspect a field. + * @param fieldName The field name (ignored). + */ + @Deprecated + @Override + public void setFieldName(String fieldName) { } + + /** + * This method always returns {@link Optional#empty}. + * @param value The value to process. + * @return {@link Optional#empty}. + */ + @Override + public Optional<T> process(String value) { + return Optional.empty(); + } + + @Override + public String toString() { + return "EmptyResponseProcessor(outputFactory="+outputFactory+")"; + } + + @Override + public ConfiguredObjectProvenance getProvenance() { + return new ConfiguredObjectProvenanceImpl(this,"ResponseProcessor"); + } +} diff --git a/Data/src/main/java/org/tribuo/data/columnar/processors/response/package-info.java b/Data/src/main/java/org/tribuo/data/columnar/processors/response/package-info.java index 876975246..3baf42aa3 100644 --- a/Data/src/main/java/org/tribuo/data/columnar/processors/response/package-info.java +++ b/Data/src/main/java/org/tribuo/data/columnar/processors/response/package-info.java @@ -16,5 +16,10 @@ /** * Provides implementations of {@link org.tribuo.data.columnar.ResponseProcessor}. + *

+ * Note that the {@link org.tribuo.data.columnar.processors.response.EmptyResponseProcessor} + * should only be used when the columnar data source will never contain a response, and so + * the {@link org.tribuo.data.columnar.RowProcessor} should always return the unknown + * output of the appropriate type. */ package org.tribuo.data.columnar.processors.response; \ No newline at end of file diff --git a/Data/src/test/java/org/tribuo/data/columnar/processors/response/EmptyResponseProcessorTest.java b/Data/src/test/java/org/tribuo/data/columnar/processors/response/EmptyResponseProcessorTest.java new file mode 100644 index 000000000..d1deba0c2 --- /dev/null +++ b/Data/src/test/java/org/tribuo/data/columnar/processors/response/EmptyResponseProcessorTest.java @@ -0,0 +1,34 @@ +package org.tribuo.data.columnar.processors.response; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.tribuo.test.MockOutput; +import org.tribuo.test.MockOutputFactory; + +public class EmptyResponseProcessorTest { + + @Test + public void basicTest() { + MockOutputFactory outputFactory = new MockOutputFactory(); + EmptyResponseProcessor<MockOutput> rp = new EmptyResponseProcessor<>(outputFactory); + + // Check the output factory is stored correctly + Assertions.assertEquals(outputFactory,rp.getOutputFactory()); + + // Check the field name is right + Assertions.assertEquals(EmptyResponseProcessor.FIELD_NAME, rp.getFieldName()); + + // setFieldName is a no-op on this response processor + rp.setFieldName("Something"); + Assertions.assertEquals(EmptyResponseProcessor.FIELD_NAME, rp.getFieldName()); + + // Check that it doesn't throw exceptions when given odd text, and that it always returns Optional.empty. 
+ Assertions.assertFalse(rp.process("").isPresent()); + Assertions.assertFalse(rp.process("test").isPresent()); + Assertions.assertFalse(rp.process("!@$#$!").isPresent()); + Assertions.assertFalse(rp.process("\n").isPresent()); + Assertions.assertFalse(rp.process("\t").isPresent()); + Assertions.assertFalse(rp.process(null).isPresent()); + } + +}