Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding an empty response processor that always returns an empty optional #99

Merged
merged 2 commits into from
Nov 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Data/src/main/java/org/tribuo/data/columnar/RowProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,21 @@ public RowProcessor(ResponseProcessor<T> responseProcessor, Map<String,FieldProc
this(Collections.emptyList(),null,responseProcessor,fieldProcessorMap,featureProcessors);
}

/**
 * Builds a RowProcessor which reads the response variable with the supplied responseProcessor
 * and parses the input fields according to the supplied fieldProcessorMap.
 * <p>
 * The supplied metadata extractors are used to extract and populate metadata fields
 * on the generated examples (e.g., the row number, date stamps).
 * <p>
 * This is a convenience overload: it forwards to the five argument constructor, supplying
 * {@code null} for the second argument and an empty set in the feature processors position.
 * @param metadataExtractors The metadata extractors to run per example. If two metadata extractors emit
 *                           the same metadata name then the constructor throws a PropertyException.
 * @param responseProcessor The response processor to use.
 * @param fieldProcessorMap A map from field names to the field processors applied to those fields.
 */
public RowProcessor(List<FieldExtractor<?>> metadataExtractors,
                    ResponseProcessor<T> responseProcessor,
                    Map<String,FieldProcessor> fieldProcessorMap) {
    this(metadataExtractors, null, responseProcessor, fieldProcessorMap, Collections.emptySet());
}

/**
* Constructs a RowProcessor using the supplied responseProcessor to extract the response variable,
* and the supplied fieldProcessorMap to control which fields are parsed and how they are parsed.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.tribuo.data.columnar.processors.response;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import org.tribuo.Output;
import org.tribuo.OutputFactory;
import org.tribuo.data.columnar.ResponseProcessor;

import java.util.Optional;

/**
 * A {@link ResponseProcessor} that always emits an empty optional.
 * <p>
 * This class is designed to be used when loading columnar datasets
 * which will never have a response (e.g., for clustering or anomaly detection).
 * <p>
 * It still requires an output factory, even though it's never used to generate
 * an output, because the output factory provides the type for the columnar infrastructure.
 */
public final class EmptyResponseProcessor<T extends Output<T>> implements ResponseProcessor<T> {

    /**
     * The placeholder field name reported by {@link #getFieldName()}.
     */
    public static final String FIELD_NAME = "TRIBUO##NULL_RESPONSE_PROCESSOR";

    // Not final as it is populated by the configuration system when the private constructor is used.
    @Config(mandatory = true,description="Output factory to type the columnar loader.")
    private OutputFactory<T> outputFactory;

    /**
     * for OLCUT.
     */
    private EmptyResponseProcessor() {}

    /**
     * Constructs a response processor which never emits a response.
     * <p>
     * It contains an output factory as this types the whole columnar infrastructure.
     * @param outputFactory The output factory to use.
     */
    public EmptyResponseProcessor(OutputFactory<T> outputFactory) {
        this.outputFactory = outputFactory;
    }

    /**
     * Returns the output factory this processor was constructed or configured with.
     * @return The output factory.
     */
    @Override
    public OutputFactory<T> getOutputFactory() {
        return outputFactory;
    }

    /**
     * Returns the placeholder field name, which is always {@link #FIELD_NAME}.
     * @return {@link #FIELD_NAME}.
     */
    @Override
    public String getFieldName() {
        return FIELD_NAME;
    }

    /**
     * This is a no-op as the empty response processor doesn't inspect a field.
     * @param fieldName The field name, which is ignored.
     * @deprecated This call has no effect, {@link #getFieldName()} always returns {@link #FIELD_NAME}.
     */
    @Deprecated
    @Override
    public void setFieldName(String fieldName) { }

    /**
     * This method always returns {@link Optional#empty}.
     * @param value The value to process. It is never inspected, so any input (including null) is accepted.
     * @return {@link Optional#empty}.
     */
    @Override
    public Optional<T> process(String value) {
        return Optional.empty();
    }

    @Override
    public String toString() {
        // Plain concatenation renders a null factory as "null" rather than throwing,
        // which keeps toString usable on an instance that has not been configured yet.
        return "EmptyResponseProcessor(outputFactory=" + outputFactory + ")";
    }

    @Override
    public ConfiguredObjectProvenance getProvenance() {
        return new ConfiguredObjectProvenanceImpl(this,"ResponseProcessor");
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,10 @@

/**
* Provides implementations of {@link org.tribuo.data.columnar.ResponseProcessor}.
* <p>
* Note that the {@link org.tribuo.data.columnar.processors.response.EmptyResponseProcessor}
* should only be used when the columnar data source will never contain a response, in which
* case the {@link org.tribuo.data.columnar.RowProcessor} should always return the unknown
* output of the appropriate type.
*/
package org.tribuo.data.columnar.processors.response;
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.tribuo.data.columnar.processors.response;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.tribuo.test.MockOutput;
import org.tribuo.test.MockOutputFactory;

/**
 * Unit tests for {@link EmptyResponseProcessor}.
 */
public class EmptyResponseProcessorTest {

    @Test
    public void basicTest() {
        MockOutputFactory factory = new MockOutputFactory();
        EmptyResponseProcessor<MockOutput> processor = new EmptyResponseProcessor<>(factory);

        // The factory handed to the constructor must be the one reported back.
        Assertions.assertEquals(factory, processor.getOutputFactory());

        // The processor reports the fixed placeholder field name.
        Assertions.assertEquals(EmptyResponseProcessor.FIELD_NAME, processor.getFieldName());

        // Attempting to change the field name has no effect.
        processor.setFieldName("Something");
        Assertions.assertEquals(EmptyResponseProcessor.FIELD_NAME, processor.getFieldName());

        // Whatever text is supplied (including odd text and null), processing must not throw
        // and must always produce Optional.empty.
        String[] inputs = {"", "test", "!@$#$!", "\n", "\t", null};
        for (String input : inputs) {
            Assertions.assertFalse(processor.process(input).isPresent());
        }
    }

}