Skip to content

Commit

Permalink
Pre-release 1.0.3-4: Double NaN fixes.
Browse files Browse the repository at this point in the history
Fixed multiple occurrences of NaN values in coding studies when a
category has only a single annotation. Reverted prefix pruning to
none by default. Added a coding test. Removed the deprecated dependency
on hucompute.utilities.
  • Loading branch information
Manuel Stoeckel committed Sep 5, 2019
1 parent 8614144 commit 02f6206
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 68 deletions.
26 changes: 4 additions & 22 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.hucompute.textimager.uima</groupId>
<artifactId>biofid-agreement</artifactId>
<version>1.0.3-3</version>
<version>1.0.3-4</version>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
Expand Down Expand Up @@ -127,27 +127,9 @@
<version>20180130</version>
</dependency>
<dependency>
<groupId>org.hucompute</groupId>
<artifactId>utilities</artifactId>
<version>0.1</version>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</exclusion>
<exclusion>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</exclusion>
<exclusion>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
</exclusion>
</exclusions>
<groupId>com.github.texttechnologylab</groupId>
<artifactId>Utilities</artifactId>
<version>-SNAPSHOT</version>
</dependency>
</dependencies>

Expand Down
18 changes: 7 additions & 11 deletions src/main/java/org/biofid/agreement/engine/AbstractIAAEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Streams;
import com.google.common.primitives.Doubles;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
Expand Down Expand Up @@ -173,7 +175,7 @@ public abstract class AbstractIAAEngine extends JCasConsumer_ImplBase {
name = PARAM_TARGET_LOCATION,
defaultValue = "System.out"
)
private static String targetLocation;
private String targetLocation;

/**
* Whether to overwrite existing files in the given target location.
Expand All @@ -186,7 +188,7 @@ public abstract class AbstractIAAEngine extends JCasConsumer_ImplBase {
name = PARAM_OVERWRITE_EXISTING,
defaultValue = "true"
)
private static Boolean pOverwriteExisting;
private Boolean pOverwriteExisting;

protected ExtendedLogger logger;
long viewCount;
Expand Down Expand Up @@ -249,7 +251,7 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
}
}

public CSVPrinter getCsvPrinter(@NotNull String suffix) throws IOException {
CSVPrinter getCsvPrinter(@NotNull String suffix) throws IOException {
Appendable targetAppendable;
switch (targetLocation) {
case "System.out":
Expand Down Expand Up @@ -293,16 +295,10 @@ protected HashSet<Annotation> getOverlappedAnnotations(JCas viewCas, Class<? ext
return overlappedAnnotations;
}

protected void printStudyResults(ICategorySpecificAgreement agreement, TreeSet<String> categories, Collection<String> annotators) {
for (String category : categories) {
System.out.printf("%s\t%f\n", category, agreement.calculateCategoryAgreement(category));
}
System.out.println();
}

protected void printStudyResultsAndStatistics(ICategorySpecificAgreement agreement, CountMap<String> categoryCount, HashMap<String, CountMap<String>> annotatorCategoryCount, TreeSet<String> categories, Collection<String> annotators, CSVPrinter csvPrinter) throws IOException {
for (String category : categories) {
csvPrinter.printRecord(category, categoryCount.get(category), agreement.calculateCategoryAgreement(category));
double value = agreement.calculateCategoryAgreement(category);
csvPrinter.printRecord(category, categoryCount.get(category), Double.isNaN(value) ? 0.0 : value);
}
csvPrinter.println();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,8 @@ private void createAgreementAnnotations(JCas jCas, LinkedHashMap<Integer, ICodin
StringArray categoryValuePairs = new StringArray(viewIAA, categories.size() * 2);
for (int i = 0; i < categoryStrings.length; i++) {
String category = categoryStrings[i];
String categoryAgreement = String.format("%f", ((ICategorySpecificAgreement) agreement).calculateCategoryAgreement(category));
double value = ((ICategorySpecificAgreement) agreement).calculateCategoryAgreement(category);
String categoryAgreement = String.format("%01.10f", Double.isNaN(value) ? 0.0 : value);
categoryValuePairs.set(i * 2, category);
categoryValuePairs.set(i * 2 + 1, categoryAgreement);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,6 @@ public ICodingAnnotationItem[] addItemSetsAsArray(Set<String>[] annotations) {
case ALL:
cartesianProduct.forEach(item -> items.add(this.addItemAsArray(getAnnotations(item))));
return items.toArray(new ICodingAnnotationItem[0]);
case MAX:
default:
TreeSet<List<String>> treeSet = new TreeSet<>(sortByAgreement);
treeSet.addAll(cartesianProduct);
List<String> last = treeSet.last();
ICodingAnnotationItem maxAgreementItem = this.addItemAsArray(getAnnotations(last));
return new ICodingAnnotationItem[]{maxAgreementItem};
case MATCH:
ArrayList<HashSet<String>> annotationSets = new ArrayList<>();
HashSet<String> allAnnotations = Sets.newHashSet();
Expand All @@ -85,6 +78,13 @@ public ICodingAnnotationItem[] addItemSetsAsArray(Set<String>[] annotations) {
items.add(this.addItemAsArray(getAnnotations(item)));
}
return items.toArray(new ICodingAnnotationItem[0]);
case MAX:
default:
TreeSet<List<String>> treeSet = new TreeSet<>(sortByAgreement);
treeSet.addAll(cartesianProduct);
List<String> last = treeSet.last();
ICodingAnnotationItem maxAgreementItem = this.addItemAsArray(getAnnotations(last));
return new ICodingAnnotationItem[]{maxAgreementItem};
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ public class TTLabCodingIAACollectionProcessingEngine extends CodingIAACollectio
public static final String PARAM_PRUNE_PREFIX = "pPrunePrefix";
@ConfigurationParameter(
name = PARAM_PRUNE_PREFIX,
defaultValue = "org.texttechnologylab.annotation.type."
mandatory = false,
defaultValue = ""
// defaultValue = "org.texttechnologylab.annotation.type."
)
private String pPrunePrefix;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ public class TTLabUnitizingIAACollectionProcessingEngine extends UnitizingIAACol
public static final String PARAM_PRUNE_PREFIX = "pPrunePrefix";
@ConfigurationParameter(
name = PARAM_PRUNE_PREFIX,
defaultValue = "org.texttechnologylab.annotation.type."
mandatory = false,
defaultValue = ""
// defaultValue = "org.texttechnologylab.annotation.type."
)
private String pPrunePrefix;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import org.apache.uima.util.CasIOUtils;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.hucompute.utilities.helper.RESTUtils;
import org.texttechnologylab.utilities.helper.RESTUtils;
import org.jetbrains.annotations.NotNull;
import org.json.JSONArray;
import org.json.JSONObject;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public class InterAnnotatorAgreementEngineTest {
@Test
public void testAnnotatorAgreement() {
try {
final boolean download = true;
final boolean download = false;

String xmiPath = "src/test/out/xmi/";
CollectionReader collection;
Expand Down Expand Up @@ -70,39 +70,43 @@ public void testAnnotatorAgreement() {
CsvPrinterEngine.PARAM_FILTER_FINGERPRINTED, filterFingerprinted
));

String[] includeFlags = new String[]{TTLabUnitizingIAACollectionProcessingEngine.METAPHOR, TTLabUnitizingIAACollectionProcessingEngine.METONYM, TTLabUnitizingIAACollectionProcessingEngine.SPECIFIC};
String[] unitizingIncludeFlags = new String[]{TTLabUnitizingIAACollectionProcessingEngine.METAPHOR, TTLabUnitizingIAACollectionProcessingEngine.METONYM, TTLabUnitizingIAACollectionProcessingEngine.SPECIFIC};
ab.add(AnalysisEngineFactory.createEngineDescription(
TTLabUnitizingIAACollectionProcessingEngine.class,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_ANNOTATION_CLASSES, annotationClasses,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_INCLUDE_FLAGS, includeFlags,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_INCLUDE_FLAGS, unitizingIncludeFlags,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_MIN_VIEWS, 2,
// TTLabUnitizingIAACollectionProcessingEngine.PARAM_ANNOTATOR_LIST, annotatorWhitelist,
// TTLabUnitizingIAACollectionProcessingEngine.PARAM_ANNOTATOR_RELATION, UnitizingIAACollectionProcessingEngine.WHITELIST,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_ANNOTATOR_LIST, annotatorBlacklist,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_ANNOTATOR_RELATION, UnitizingIAACollectionProcessingEngine.BLACKLIST,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_ANNOTATOR_LIST, annotatorWhitelist,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_ANNOTATOR_RELATION, UnitizingIAACollectionProcessingEngine.WHITELIST,
// TTLabUnitizingIAACollectionProcessingEngine.PARAM_ANNOTATOR_LIST, annotatorBlacklist,
// TTLabUnitizingIAACollectionProcessingEngine.PARAM_ANNOTATOR_RELATION, UnitizingIAACollectionProcessingEngine.BLACKLIST,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_FILTER_FINGERPRINTED, filterFingerprinted,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_MULTI_CAS_HANDLING, TTLabUnitizingIAACollectionProcessingEngine.BOTH,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_MIN_ANNOTATIONS, 10,
TTLabUnitizingIAACollectionProcessingEngine.PARAM_TARGET_LOCATION, "/resources/public/stoeckel/agreement/"
TTLabUnitizingIAACollectionProcessingEngine.PARAM_TARGET_LOCATION, "/resources/public/stoeckel/agreement/unitizing/"
));

String[] codingIncludeFlags = new String[]{TTLabCodingIAACollectionProcessingEngine.METAPHOR, TTLabCodingIAACollectionProcessingEngine.METONYM, TTLabCodingIAACollectionProcessingEngine.SPECIFIC};
ab.add(AnalysisEngineFactory.createEngineDescription(
TTLabCodingIAACollectionProcessingEngine.class,
TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATION_CLASSES, annotationClasses,
TTLabCodingIAACollectionProcessingEngine.PARAM_INCLUDE_FLAGS, codingIncludeFlags,
TTLabCodingIAACollectionProcessingEngine.PARAM_MIN_VIEWS, 2,
TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATOR_LIST, annotatorWhitelist,
TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATOR_RELATION, TTLabCodingIAACollectionProcessingEngine.WHITELIST,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATOR_LIST, annotatorBlacklist,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATOR_RELATION, UnitizingIAACollectionProcessingEngine.BLACKLIST,
TTLabCodingIAACollectionProcessingEngine.PARAM_FILTER_FINGERPRINTED, filterFingerprinted,
TTLabCodingIAACollectionProcessingEngine.PARAM_AGREEMENT_MEASURE, TTLabCodingIAACollectionProcessingEngine.KrippendorffAlphaAgreement,
TTLabCodingIAACollectionProcessingEngine.PARAM_SET_SELECTION_STRATEGY, SetSelectionStrategy.MAX,
TTLabCodingIAACollectionProcessingEngine.PARAM_MULTI_CAS_HANDLING, TTLabCodingIAACollectionProcessingEngine.BOTH,
TTLabCodingIAACollectionProcessingEngine.PARAM_MIN_ANNOTATIONS, 10,
TTLabCodingIAACollectionProcessingEngine.PARAM_TARGET_LOCATION, "/resources/public/stoeckel/agreement/coding/"
));

// includeFlags = new String[]{TTLabCodingIAACollectionProcessingEngine.METAPHOR, TTLabCodingIAACollectionProcessingEngine.METONYM};
// ab.add(AnalysisEngineFactory.createEngineDescription(
// TTLabCodingIAACollectionProcessingEngine.class,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATION_CLASSES, annotationClasses,
// TTLabCodingIAACollectionProcessingEngine.PARAM_INCLUDE_FLAGS, includeFlags,
// TTLabCodingIAACollectionProcessingEngine.PARAM_MIN_VIEWS, 2,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATOR_LIST, annotatorWhitelist,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATOR_RELATION, TTLabCodingIAACollectionProcessingEngine.WHITELIST,
// TTLabCodingIAACollectionProcessingEngine.PARAM_FILTER_FINGERPRINTED, filterFingerprinted,
// TTLabCodingIAACollectionProcessingEngine.PARAM_AGREEMENT_MEASURE, TTLabCodingIAACollectionProcessingEngine.KrippendorffAlphaAgreement,
// TTLabCodingIAACollectionProcessingEngine.PARAM_SET_SELECTION_STRATEGY, SetSelectionStrategy.MATCH,
// TTLabCodingIAACollectionProcessingEngine.PARAM_MULTI_CAS_HANDLING, TTLabCodingIAACollectionProcessingEngine.BOTH
// ));
// ab.add(AnalysisEngineFactory.createEngineDescription(
// TTLabCodingIAACollectionProcessingEngine.class,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATION_CLASSES, annotationClasses,
// TTLabCodingIAACollectionProcessingEngine.PARAM_INCLUDE_FLAGS, includeFlags,
// TTLabCodingIAACollectionProcessingEngine.PARAM_INCLUDE_FLAGS, codingIncludeFlags,
// TTLabCodingIAACollectionProcessingEngine.PARAM_MIN_VIEWS, 2,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATOR_LIST, annotatorWhitelist,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATOR_RELATION, TTLabCodingIAACollectionProcessingEngine.WHITELIST,
Expand All @@ -114,7 +118,7 @@ public void testAnnotatorAgreement() {
// ab.add(AnalysisEngineFactory.createEngineDescription(
// TTLabCodingIAACollectionProcessingEngine.class,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATION_CLASSES, annotationClasses,
// TTLabCodingIAACollectionProcessingEngine.PARAM_INCLUDE_FLAGS, includeFlags,
// TTLabCodingIAACollectionProcessingEngine.PARAM_INCLUDE_FLAGS, codingIncludeFlags,
// TTLabCodingIAACollectionProcessingEngine.PARAM_MIN_VIEWS, 2,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATOR_LIST, annotatorWhitelist,
// TTLabCodingIAACollectionProcessingEngine.PARAM_ANNOTATOR_RELATION, TTLabCodingIAACollectionProcessingEngine.WHITELIST,
Expand Down

0 comments on commit 02f6206

Please sign in to comment.