Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Making decorator for text retrieval configurable #324

Merged
merged 4 commits into from
Jul 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,27 +81,32 @@ public boolean persist(List<PersistentTuple> tuples) {
return o;
}
}).toArray();
insert.append(values);
if (insert.serializedSize() >= Constants.MAX_PAGE_SIZE_BYTES) {
LOGGER.trace("Inserting msg of size {} into {}", insert.serializedSize(), this.fqn);
if (insert.serializedSize() >= Constants.MAX_PAGE_SIZE_BYTES - 10_000) { // cottontail sometimes acts up which is why we don't fully trust the max size
LOGGER.trace("Inserting msg of size {} with {} elements into {}", insert.serializedSize(), insert.count(), this.fqn);
this.cottontail.client.insert(insert);
insert = new BatchInsert().into(this.fqn).columns(this.names);
if (useTransactions) {
insert.txId(txId);
}
}
boolean append = insert.append(values);
if (!append) {
LOGGER.error("Value could not be appended to batch-insert");
}
}
if (insert.count() > 0) {
LOGGER.trace("Inserting msg of size {} into {}", insert.serializedSize(), this.fqn);
LOGGER.trace("Finalizing: Inserting msg of size {} with {} elements into {}", insert.serializedSize(), insert.count(), this.fqn);
this.cottontail.client.insert(insert);
}
if (useTransactions) {
LOGGER.trace("Committing");
this.cottontail.client.commit(txId);
}
long stop = System.currentTimeMillis();
LOGGER.trace("Completed insert of {} elements in {} ms", size, stop - start);
return true;
} catch (StatusRuntimeException e) {
LOGGER.error(e);
if (useTransactions) {
this.cottontail.client.rollback(txId);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.vitrivr.cineast.core.features;

import java.util.Map;
import org.vitrivr.cineast.core.data.entities.SimpleFulltextFeatureDescriptor;
import org.vitrivr.cineast.core.data.segments.SegmentContainer;
import org.vitrivr.cineast.core.features.abstracts.AbstractTextRetriever;
Expand All @@ -15,6 +16,10 @@ public AudioTranscriptionSearch() {
super(AudioTranscriptionSearch.AUDIO_TRANSCRIPTION_TABLE_NAME);
}

/**
 * Creates an {@link AudioTranscriptionSearch} that is configured through the given properties.
 *
 * @param properties configuration properties, handed on unchanged to the {@link AbstractTextRetriever} constructor together with the default table name of this feature
 */
public AudioTranscriptionSearch(Map<String, String> properties) {
  super(AudioTranscriptionSearch.AUDIO_TRANSCRIPTION_TABLE_NAME, properties);
}

/**
* Extracts the subtitle text and ingests it using the {@link SimpleFulltextFeatureDescriptor}.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.vitrivr.cineast.core.db.RelationalOperator;
import org.vitrivr.cineast.core.features.abstracts.BooleanRetriever;

Expand All @@ -22,7 +22,7 @@ public CollectionBooleanRetriever(String entity, Collection<String> attributes)
super(entity, attributes);
}

public CollectionBooleanRetriever(LinkedHashMap<String, String> properties) {
public CollectionBooleanRetriever(Map<String, String> properties) {
super(properties);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.vitrivr.cineast.core.features;

import java.util.Map;
import org.vitrivr.cineast.core.features.abstracts.AbstractTextRetriever;

/**
Expand All @@ -15,4 +16,9 @@ public class ProvidedOcrSearch extends AbstractTextRetriever {
public ProvidedOcrSearch() {
super(ProvidedOcrSearch.PROVIDED_OCR_SEARCH_TABLE_NAME);
}

/**
 * Creates a {@link ProvidedOcrSearch} that is configured through the given properties.
 *
 * @param properties configuration properties, handed on unchanged to the {@link AbstractTextRetriever} constructor together with the default table name of this feature
 */
public ProvidedOcrSearch(Map<String, String> properties) {
  super(PROVIDED_OCR_SEARCH_TABLE_NAME, properties);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.vitrivr.cineast.core.data.providers.primitive.PrimitiveProviderComparator;
import org.vitrivr.cineast.core.data.providers.primitive.PrimitiveTypeProvider;
import org.vitrivr.cineast.core.db.RelationalOperator;
Expand All @@ -32,7 +32,7 @@ protected RangeBooleanRetriever(String entity, Collection<String> attributes) {
super(entity, attributes);
}

public RangeBooleanRetriever(LinkedHashMap<String, String> properties) {
public RangeBooleanRetriever(Map<String, String> properties) {
super(properties);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ public List<ScoreElement> getSimilar(SegmentContainer sc, ReadableQueryConfig qc
results.add(new SegmentScoreElement(segment, this.correspondence.applyAsDouble(minDist)));
}
results.sort(SegmentScoreElement.SCORE_COMPARATOR.reversed());
return results.subList(0, Math.min(results.size(), qc.getRawResultsPerModule()) - 1);
return results.subList(0, Math.min(results.size(), qc.getResultsPerModule()) - 1);
}

//more than query skeleton
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import com.google.common.collect.ImmutableList;
import java.nio.file.Path;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -50,7 +50,7 @@ public SpatialDistance() {
correspondenceFunction = CorrespondenceFunction.hyperbolic(halfSimilarityDistance);
}

public SpatialDistance(LinkedHashMap<String, String> properties) {
public SpatialDistance(Map<String, String> properties) {
super(2, properties);
String halfSimDistFromConfig = properties.getOrDefault("halfSimilarityDistance", "1000.0/3.0");
halfSimilarityDistance = parseAndEvaluateHalfSimilarityDistance(halfSimDistFromConfig);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
package org.vitrivr.cineast.core.features;

import java.util.Map;
import org.vitrivr.cineast.core.data.entities.SimpleFulltextFeatureDescriptor;
import org.vitrivr.cineast.core.data.segments.SegmentContainer;
import org.vitrivr.cineast.core.features.abstracts.AbstractTextRetriever;

public class SubtitleFulltextSearch extends AbstractTextRetriever {

private static final String SUBTITLE_TABLE_NAME = "features_subtitles";

/**
* Default constructor for {@link SubtitleFulltextSearch}.
*/
public SubtitleFulltextSearch() {
super("features_asr");
super(SUBTITLE_TABLE_NAME);
}

/**
 * Creates a {@link SubtitleFulltextSearch} that is configured through the given properties.
 *
 * @param properties configuration properties, handed on unchanged to the {@link AbstractTextRetriever} constructor together with the default subtitle table name
 */
public SubtitleFulltextSearch(Map<String, String> properties) {
  super(SubtitleFulltextSearch.SUBTITLE_TABLE_NAME, properties);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ public abstract class AbstractTextRetriever implements Retriever, Extractor {
* Name of the table/entity used to store the data.
*/
private final String tableName;

/**
* Decorator that is appended to every query term of a Lucene query; empty unless configured via the "decorator" property.
*/
private final String decorator;

/**
* The {@link DBSelector} used for database lookup.
*/
Expand All @@ -62,7 +68,19 @@ public abstract class AbstractTextRetriever implements Retriever, Extractor {
* @param tableName Name of the table/entity used to store the data
*/
public AbstractTextRetriever(String tableName) {
this.tableName = tableName;
this(tableName, new HashMap<>());
}

public AbstractTextRetriever(String defaultTableName, Map<String, String> properties) {
if (defaultTableName == null) {
throw new IllegalStateException("If no entity is provided by the underlying feature, it needs to be specified in properties");
}
this.tableName = properties.getOrDefault("entity", defaultTableName);
this.decorator = properties.getOrDefault("decorator", "");
}

/**
 * Creates a text retriever that is configured exclusively through properties.
 *
 * <p>The value stored under {@code "entity"} is looked up and passed on as the default table name.
 *
 * @param properties configuration properties; must not be {@code null}
 * @throws IllegalStateException if the properties contain no {@code "entity"} value
 */
public AbstractTextRetriever(Map<String, String> properties) {
this(properties.get("entity"), properties);
}

@Override
Expand Down Expand Up @@ -171,7 +189,7 @@ protected String[] generateQuery(SegmentContainer sc, ReadableQueryConfig qc) {
* Implementing features can transform individual query terms. By default, the configured decorator (empty unless set) is appended to the term.
*/
protected String enrichQueryTerm(String queryTerm) {
return queryTerm;
return queryTerm + this.decorator;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,5 @@ public static void retrieveAndLog(List<Retriever> retrievers, ContinuousRetrieva
}
System.out.println();
});
retrieval.shutdown();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public void execute() {
retrievers.add(new SegmentTags());

CliUtils.retrieveAndLog(retrievers, retrieval, limit, printDetail, qc);
retrieval.shutdown();
System.out.println("Done");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

import com.github.rvesse.airline.annotations.Command;
import com.github.rvesse.airline.annotations.Option;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.List;
import org.vitrivr.cineast.core.data.query.containers.TextQueryTermContainer;
import org.vitrivr.cineast.core.features.AudioTranscriptionSearch;
import org.vitrivr.cineast.core.features.OCRSearch;
import org.vitrivr.cineast.core.features.SubtitleFulltextSearch;
import org.vitrivr.cineast.core.features.retriever.Retriever;
import org.vitrivr.cineast.standalone.config.Config;
Expand All @@ -30,10 +30,14 @@ public void execute() {
System.out.println("Querying for text " + text);
TextQueryTermContainer qc = new TextQueryTermContainer(text);
List<Retriever> retrievers = new ArrayList<>();
Config.sharedConfig().getRetriever().getRetrieversByCategory("ocr").forEach(retriever -> {
CliUtils.retrieveAndLog(Lists.newArrayList(retriever), retrieval, limit, printDetail, qc);
return true;
});
retrievers.add(new SubtitleFulltextSearch());
retrievers.add(new OCRSearch());
retrievers.add(new AudioTranscriptionSearch());
CliUtils.retrieveAndLog(retrievers, retrieval, limit, printDetail, qc);
retrieval.shutdown();
System.out.println("Done");
}
}