Skip to content

Commit

Permalink
Adding ProvidedOCR feature, option to disable transactions during import (#275)

Browse files Browse the repository at this point in the history

Former-commit-id: 54eedfb
  • Loading branch information
silvanheller authored Mar 11, 2022
1 parent 2bccecc commit 1dfbae8
Show file tree
Hide file tree
Showing 11 changed files with 60 additions and 12 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ allprojects {
group = 'org.vitrivr'

/* Our current version, on dev branch this should always be release+1-SNAPSHOT */
version = '3.8.4'
version = '3.8.5'

apply plugin: 'java-library'
apply plugin: 'maven-publish'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public final class DatabaseConfig {
private DataSource selector = DataSource.COTTONTAIL;

private Integer batchsize = DEFAULT_BATCH_SIZE;
private boolean useTransactions = true;

@JsonCreator
public DatabaseConfig() {
Expand Down Expand Up @@ -102,4 +103,13 @@ public Supplier<EntityCreator> getEntityCreatorSupplier() {
/**
 * Returns a supplier that produces database selectors for the configured {@link DataSource},
 * parameterized with this configuration.
 */
public DBSelectorSupplier getSelectorSupplier() {
  final DataSource source = this.selector;
  return source.getSelectorSupplier(this);
}

/**
 * Whether database writers should wrap inserts in explicit transactions.
 * Defaults to {@code true}; can be disabled (e.g. via the import CLI) for performance.
 */
@JsonProperty
public boolean getUseTransactions() {
  return this.useTransactions;
}

/** Enables or disables the use of explicit transactions during database writes. */
public void setUseTransactions(boolean useTransactions) {
this.useTransactions = useTransactions;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public PersistencyWriterSupplier getWriterSupplier(DatabaseConfig config) {
case NONE:
return NoDBWriter::new;
case COTTONTAIL:
return () -> new CottontailWriter(new CottontailWrapper(config.getHost(), config.getPort()), config.getBatchsize());
return () -> new CottontailWriter(new CottontailWrapper(config.getHost(), config.getPort()), config.getBatchsize(), config.getUseTransactions());
case POLYPHENY:
return () -> new PolyphenyWriter(new PolyphenyWrapper(config.getHost(), config.getPort()), config.getBatchsize());
case JSON:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ public final class CottontailWriter extends AbstractPersistencyWriter<Insert> {

/** The batch size to use for INSERTS. */
private final int batchSize;
private final boolean useTransactions;

/**
 * Creates a new {@link CottontailWriter}.
 *
 * @param wrapper Wrapper around the Cottontail DB connection this writer should use.
 * @param batchSize The batch size to use for inserts.
 * @param useTransactions Whether inserts should be wrapped in an explicit transaction
 *     (begin/commit/rollback); disabling trades transactional safety for import speed.
 */
public CottontailWriter(CottontailWrapper wrapper, int batchSize, boolean useTransactions) {
this.cottontail = wrapper;
this.batchSize = batchSize;
this.useTransactions = useTransactions;
}

@Override
Expand Down Expand Up @@ -57,9 +59,15 @@ public boolean exists(String key, String value) {
public boolean persist(List<PersistentTuple> tuples) {
long start = System.currentTimeMillis();
int size = tuples.size();
final long txId = this.cottontail.client.begin();
long txId = 0L;
if(useTransactions){
txId = this.cottontail.client.begin();
}
try {
BatchInsert insert = new BatchInsert().into(this.fqn).columns(this.names).txId(txId);
BatchInsert insert = new BatchInsert().into(this.fqn).columns(this.names);
if(useTransactions){
insert.txId(txId);
}
while (!tuples.isEmpty()) {
final PersistentTuple tuple = tuples.remove(0);
final Object[] values = tuple.getElements().stream().map(o -> {
Expand All @@ -80,12 +88,16 @@ public boolean persist(List<PersistentTuple> tuples) {
LOGGER.trace("Inserting msg of size {} into {}", insert.size(), this.fqn);
this.cottontail.client.insert(insert);
}
this.cottontail.client.commit(txId);
if(useTransactions){
this.cottontail.client.commit(txId);
}
long stop = System.currentTimeMillis();
LOGGER.trace("Completed insert of {} elements in {} ms", size, stop - start);
return true;
} catch (StatusRuntimeException e) {
this.cottontail.client.rollback(txId);
if(useTransactions){
this.cottontail.client.rollback(txId);
}
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package org.vitrivr.cineast.core.features;

import org.vitrivr.cineast.core.features.abstracts.AbstractTextRetriever;

/**
* Used when OCR is provided by an external API, e.g. Google Vision
*/
public class ProvidedOcrSearch extends AbstractTextRetriever {

  /** Name of the entity/table holding externally provided OCR text. */
  public static final String PROVIDED_OCR_SEARCH_TABLE_NAME = "features_providedOcr";

  /**
   * Creates a new {@link ProvidedOcrSearch} backed by the provided-OCR table.
   */
  public ProvidedOcrSearch() {
    super(PROVIDED_OCR_SEARCH_TABLE_NAME);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ CottontailWrapper getWrapper() {

@Override
public PersistencyWriter<Insert> getPersistencyWriter() {
/* NOTE(review): transactions are hard-coded to true here instead of reading
 * config.getUseTransactions() — confirm this is intentional for this provider. */
return new CottontailWriter(getWrapper(), this.config.getBatchsize(), true);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,16 @@ public class ImportCommand implements Runnable {
@Option(name = {"--no-finalize"}, title = "Do Not Finalize", description = "If this flag is not set, automatically rebuilds indices & optimizes all entities when writing to cottontail after the import. Set this flag when you want more performance with external parallelism.")
private boolean doNotFinalize = false;

@Option(name = {"--no-transactions"}, title = "Do Not Use Transactions", description = "If this flag is not set, the default behavior is used which means transactions are enabled during import. Set this flag when you want more performance and manage transactional aspects yourself.")
private boolean noTransactions = false;

@Override
public void run() {
System.out.printf("Starting import of type %s for '%s'.%n", this.type, this.input);
System.out.printf("Starting import of type %s for '%s'. Batchsize %d, %d threads. Clean %b, no-finalize %b .%n", this.type, this.input, this.batchsize, this.threads, this.clean, this.doNotFinalize);
final Path path = Paths.get(this.input);
if(noTransactions){
Config.sharedConfig().getDatabase().setUseTransactions(false);
}
final ImportType type = ImportType.valueOf(this.type.toUpperCase());
DataImportHandler handler = null;
boolean isGoogleVision = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ public void run() {
retrievers.add(new OCRSearch());
retrievers.add(new AudioTranscriptionSearch());
retrievers.add(new DescriptionTextSearch());
retrievers.add(new TagsFtSearch());
CliUtils.retrieveAndLog(retrievers, retrieval, limit, printDetail, qc);
System.out.println("Done");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ public IngestConfig(@JsonProperty(value = "type") MediaType type,
if (this.database.getPlaintext() == DatabaseConfig.DEFAULT_PLAINTEXT) {
this.database.setPlaintext(globalDb.getPlaintext());
}
if (this.database.getUseTransactions()) {
this.database.setUseTransactions(globalDb.getUseTransactions());
}

/* Merge with global settings if not set. */
final ExtractionPipelineConfig globalExt = Config.sharedConfig().getExtractor();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public JsonDataImportHandler(int threads, int batchsize) {
public void doImport(Path path) {
try {
LOGGER.info("Starting data import with JSON files in: {}", path.toString());
Files.walk(path, 2).filter(p -> p.toString().toLowerCase().endsWith(".json")).forEach(p -> {
Files.walk(path, 3).filter(p -> p.toString().toLowerCase().endsWith(".json")).forEach(p -> {
final String filename = p.getFileName().toString();
final String suffix = filename.substring(filename.lastIndexOf("."));
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public static void reportExecutionTime(String name, long milliseconds) {
* You can call this method without worrying if prometheus support is enabled
*/
public static void reportImportProgress(int insertedTuples, String entityName, long executiontime) {
LOGGER.trace("Completed import of {} tuples on {} in {} ms", insertedTuples, entityName, executiontime);
LOGGER.trace("import progress: {} tuples on {} in {} ms", insertedTuples, entityName, executiontime);
if (importProgress != null && batchImportTime != null) {
importProgress.labels(entityName).inc(insertedTuples);
batchImportTime.labels(entityName, String.valueOf(insertedTuples)).observe(executiontime);
Expand Down

0 comments on commit 1dfbae8

Please sign in to comment.