Skip to content

Commit

Permalink
Merge pull request #55 from CedricBorko/mongodb-storage
Browse files Browse the repository at this point in the history
MongoDB DUUIStorageBackend
  • Loading branch information
abrami authored Oct 11, 2023
2 parents f422a0f + 838e8aa commit 6250365
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 4 deletions.
9 changes: 6 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,12 @@
<version>2.0.1</version>
</dependency>

<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongodb-driver-sync</artifactId>
<version>4.10.2</version>
</dependency>

<dependency>
<groupId>io.fabric8</groupId>
<artifactId>kubernetes-client</artifactId>
Expand All @@ -338,9 +344,6 @@
<version>2.15.2</version>
</dependency>




</dependencies>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@

import com.arangodb.entity.BaseDocument;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;

import java.util.HashMap;
import java.util.Map;
import java.util.Vector;


public class DUUIPipelineDocumentPerformance {
private Vector<DUUIPipelinePerformancePoint> _points;
private String _runKey;
Expand All @@ -20,6 +23,11 @@ public class DUUIPipelineDocumentPerformance {
private Long _documentWaitTime;
private String document;

/**
* Stores the types of annotations and how many were made.
*/
private Map<String, Integer> annotationTypesCount;

/**
* Whether to track error documents in the database or not
*/
Expand Down Expand Up @@ -57,7 +65,7 @@ public DUUIPipelineDocumentPerformance(String runKey, long waitDocumentTime, JCa
catch (Exception e){
document = null;
}

annotationTypesCount = new HashMap<>();
}

/**
Expand All @@ -82,6 +90,14 @@ public void addData(long durationSerialize, long durationDeserialize, long durat
_durationTotalAnnotator += durationAnnotator;
_durationTotalMutexWait += durationMutexWait;
_durationTotal += durationComponentTotal;

for (Annotation annotation : jc.getAnnotationIndex()) {
annotationTypesCount.put(
annotation.getClass().getCanonicalName(),
JCasUtil.select(jc, annotation.getClass()).size()
);
}

_points.add(new DUUIPipelinePerformancePoint(durationSerialize,durationDeserialize,durationAnnotator,durationMutexWait,durationComponentTotal,componentKey,serializeSize, jc, error, document));
}

Expand Down Expand Up @@ -131,4 +147,8 @@ public BaseDocument toArangoDocument() {
/**
 * Returns the value held in the {@code document} field.
 *
 * @return the stored document string; may be {@code null}, since the
 *         constructor resets the field to {@code null} when an exception occurs
 */
public String getDocument() {
    return this.document;
}

/**
 * Returns the per-type annotation counts collected for this document.
 *
 * <p>The returned map is the internal instance itself, not a copy.
 *
 * @return map from annotation type name to the number of annotations of that type
 */
public Map<String, Integer> getAnnotationTypesCount() {
    return this.annotationTypesCount;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package org.texttechnologylab.DockerUnifiedUIMAInterface.pipeline_storage.mongodb;

import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.MongoCollection;

import com.mongodb.client.model.Filters;
import org.bson.Document;
import org.texttechnologylab.DockerUnifiedUIMAInterface.DUUIComposer;
import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.DUUIPipelineComponent;
import org.texttechnologylab.DockerUnifiedUIMAInterface.driver.IDUUIPipelineComponent;
import org.texttechnologylab.DockerUnifiedUIMAInterface.pipeline_storage.DUUIPipelineDocumentPerformance;
import org.texttechnologylab.DockerUnifiedUIMAInterface.pipeline_storage.DUUIPipelinePerformancePoint;
import org.texttechnologylab.DockerUnifiedUIMAInterface.pipeline_storage.IDUUIStorageBackend;

import java.net.UnknownHostException;
import java.time.Instant;
import java.util.HashMap;

public class DUUIMongoDBStorageBackend implements IDUUIStorageBackend {

private final boolean trackErrorDocs;
private final MongoClient _client;

public DUUIMongoDBStorageBackend(String connectionURI, boolean trackErrorDocs) {
this.trackErrorDocs = trackErrorDocs;
_client = MongoClients.create(connectionURI);
}

public DUUIMongoDBStorageBackend(String connectionURI) {
this(connectionURI, false);
}

@Override
public void addNewRun(String name, DUUIComposer composer) {
MongoDatabase database = this._client.getDatabase("duui_metrics");
MongoCollection<Document> pipelineCollection = database.getCollection("pipeline");
MongoCollection<Document> performanceCollection = database.getCollection("pipeline_perf");
MongoCollection<Document> documentPerformanceCollection = database.getCollection("pipeline_document_perf");
MongoCollection<Document> componentCollection = database.getCollection("pipeline_component");

pipelineCollection.findOneAndDelete(Filters.eq("name", name));
performanceCollection.findOneAndDelete(Filters.eq("name", name));
documentPerformanceCollection.findOneAndDelete(Filters.eq("pipelinename", name));
componentCollection.findOneAndDelete(Filters.eq("name", name));

pipelineCollection.insertOne(
new Document("name", name)
.append("workers", composer.getWorkerCount())
);

for (DUUIPipelineComponent component : composer.getPipeline()) {
String description = component.toJson();
long hash = component.getFinalizedRepresentationHash();
componentCollection.insertOne(
new Document("hash", hash)
.append("name", name)
.append("description", description));
}

}

@Override
public void addMetricsForDocument(DUUIPipelineDocumentPerformance perf) {
MongoDatabase database = this._client.getDatabase("duui_metrics");
MongoCollection<Document> documentCollection = database.getCollection("pipeline_document");
MongoCollection<Document> documentPerformanceCollection = database.getCollection("pipeline_document_perf");

documentCollection.insertOne(
new Document("documentSize", perf.getDocumentSize())
.append("waitTime", perf.getDocumentWaitTime())
.append("totalTime", perf.getTotalTime())
.append("document", perf.getDocument())
.append("annotationsTypeCount", perf.getAnnotationTypesCount())
);

for (DUUIPipelinePerformancePoint point : perf.getPerformancePoints()) {
documentPerformanceCollection.insertOne(
new Document("pipelinename", perf.getRunKey())
.append("componenthash", point.getKey())
.append("durationSerialize", point.getDurationSerialize())
.append("durationDeserialize", point.getDurationDeserialize())
.append("durationAnnotator", point.getDurationAnnotator())
.append("durationMutexWait", point.getDurationMutexWait())
.append("durationComponentTotal", point.getDurationComponentTotal())
.append("totalAnnotations", point.getNumberOfAnnotations())
.append("documentSize", point.getDocumentSize())
.append("serializedSize", point.getSerializedSize())
.append("error", point.getError())
.append("document", point.getDocument())
);
}
}

/**
* Populates a IDUUIPipelineComponent from a HashMap of options that is loaded from the MongoDB
* storage backend. If not options are present an emtpy component is returned instead.
*
* @param hash The finalized component hash.
* @return Populated IDUUIPipelineComponent from options stored in MongoDB.
*/
@Override
public IDUUIPipelineComponent loadComponent(String hash) {
MongoDatabase database = this._client.getDatabase("duui_metrics");
MongoCollection<Document> collection = database.getCollection("pipeline_component");
Document component = collection.find(Filters.eq("hash", Long.parseLong(hash))).first();
if (component == null) {
return new IDUUIPipelineComponent();
}

Document options = Document.parse(component.getString("description")).get("options", Document.class);
if (options == null) {
return new IDUUIPipelineComponent();
}

HashMap<String, Object> optionsMap = new HashMap<>(options);
return new IDUUIPipelineComponent(optionsMap);
}


@Override
public void finalizeRun(String name, Instant start, Instant end) {
MongoDatabase database = this._client.getDatabase("duui_metrics");
MongoCollection<Document> performanceCollection = database.getCollection("pipeline_perf");

performanceCollection.insertOne(
new Document("name", name)
.append("startTime", start.toEpochMilli())
.append("endTime", end.toEpochMilli())
);
}

@Override
public void shutdown() throws UnknownHostException {
System.out.print("[DUUIMongoStorageBackend] Shutting down.\n");
// TODO Should something happen here?
}

@Override
public boolean shouldTrackErrorDocs() {
return trackErrorDocs;
}
}

0 comments on commit 6250365

Please sign in to comment.