Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1613 generate dynamic properties #1614

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions src/main/java/org/gbif/ipt/action/manage/MappingAction.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,11 @@ public class MappingAction extends ManagerBaseAction {
private final Map<String, Map<String, String>> vocabTerms = Maps.newHashMap();
private ExtensionProperty coreid;
private ExtensionProperty datasetId;
private ExtensionProperty dynamicProperties;
private Integer mid;
private PropertyMapping mappingCoreid;
private boolean doiUsedForDatasetId;
private boolean generateJsonDynamicProperties;

@Inject
public MappingAction(SimpleTextProvider textProvider, AppConfig cfg, RegistrationManager registrationManager,
Expand Down Expand Up @@ -223,6 +225,10 @@ public ExtensionProperty getDatasetId() {
return datasetId;
}

/**
 * @return the extension property that prepare() looks up under Constants.DWC_DYNAMIC_PROPERTIES on the
 *         mapping's extension; presumably null when the extension does not define that term (confirm
 *         against Extension.getProperty)
 */
public ExtensionProperty getDynamicProperties() {
return dynamicProperties;
}

public Integer getMid() {
return mid;
}
Expand Down Expand Up @@ -384,6 +390,7 @@ public void prepare() {
readSource();

datasetId = extensionManager.get(mapping.getExtension().getRowType()).getProperty(Constants.DWC_DATASET_ID);
dynamicProperties = extensionManager.get(mapping.getExtension().getRowType()).getProperty(Constants.DWC_DYNAMIC_PROPERTIES);

// prepare all other fields
for (int i = 0; i < mapping.getExtension().getProperties().size(); i++) {
Expand Down Expand Up @@ -423,6 +430,7 @@ public void prepare() {

// ensure existing configuration re-loaded
setDoiUsedForDatasetId(mapping.isDoiUsedForDatasetId());
setGenerateJsonDynamicProperties(mapping.isGenerateJsonDynamicProperties());

if (!isHttpPost()) {
validateAndReport();
Expand Down Expand Up @@ -498,6 +506,7 @@ public String save() throws IOException {
mapping.setFields(mappedFields);
// persist other configurations, e.g. using DOI as datasetId
mapping.setDoiUsedForDatasetId(doiUsedForDatasetId);
mapping.setGenerateJsonDynamicProperties(generateJsonDynamicProperties);
}
// update last modified dates
Date lastModified = new Date();
Expand Down Expand Up @@ -552,6 +561,14 @@ public void setDoiUsedForDatasetId(boolean doiUsedForDatasetId) {
this.doiUsedForDatasetId = doiUsedForDatasetId;
}

/**
 * @return true if the "generate JSON dynamicProperties" option is switched on for the mapping being edited.
 *         The value is re-loaded from the mapping in prepare() and written back to the mapping in save().
 */
public boolean isGenerateJsonDynamicProperties() {
return generateJsonDynamicProperties;
}

/**
 * Sets the "generate JSON dynamicProperties" option held by this action. The value only reaches the
 * mapping when save() copies it over.
 *
 * @param generateJsonDynamicProperties true to have the dynamicProperties value generated as JSON
 */
public void setGenerateJsonDynamicProperties(boolean generateJsonDynamicProperties) {
this.generateJsonDynamicProperties = generateJsonDynamicProperties;
}

/**
* Called from Freemarker template.
*/
Expand Down
1 change: 1 addition & 0 deletions src/main/java/org/gbif/ipt/config/Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public final class Constants {
public static final String DWC_EVENT_ID = "http://rs.tdwg.org/dwc/terms/eventID";
public static final String DWC_BASIS_OF_RECORD = "http://rs.tdwg.org/dwc/terms/basisOfRecord";
public static final String DWC_DATASET_ID = "http://rs.tdwg.org/dwc/terms/datasetID";
public static final String DWC_DYNAMIC_PROPERTIES = "http://rs.tdwg.org/dwc/terms/dynamicProperties";
public static final String VOCAB_URI_DATASET_TYPE = "http://rs.gbif.org/vocabulary/gbif/datasetType";
public static final String VOCAB_URI_LANGUAGE = "http://iso.org/639-2";
public static final String VOCAB_URI_COUNTRY = "http://iso.org/iso3166-1/alpha2";
Expand Down
9 changes: 9 additions & 0 deletions src/main/java/org/gbif/ipt/model/ExtensionMapping.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public class ExtensionMapping implements Serializable {
private String idSuffix;
private RecordFilter filter;
private boolean doiUsedForDatasetId;
private boolean generateJsonDynamicProperties;
private Date lastModified;

/**
Expand Down Expand Up @@ -129,6 +130,10 @@ public boolean isDoiUsedForDatasetId() {
return doiUsedForDatasetId;
}

/**
 * @return true if the value of the dynamicProperties term should be generated as a JSON object for this
 *         mapping instead of being taken from a mapped source column
 */
public boolean isGenerateJsonDynamicProperties() {
return generateJsonDynamicProperties;
}

/**
* @return date extension mapping was last modified
*/
Expand Down Expand Up @@ -164,6 +169,10 @@ public void setDoiUsedForDatasetId(boolean doiUsedForDatasetId) {
this.doiUsedForDatasetId = doiUsedForDatasetId;
}

/**
 * @param generateJsonDynamicProperties true if the value of the dynamicProperties term should be generated
 *                                      as a JSON object for this mapping
 */
public void setGenerateJsonDynamicProperties(boolean generateJsonDynamicProperties) {
this.generateJsonDynamicProperties = generateJsonDynamicProperties;
}

/**
 * @param lastModified date extension mapping was last modified
 */
public void setLastModified(Date lastModified) {
this.lastModified = lastModified;
}
Expand Down
54 changes: 54 additions & 0 deletions src/main/java/org/gbif/ipt/task/DynamicPropertiesGenerator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.gbif.ipt.task;

import com.google.common.collect.Lists;
import com.google.gson.Gson;
import org.gbif.ipt.model.PropertyMapping;

import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Generates a JSON object from the values of all source columns that are not mapped to any property.
 * <p>
 * Usage: call {@link #init(List, Set)} once per mapping to work out which source columns are unmapped, then
 * call {@link #generateJson(String[])} for every source row.
 * <p>
 * Instances keep mutable state (the list of unmapped columns) and are not synchronized.
 */
public class DynamicPropertiesGenerator {

  // Gson instances are thread-safe and reusable, so a single shared one avoids a new allocation per row.
  private static final Gson GSON = new Gson();

  private final List<DynamicPropertiesMapping> unmappedProperties = Lists.newArrayList();

  /**
   * (Re)initialises the generator: every source column whose position is not used as the index of one of the
   * mapped properties is remembered as an unmapped column. Any previous state is discarded.
   *
   * @param sourceColumns    source column names, ordered by column position; null or empty leaves the generator
   *                         without unmapped columns
   * @param mappedProperties property mappings whose indexes identify the mapped columns; null is treated as
   *                         "nothing is mapped"
   */
  public void init(List<String> sourceColumns, Set<PropertyMapping> mappedProperties) {
    unmappedProperties.clear();
    if (sourceColumns == null || sourceColumns.isEmpty()) {
      return;
    }
    // collect the mapped indexes once instead of scanning all mappings again for every column
    Set<Integer> mappedIndexes = collectMappedIndexes(mappedProperties);
    for (int index = 0; index < sourceColumns.size(); index++) {
      if (!mappedIndexes.contains(index)) {
        unmappedProperties.add(new DynamicPropertiesMapping(sourceColumns.get(index), index));
      }
    }
  }

  /**
   * Serialises the values of the unmapped columns of a single source row as a JSON object, keyed by column name.
   * Keys appear in source column order. Columns without a name, and columns lying beyond the end of the row,
   * are skipped. With Gson's default configuration, entries whose value is null are omitted.
   *
   * @param values values of one source row, indexed by column position; null is treated as an empty row
   * @return JSON object string, "{}" when there is nothing to write
   */
  public String generateJson(String[] values) {
    // LinkedHashMap keeps insertion order, making the generated JSON deterministic (source column order)
    Map<String, String> map = new LinkedHashMap<String, String>();
    if (values != null) {
      for (DynamicPropertiesMapping prop : unmappedProperties) {
        if (prop.name != null && prop.index < values.length) {
          map.put(prop.name, values[prop.index]);
        }
      }
    }
    return GSON.toJson(map);
  }

  /**
   * Gathers the non-null indexes of all given property mappings.
   *
   * @param mappedProperties property mappings, may be null
   * @return set of mapped column indexes, never null
   */
  private static Set<Integer> collectMappedIndexes(Set<PropertyMapping> mappedProperties) {
    Set<Integer> indexes = new HashSet<Integer>();
    if (mappedProperties != null) {
      for (PropertyMapping p : mappedProperties) {
        if (p == null) {
          continue;
        }
        // mappings without an index do not occupy any source column
        Integer mappedIndex = p.getIndex();
        if (mappedIndex != null) {
          indexes.add(mappedIndex);
        }
      }
    }
    return indexes;
  }

  /**
   * Name and position of a single unmapped source column. Static and immutable: it needs no reference to the
   * enclosing generator.
   */
  private static final class DynamicPropertiesMapping {
    private final String name;
    private final int index;

    private DynamicPropertiesMapping(String name, int index) {
      this.name = name;
      this.index = index;
    }
  }
}
67 changes: 46 additions & 21 deletions src/main/java/org/gbif/ipt/task/GenerateDwca.java
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ public void addDataFile(List<ExtensionMapping> mappings, @Nullable Integer rowLi

/**
* Write the header column line to file.
*
*
* @param propertyList ordered list of all ExtensionProperty that have been mapped across all mappings for a single
* Extension
* @param totalColumns total number of columns in header
Expand All @@ -243,7 +243,7 @@ private void writeHeaderLine(List<ExtensionProperty> propertyList, int totalColu

/**
* Adds EML file to DwC-A folder.
*
*
* @throws GeneratorException if EML file could not be copied to DwC-A folder
* @throws InterruptedException if executing thread was interrupted
*/
Expand All @@ -266,7 +266,7 @@ private void addEmlFile() throws GeneratorException, InterruptedException {
* </br>
* Since all default values will be written in the data file, they won't be expressed in the archive file (meta.xml).
* That's why the default value is always set to null.
*
*
* @param term ConceptTerm
* @param delimitedBy multi-value delimiter
*
Expand All @@ -288,7 +288,7 @@ private ArchiveField buildField(Term term, @Nullable String delimitedBy) {
/**
* Zips the DwC-A folder. A temp version is created first, and when successful, it is moved into the resource's
* data directory.
*
*
* @throws GeneratorException if DwC-A could not be zipped or moved
* @throws InterruptedException if executing thread was interrupted
*/
Expand Down Expand Up @@ -328,7 +328,7 @@ private void bundleArchive() throws GeneratorException, InterruptedException {
* Validate the DwC-A:
* -ensure that if the core record identifier is mapped (e.g. occurrenceID, taxonID, etc) it is present on all
* rows, and is unique
*
*
* @throws GeneratorException if DwC-A could not be validated
* @throws InterruptedException if executing thread was interrupted
*/
Expand Down Expand Up @@ -360,7 +360,7 @@ private void validate() throws GeneratorException, InterruptedException {

/**
* Sort the data file of a Darwin Core Archive by a column. Sorting is case sensitive.
*
*
* @param file unsorted file
* @param column column to sort file by
*
Expand Down Expand Up @@ -912,7 +912,7 @@ private boolean isEventCore(Archive arch) {

/**
* Method responsible for all stages of DwC-A file generation.
*
*
* @return number of records published in core file
* @throws GeneratorException if DwC-A generation fails for any reason
*/
Expand Down Expand Up @@ -985,7 +985,7 @@ public Map<String, Integer> call() throws Exception {

/**
* Checks if the executing thread has been interrupted, i.e. DwC-A generation was cancelled.
*
*
* @throws InterruptedException if the thread was found to be interrupted
*/
private void checkForInterruption() throws InterruptedException {
Expand All @@ -999,7 +999,7 @@ private void checkForInterruption() throws InterruptedException {

/**
* Checks if the executing thread has been interrupted, i.e. DwC-A generation was cancelled.
*
*
* @param line number of lines currently processed at the time of the check
* @throws InterruptedException if the thread was found to be interrupted
*/
Expand All @@ -1019,7 +1019,7 @@ protected boolean completed() {

/**
* Create data files.
*
*
* @throws GeneratorException if the resource had no core file that was mapped
* @throws InterruptedException if the thread was interrupted
*/
Expand Down Expand Up @@ -1047,7 +1047,7 @@ private void createDataFiles() throws GeneratorException, InterruptedException {

/**
* Create meta.xml file.
*
*
* @throws GeneratorException if meta.xml file creation failed
* @throws InterruptedException if the thread was interrupted
*/
Expand Down Expand Up @@ -1131,6 +1131,12 @@ private void dumpData(Writer writer, PropertyMapping[] inCols, ExtensionMapping
ClosableReportingIterator<String[]> iter = null;
int line = 0;
try {
DynamicPropertiesGenerator dynamicPropertiesGenerator = new DynamicPropertiesGenerator();
if (mapping.isGenerateJsonDynamicProperties()) {
List<String> sourceColumns = sourceManager.columns(mapping.getSource());
dynamicPropertiesGenerator.init(sourceColumns, mapping.getFields());
}

// get the source iterator
iter = sourceManager.rowIterator(mapping.getSource());

Expand Down Expand Up @@ -1178,7 +1184,7 @@ else if (isEmptyLine(in)) {
&& filter.getParam() != null) {
boolean matchesFilter;
if (filter.getFilterTime() == RecordFilter.FilterTime.AfterTranslation) {
applyTranslations(inCols, in, record, mapping.isDoiUsedForDatasetId(), doi);
applyTranslations(inCols, in, record, mapping.isDoiUsedForDatasetId(), doi, mapping.isGenerateJsonDynamicProperties(), dynamicPropertiesGenerator);
matchesFilter = filter.matches(in);
alreadyTranslated = true;
} else {
Expand Down Expand Up @@ -1206,8 +1212,9 @@ else if (isEmptyLine(in)) {

// go through all archive fields
if (!alreadyTranslated) {
applyTranslations(inCols, in, record, mapping.isDoiUsedForDatasetId(), doi);
applyTranslations(inCols, in, record, mapping.isDoiUsedForDatasetId(), doi, mapping.isGenerateJsonDynamicProperties(), dynamicPropertiesGenerator);
}

String newRow = tabRow(record);
if (newRow != null) {
writer.write(newRow);
Expand Down Expand Up @@ -1279,7 +1286,7 @@ else if (isEmptyLine(in)) {

/**
* Sets an exception and state of the worker to FAILED. The final StatusReport is generated at the end.
*
*
* @param e exception
*/
private void setState(Exception e) {
Expand All @@ -1290,7 +1297,7 @@ private void setState(Exception e) {

/**
* Sets only the state of the worker. The final StatusReport is generated at the end.
*
*
* @param s STATE of worker
*/
private void setState(STATE s) {
Expand Down Expand Up @@ -1338,7 +1345,7 @@ protected String tabRow(String[] columns) {
* @param doi DOI assigned to resource
*/
private void applyTranslations(PropertyMapping[] inCols, String[] in, String[] record, boolean doiUsedForDatasetId,
DOI doi) {
DOI doi, boolean generateJsonDynamicProperties, DynamicPropertiesGenerator dynamicPropertiesGenerator) {
for (int i = 1; i < inCols.length; i++) {
PropertyMapping pm = inCols[i];
String val = null;
Expand All @@ -1361,6 +1368,10 @@ private void applyTranslations(PropertyMapping[] inCols, String[] in, String[] r
&& doi != null) {
val = doi.getDoiString();
}
// generate JSON for dynamicProperties field?
if (pm.getTerm().qualifiedName().equalsIgnoreCase(Constants.DWC_DYNAMIC_PROPERTIES) && generateJsonDynamicProperties) {
val = dynamicPropertiesGenerator.generateJson(in);
}
}
// add value to data file record
record[i] = val;
Expand All @@ -1369,7 +1380,7 @@ private void applyTranslations(PropertyMapping[] inCols, String[] in, String[] r

/**
* Print a line representation of a string array used for logging.
*
*
* @param in String array
* @return line
*/
Expand All @@ -1388,7 +1399,7 @@ private String printLine(String[] in) {

/**
* Write message from exception to publication log file as a new line but suppress any exception thrown.
*
*
* @param e exception to write message from
*/
private void writeFailureToPublicationLog(Throwable e) {
Expand All @@ -1408,7 +1419,7 @@ private void writeFailureToPublicationLog(Throwable e) {
* First we need to find the union of all terms mapped (in all files) for a single Extension. Then make each mapped
* term a field in the final archive. Static/default mappings are not stored for a field, since they are not
* expressed in meta.xml but instead get written to the data file.
*
*
* @param mappings list of ExtensionMapping
* @param af ArchiveFile
*
Expand Down Expand Up @@ -1461,6 +1472,20 @@ private Set<Term> addFieldsToArchive(List<ExtensionMapping> mappings, ArchiveFil
// include datasetID in set of all terms mapped for Extension
mappedConceptTerms.add(DwcTerm.datasetID);
}
// if Extension has dynamicProperties concept term, check if generateJsonDynamicProperties should be used as value for mapping
ExtensionProperty epd = m.getExtension().getProperty(DwcTerm.dynamicProperties.qualifiedName());
if (epd != null && m.isGenerateJsonDynamicProperties()) {
log.debug("Detected that dynamicProperties should be generated..");
// include dynamicProperties field in ArchiveFile
ArchiveField f = buildField(DwcTerm.dynamicProperties, null);
af.addField(f);
// include dynamicProperties field mapping in ExtensionMapping
PropertyMapping pm = new PropertyMapping(f);
pm.setTerm(epd);
m.getFields().add(pm);
// include dynamicProperties in set of all terms mapped for Extension
mappedConceptTerms.add(DwcTerm.dynamicProperties);
}
}
return mappedConceptTerms;
}
Expand All @@ -1469,7 +1494,7 @@ private Set<Term> addFieldsToArchive(List<ExtensionMapping> mappings, ArchiveFil
* Iterate through ordered list of those ExtensionProperty that have been mapped, and reassign the ArchiveFile
* ArchiveField indexes, based on the order of their appearance in the ordered list be careful to reserve index 0 for
* the ID column
*
*
* @param propertyList ordered list of those ExtensionProperty that have been mapped
* @param af ArchiveFile
*/
Expand All @@ -1494,7 +1519,7 @@ private void assignIndexesOrderedByExtension(List<ExtensionProperty> propertyLis

/**
* Retrieve the ordered list of all Extension's mapped ExtensionProperty. Ordering is done according to Extension.
*
*
* @param ext Extension
* @param mappedConceptTerms set of all mapped ConceptTerm
* @return ordered list of mapped ExtensionProperty
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/ApplicationResources_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,7 @@ manage.mapping.datasetIdColumn.help=Please check the "Use resource DOI" box if y
manage.mapping.noid=No ID
manage.mapping.lineNumber=Line Number
manage.mapping.uuid=UUID Generator
manage.mapping.dynamic.all.unmapped.fields=Generate JSON object from all unmapped columns
manage.mapping.hideEmpty=Hide unmapped fields
manage.mapping.hideGroups=Hide redundant classes
manage.mapping.redundant=Redundant classes
Expand Down
Loading