From d3b4dffcee24da8e5e9728c334a18a65217ae493 Mon Sep 17 00:00:00 2001 From: sylmorin Date: Mon, 19 Jul 2021 11:53:49 +0200 Subject: [PATCH] 1613 Generate dynamic properties from unmapped columns --- .../gbif/ipt/action/manage/MappingAction.java | 17 +++ .../java/org/gbif/ipt/config/Constants.java | 1 + .../org/gbif/ipt/model/ExtensionMapping.java | 9 ++ .../ipt/task/DynamicPropertiesGenerator.java | 54 ++++++++ .../java/org/gbif/ipt/task/GenerateDwca.java | 67 +++++++--- .../ApplicationResources_en.properties | 1 + .../webapp/WEB-INF/pages/manage/mapping.ftl | 11 +- .../org/gbif/ipt/task/GenerateDwcaTest.java | 125 ++++++++++++++++++ .../res1/occurrence_dynamic_properties.txt | 3 + .../resource_generate_dynamic_properties.xml | 71 ++++++++++ .../resource_standard_dynamic_properties.xml | 77 +++++++++++ 11 files changed, 413 insertions(+), 23 deletions(-) create mode 100644 src/main/java/org/gbif/ipt/task/DynamicPropertiesGenerator.java create mode 100644 src/test/resources/resources/res1/occurrence_dynamic_properties.txt create mode 100644 src/test/resources/resources/res1/resource_generate_dynamic_properties.xml create mode 100644 src/test/resources/resources/res1/resource_standard_dynamic_properties.xml diff --git a/src/main/java/org/gbif/ipt/action/manage/MappingAction.java b/src/main/java/org/gbif/ipt/action/manage/MappingAction.java index 06565765c5..aaadb99db6 100644 --- a/src/main/java/org/gbif/ipt/action/manage/MappingAction.java +++ b/src/main/java/org/gbif/ipt/action/manage/MappingAction.java @@ -87,9 +87,11 @@ public class MappingAction extends ManagerBaseAction { private final Map> vocabTerms = Maps.newHashMap(); private ExtensionProperty coreid; private ExtensionProperty datasetId; + private ExtensionProperty dynamicProperties; private Integer mid; private PropertyMapping mappingCoreid; private boolean doiUsedForDatasetId; + private boolean generateJsonDynamicProperties; @Inject public MappingAction(SimpleTextProvider textProvider, AppConfig cfg, RegistrationManager registrationManager, @@ -223,6 +225,10 @@ public ExtensionProperty getDatasetId() { return datasetId; } + public ExtensionProperty getDynamicProperties() { + return dynamicProperties; + } + public Integer getMid() { return mid; } @@ -384,6 +390,7 @@ public void prepare() { readSource(); datasetId = extensionManager.get(mapping.getExtension().getRowType()).getProperty(Constants.DWC_DATASET_ID); + dynamicProperties = extensionManager.get(mapping.getExtension().getRowType()).getProperty(Constants.DWC_DYNAMIC_PROPERTIES); // prepare all other fields for (int i = 0; i < mapping.getExtension().getProperties().size(); i++) { @@ -423,6 +430,7 @@ public void prepare() { // ensure existing configuration re-loaded setDoiUsedForDatasetId(mapping.isDoiUsedForDatasetId()); + setGenerateJsonDynamicProperties(mapping.isGenerateJsonDynamicProperties()); if (!isHttpPost()) { validateAndReport(); @@ -498,6 +506,7 @@ public String save() throws IOException { mapping.setFields(mappedFields); // persist other configurations, e.g. using DOI as datasetId mapping.setDoiUsedForDatasetId(doiUsedForDatasetId); + mapping.setGenerateJsonDynamicProperties(generateJsonDynamicProperties); } // update last modified dates Date lastModified = new Date(); @@ -552,6 +561,14 @@ public void setDoiUsedForDatasetId(boolean doiUsedForDatasetId) { this.doiUsedForDatasetId = doiUsedForDatasetId; } + public boolean isGenerateJsonDynamicProperties() { + return generateJsonDynamicProperties; + } + + public void setGenerateJsonDynamicProperties(boolean generateJsonDynamicProperties) { + this.generateJsonDynamicProperties = generateJsonDynamicProperties; + } + /** * Called from Freemarker template. */ diff --git a/src/main/java/org/gbif/ipt/config/Constants.java b/src/main/java/org/gbif/ipt/config/Constants.java index 172ed4623c..fc7569a665 100644 --- a/src/main/java/org/gbif/ipt/config/Constants.java +++ b/src/main/java/org/gbif/ipt/config/Constants.java @@ -38,6 +38,7 @@ public final class Constants { public static final String DWC_EVENT_ID = "http://rs.tdwg.org/dwc/terms/eventID"; public static final String DWC_BASIS_OF_RECORD = "http://rs.tdwg.org/dwc/terms/basisOfRecord"; public static final String DWC_DATASET_ID = "http://rs.tdwg.org/dwc/terms/datasetID"; + public static final String DWC_DYNAMIC_PROPERTIES = "http://rs.tdwg.org/dwc/terms/dynamicProperties"; public static final String VOCAB_URI_DATASET_TYPE = "http://rs.gbif.org/vocabulary/gbif/datasetType"; public static final String VOCAB_URI_LANGUAGE = "http://iso.org/639-2"; public static final String VOCAB_URI_COUNTRY = "http://iso.org/iso3166-1/alpha2"; diff --git a/src/main/java/org/gbif/ipt/model/ExtensionMapping.java b/src/main/java/org/gbif/ipt/model/ExtensionMapping.java index cab88ac5a1..f8976e4819 100644 --- a/src/main/java/org/gbif/ipt/model/ExtensionMapping.java +++ b/src/main/java/org/gbif/ipt/model/ExtensionMapping.java @@ -39,6 +39,7 @@ public class ExtensionMapping implements Serializable { private String idSuffix; private RecordFilter filter; private boolean doiUsedForDatasetId; + private boolean generateJsonDynamicProperties; private Date lastModified; /** @@ -129,6 +130,10 @@ public boolean isDoiUsedForDatasetId() { return doiUsedForDatasetId; } + public boolean isGenerateJsonDynamicProperties() { + return generateJsonDynamicProperties; + } + /** * @return date extension mapping was last modified */ @@ -164,6 +169,10 @@ public void setDoiUsedForDatasetId(boolean doiUsedForDatasetId) { this.doiUsedForDatasetId = doiUsedForDatasetId; } + public void setGenerateJsonDynamicProperties(boolean generateJsonDynamicProperties) { + this.generateJsonDynamicProperties = generateJsonDynamicProperties; + } + public void setLastModified(Date lastModified) { this.lastModified = lastModified; } diff --git a/src/main/java/org/gbif/ipt/task/DynamicPropertiesGenerator.java b/src/main/java/org/gbif/ipt/task/DynamicPropertiesGenerator.java new file mode 100644 index 0000000000..4bbc3dc76a --- /dev/null +++ b/src/main/java/org/gbif/ipt/task/DynamicPropertiesGenerator.java @@ -0,0 +1,54 @@ +package org.gbif.ipt.task; + +import com.google.common.collect.Lists; +import com.google.gson.Gson; +import org.gbif.ipt.model.PropertyMapping; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class DynamicPropertiesGenerator { + + private List unmappedProperties = Lists.newArrayList(); + + public void init(List sourceColumns, Set mappedProperties) { + unmappedProperties.clear(); + if ((sourceColumns != null) && !sourceColumns.isEmpty()) { + for (int index=0; index map = new HashMap(); + for (DynamicPropertiesMapping prop : unmappedProperties) { + if ((prop.name != null) && (prop.index < values.length)) { + map.put(prop.name, values[prop.index]); + } + } + return new Gson().toJson(map); + } + + private boolean isMapped(Integer index, Set mappedProperties) { + for (PropertyMapping p : mappedProperties) { + if (index.equals(p.getIndex())) { + return true; + } + } + return false; + } + + private class DynamicPropertiesMapping { + String name; + Integer index; + } +} diff --git a/src/main/java/org/gbif/ipt/task/GenerateDwca.java b/src/main/java/org/gbif/ipt/task/GenerateDwca.java index d336b3cf2f..5dcdc78961 100644 --- a/src/main/java/org/gbif/ipt/task/GenerateDwca.java +++ b/src/main/java/org/gbif/ipt/task/GenerateDwca.java @@ -216,7 +216,7 @@ public void addDataFile(List mappings, @Nullable Integer rowLi /** * Write the header column line to file. - * + * * @param propertyList ordered list of all ExtensionProperty that have been mapped across all mappings for a single * Extension * @param totalColumns total number of columns in header @@ -243,7 +243,7 @@ private void writeHeaderLine(List propertyList, int totalColu /** * Adds EML file to DwC-A folder. - * + * * @throws GeneratorException if EML file could not be copied to DwC-A folder * @throws InterruptedException if executing thread was interrupted */ @@ -266,7 +266,7 @@ private void addEmlFile() throws GeneratorException, InterruptedException { *
* Since all default values ​​will be written in the data file, they won't be expressed in the archive file (meta.xml). * That's why the default value is always set to null. - * + * * @param term ConceptTerm * @param delimitedBy multi-value delimiter * @@ -288,7 +288,7 @@ private ArchiveField buildField(Term term, @Nullable String delimitedBy) { /** * Zips the DwC-A folder. A temp version is created first, and when successful, it it moved into the resource's * data directory. - * + * * @throws GeneratorException if DwC-A could not be zipped or moved * @throws InterruptedException if executing thread was interrupted */ @@ -328,7 +328,7 @@ private void bundleArchive() throws GeneratorException, InterruptedException { * Validate the DwC-A: * -ensure that if the core record identifier is mapped (e.g. occurrenceID, taxonID, etc) it is present on all * rows, and is unique - * + * * @throws GeneratorException if DwC-A could not be validated * @throws InterruptedException if executing thread was interrupted */ @@ -360,7 +360,7 @@ private void validate() throws GeneratorException, InterruptedException { /** * Sort the data file of a Darwin Core Archive by a column. Sorting is case sensitive. - * + * * @param file unsorted file * @param column column to sort by file by * @@ -912,7 +912,7 @@ private boolean isEventCore(Archive arch) { /** * Method responsible for all stages of DwC-A file generation. - * + * * @return number of records published in core file * @throws GeneratorException if DwC-A generation fails for any reason */ @@ -985,7 +985,7 @@ public Map call() throws Exception { /** * Checks if the executing thread has been interrupted, i.e. DwC-A generation was cancelled. - * + * * @throws InterruptedException if the thread was found to be interrupted */ private void checkForInterruption() throws InterruptedException { @@ -999,7 +999,7 @@ private void checkForInterruption() throws InterruptedException { /** * Checks if the executing thread has been interrupted, i.e. DwC-A generation was cancelled. - * + * * @param line number of lines currently processed at the time of the check * @throws InterruptedException if the thread was found to be interrupted */ @@ -1019,7 +1019,7 @@ protected boolean completed() { /** * Create data files. - * + * * @throws GeneratorException if the resource had no core file that was mapped * @throws InterruptedException if the thread was interrupted */ @@ -1047,7 +1047,7 @@ private void createDataFiles() throws GeneratorException, InterruptedException { /** * Create meta.xml file. - * + * * @throws GeneratorException if meta.xml file creation failed * @throws InterruptedException if the thread was interrupted */ @@ -1131,6 +1131,12 @@ private void dumpData(Writer writer, PropertyMapping[] inCols, ExtensionMapping ClosableReportingIterator iter = null; int line = 0; try { + DynamicPropertiesGenerator dynamicPropertiesGenerator = new DynamicPropertiesGenerator(); + if (mapping.isGenerateJsonDynamicProperties()) { + List sourceColumns = sourceManager.columns(mapping.getSource()); + dynamicPropertiesGenerator.init(sourceColumns, mapping.getFields()); + } + // get the source iterator iter = sourceManager.rowIterator(mapping.getSource()); @@ -1178,7 +1184,7 @@ else if (isEmptyLine(in)) { && filter.getParam() != null) { boolean matchesFilter; if (filter.getFilterTime() == RecordFilter.FilterTime.AfterTranslation) { - applyTranslations(inCols, in, record, mapping.isDoiUsedForDatasetId(), doi); + applyTranslations(inCols, in, record, mapping.isDoiUsedForDatasetId(), doi, mapping.isGenerateJsonDynamicProperties(), dynamicPropertiesGenerator); matchesFilter = filter.matches(in); alreadyTranslated = true; } else { @@ -1206,8 +1212,9 @@ else if (isEmptyLine(in)) { // go through all archive fields if (!alreadyTranslated) { - applyTranslations(inCols, in, record, mapping.isDoiUsedForDatasetId(), doi); + applyTranslations(inCols, in, record, mapping.isDoiUsedForDatasetId(), doi, mapping.isGenerateJsonDynamicProperties(), dynamicPropertiesGenerator); } + String newRow = tabRow(record); if (newRow != null) { writer.write(newRow); @@ -1279,7 +1286,7 @@ else if (isEmptyLine(in)) { /** * Sets an exception and state of the worker to FAILED. The final StatusReport is generated at the end. - * + * * @param e exception */ private void setState(Exception e) { @@ -1290,7 +1297,7 @@ private void setState(Exception e) { /** * Sets only the state of the worker. The final StatusReport is generated at the end. - * + * * @param s STATE of worker */ private void setState(STATE s) { @@ -1338,7 +1345,7 @@ protected String tabRow(String[] columns) { * @param doi DOI assigned to resource */ private void applyTranslations(PropertyMapping[] inCols, String[] in, String[] record, boolean doiUsedForDatasetId, - DOI doi) { + DOI doi, boolean generateJsonDynamicProperties, DynamicPropertiesGenerator dynamicPropertiesGenerator) { for (int i = 1; i < inCols.length; i++) { PropertyMapping pm = inCols[i]; String val = null; @@ -1361,6 +1368,10 @@ private void applyTranslations(PropertyMapping[] inCols, String[] in, String[] r && doi != null) { val = doi.getDoiString(); } + // generate JSON for dynamicProperties field? + if (pm.getTerm().qualifiedName().equalsIgnoreCase(Constants.DWC_DYNAMIC_PROPERTIES) && generateJsonDynamicProperties) { + val = dynamicPropertiesGenerator.generateJson(in); + } } // add value to data file record record[i] = val; @@ -1369,7 +1380,7 @@ private void applyTranslations(PropertyMapping[] inCols, String[] in, String[] r /** * Print a line representation of a string array used for logging. - * + * * @param in String array * @return line */ @@ -1388,7 +1399,7 @@ private String printLine(String[] in) { /** * Write message from exception to publication log file as a new line but suffocate any exception thrown. - * + * * @param e exception to write message from */ private void writeFailureToPublicationLog(Throwable e) { @@ -1408,7 +1419,7 @@ private void writeFailureToPublicationLog(Throwable e) { * First we need to find the union of all terms mapped (in all files) for a single Extension. Then make each mapped * term a field in the final archive. Static/default mappings are not stored for a field, since they are not * expressed in meta.xml but instead get written to the data file. - * + * * @param mappings list of ExtensionMapping * @param af ArchiveFile * @@ -1461,6 +1472,20 @@ private Set addFieldsToArchive(List mappings, ArchiveFil // include datasetID in set of all terms mapped for Extension mappedConceptTerms.add(DwcTerm.datasetID); } + // if Extension has dynamicProperties concept term, check if generateJsonDynamicProperties should be used as value for mapping + ExtensionProperty epd = m.getExtension().getProperty(DwcTerm.dynamicProperties.qualifiedName()); + if (epd != null && m.isGenerateJsonDynamicProperties()) { + log.debug("Detected that dynamicProperties should be generated.."); + // include dynamicProperties field in ArchiveFile + ArchiveField f = buildField(DwcTerm.dynamicProperties, null); + af.addField(f); + // include dynamicProperties field mapping in ExtensionMapping + PropertyMapping pm = new PropertyMapping(f); + pm.setTerm(epd); + m.getFields().add(pm); + // include dynamicProperties in set of all terms mapped for Extension + mappedConceptTerms.add(DwcTerm.dynamicProperties); + } } return mappedConceptTerms; } @@ -1469,7 +1494,7 @@ private Set addFieldsToArchive(List mappings, ArchiveFil * Iterate through ordered list of those ExtensionProperty that have been mapped, and reassign the ArchiveFile * ArchiveField indexes, based on the order of their appearance in the ordered list be careful to reserve index 0 for * the ID column - * + * * @param propertyList ordered list of those ExtensionProperty that have been mapped * @param af ArchiveFile */ @@ -1494,7 +1519,7 @@ private void assignIndexesOrderedByExtension(List propertyLis /** * Retrieve the ordered list of all Extension's mapped ExtensionProperty. Ordering is done according to Extension. - * + * * @param ext Extension * @param mappedConceptTerms set of all mapped ConceptTerm * @return ordered list of mapped ExtensionProperty diff --git a/src/main/resources/ApplicationResources_en.properties b/src/main/resources/ApplicationResources_en.properties index 2f65bb5482..4c0a4a049d 100644 --- a/src/main/resources/ApplicationResources_en.properties +++ b/src/main/resources/ApplicationResources_en.properties @@ -829,6 +829,7 @@ manage.mapping.datasetIdColumn.help=Please check the "Use resource DOI" box if y manage.mapping.noid=No ID manage.mapping.lineNumber=Line Number manage.mapping.uuid=UUID Generator +manage.mapping.dynamic.all.unmapped.fields=Generate JSON object from all unmapped columns manage.mapping.hideEmpty=Hide unmapped fields manage.mapping.hideGroups=Hide redundant classes manage.mapping.redundant=Redundant classes diff --git a/src/main/webapp/WEB-INF/pages/manage/mapping.ftl b/src/main/webapp/WEB-INF/pages/manage/mapping.ftl index a366989f3b..6def5de121 100644 --- a/src/main/webapp/WEB-INF/pages/manage/mapping.ftl +++ b/src/main/webapp/WEB-INF/pages/manage/mapping.ftl @@ -234,7 +234,7 @@ -<#macro datasetDoiCheckbox idAttr name i18nkey classAttr requiredField value="-99999" errorfield=""> +<#macro mappingCheckbox idAttr name i18nkey classAttr requiredField value="-99999" errorfield="">
<#-- use name if value was not supplied --> <#if value == "-99999"> @@ -322,9 +322,16 @@ <#if datasetId?? && p.qualifiedName()?lower_case == datasetId.qualname?lower_case>
<#-- option to use DOI as datasetID --> - <@datasetDoiCheckbox idAttr="cVal${fieldsIndex}" name="doiUsedForDatasetId" i18nkey="manage.mapping.datasetIdColumn" classAttr="cval datasetDoiCheckbox form-check-input" requiredField=false value="${doiUsedForDatasetId?string}" errorfield="" /> + <@mappingCheckbox idAttr="cVal${fieldsIndex}" name="doiUsedForDatasetId" i18nkey="manage.mapping.datasetIdColumn" classAttr="cval datasetDoiCheckbox form-check-input" requiredField=false value="${doiUsedForDatasetId?string}" errorfield="" />
+ + <#if dynamicProperties?? && p.qualifiedName()?lower_case == dynamicProperties.qualname?lower_case> +
+ <#-- option to generate JSON with all unmapped fields for dynamicProperties --> + <@mappingCheckbox idAttr="cVal${fieldsIndex}" name="generateJsonDynamicProperties" i18nkey="manage.mapping.dynamic.all.unmapped.fields" classAttr="cval datasetDoiCheckbox form-check-input" requiredField=false value="${generateJsonDynamicProperties?string}" errorfield="" /> +
+
diff --git a/src/test/java/org/gbif/ipt/task/GenerateDwcaTest.java b/src/test/java/org/gbif/ipt/task/GenerateDwcaTest.java index dab588a5fd..f8bc216236 100644 --- a/src/test/java/org/gbif/ipt/task/GenerateDwcaTest.java +++ b/src/test/java/org/gbif/ipt/task/GenerateDwcaTest.java @@ -313,6 +313,131 @@ public void testGenerateCoreFromSingleSourceFileDOIForDatasetID() throws Excepti reader.close(); } + /** + * Use generated JSON for DynamicProperties + */ + @Test + public void testGenerateCoreFromSingleSourceFileJsonDynamicProperties() throws Exception { + // retrieve sample zipped resource XML configuration file, where setting "generated dynamic properties" has been turned on + File resourceXML = FileUtils.getClasspathFile("resources/res1/resource_generate_dynamic_properties.xml"); + // create resource from single source file + File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_dynamic_properties.txt"); + Resource resource = getResource(resourceXML, occurrence); + + generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig, + mockVocabulariesManager); + Map recordsByExtension = generateDwca.call(); + // count for occurrence core only + assertEquals(1, recordsByExtension.size()); + + // 2 rows in core file + String coreRowType = resource.getCoreRowType(); + assertEquals(Constants.DWC_ROWTYPE_OCCURRENCE, coreRowType); + int recordCount = recordsByExtension.get(resource.getCoreRowType()); + assertEquals(2, recordCount); + + // confirm existence of versioned (archived) DwC-A "dwca-3.0.zip" + File versionedDwca = new File(resourceDir, VERSIONED_ARCHIVE_FILENAME); + assertTrue(versionedDwca.exists()); + + // investigate the DwC-A + File dir = FileUtils.createTempDir(); + CompressionUtil.decompressFile(dir, versionedDwca, true); + + Archive archive = ArchiveFactory.openArchive(dir); + assertEquals(DwcTerm.Occurrence, archive.getCore().getRowType()); + assertEquals(0, archive.getCore().getId().getIndex().intValue()); + assertEquals(4, archive.getCore().getFieldsSorted().size()); + + // confirm order of fields appear honors order of Occurrence Core Extension + assertEquals("basisOfRecord", archive.getCore().getFieldsSorted().get(0).getTerm().simpleName()); + assertEquals("dynamicProperties", archive.getCore().getFieldsSorted().get(1).getTerm().simpleName()); + assertEquals("occurrenceID", archive.getCore().getFieldsSorted().get(2).getTerm().simpleName()); + assertEquals("scientificName", archive.getCore().getFieldsSorted().get(3).getTerm().simpleName()); + + // confirm data written to file + CSVReader reader = archive.getCore().getCSVReader(); + // 1st record + String[] row = reader.next(); + assertEquals("1", row[0]); + assertEquals("occurrence", row[1]); + assertEquals("{\"customValue\":\"val1\",\"datasetID\":\"ds_77\",\"kingdom\":\"animalia\"}", row[2]); + assertEquals("1", row[3]); + assertEquals("puma concolor", row[4]); + + // 2nd record + row = reader.next(); + assertEquals("2", row[0]); + assertEquals("occurrence", row[1]); + assertEquals("{\"customValue\":\"val2\",\"datasetID\":\"ds_77\",\"kingdom\":\"animalia\"}", row[2]); + assertEquals("2", row[3]); + assertEquals("pumm:concolor", row[4]); + reader.close(); + } + + /** + * Use standard mapping for DynamicProperties + */ + @Test + public void testGenerateCoreFromSingleSourceFileStandardDynamicProperties() throws Exception { + // retrieve sample zipped resource XML configuration file, where setting "generated dynamic properties" has been turned off + // and standard mapping is used + File resourceXML = FileUtils.getClasspathFile("resources/res1/resource_standard_dynamic_properties.xml"); + // create resource from single source file + File occurrence = FileUtils.getClasspathFile("resources/res1/occurrence_dynamic_properties.txt"); + Resource resource = getResource(resourceXML, occurrence); + + generateDwca = new GenerateDwca(resource, mockHandler, mockDataDir, mockSourceManager, mockAppConfig, + mockVocabulariesManager); + Map recordsByExtension = generateDwca.call(); + // count for occurrence core only + assertEquals(1, recordsByExtension.size()); + + // 2 rows in core file + String coreRowType = resource.getCoreRowType(); + assertEquals(Constants.DWC_ROWTYPE_OCCURRENCE, coreRowType); + int recordCount = recordsByExtension.get(resource.getCoreRowType()); + assertEquals(2, recordCount); + + // confirm existence of versioned (archived) DwC-A "dwca-3.0.zip" + File versionedDwca = new File(resourceDir, VERSIONED_ARCHIVE_FILENAME); + assertTrue(versionedDwca.exists()); + + // investigate the DwC-A + File dir = FileUtils.createTempDir(); + CompressionUtil.decompressFile(dir, versionedDwca, true); + + Archive archive = ArchiveFactory.openArchive(dir); + assertEquals(DwcTerm.Occurrence, archive.getCore().getRowType()); + assertEquals(0, archive.getCore().getId().getIndex().intValue()); + assertEquals(4, archive.getCore().getFieldsSorted().size()); + + // confirm order of fields appear honors order of Occurrence Core Extension + assertEquals("basisOfRecord", archive.getCore().getFieldsSorted().get(0).getTerm().simpleName()); + assertEquals("dynamicProperties", archive.getCore().getFieldsSorted().get(1).getTerm().simpleName()); + assertEquals("occurrenceID", archive.getCore().getFieldsSorted().get(2).getTerm().simpleName()); + assertEquals("scientificName", archive.getCore().getFieldsSorted().get(3).getTerm().simpleName()); + + // confirm data written to file + CSVReader reader = archive.getCore().getCSVReader(); + // 1st record + String[] row = reader.next(); + assertEquals("1", row[0]); + assertEquals("occurrence", row[1]); + assertEquals("val1", row[2]); + assertEquals("1", row[3]); + assertEquals("puma concolor", row[4]); + + // 2nd record + row = reader.next(); + assertEquals("2", row[0]); + assertEquals("occurrence", row[1]); + assertEquals("val2", row[2]); + assertEquals("2", row[3]); + assertEquals("pumm:concolor", row[4]); + reader.close(); + } + @Test public void testGenerateCoreFromSingleSourceFileNoIdMapped() throws Exception { // retrieve sample zipped resource XML configuration file, with no id mapped diff --git a/src/test/resources/resources/res1/occurrence_dynamic_properties.txt b/src/test/resources/resources/res1/occurrence_dynamic_properties.txt new file mode 100644 index 0000000000..f7cd3bea88 --- /dev/null +++ b/src/test/resources/resources/res1/occurrence_dynamic_properties.txt @@ -0,0 +1,3 @@ +id scientificName basisOfRecord kingdom datasetID customValue +1 puma concolor occurrence animalia ds_77 val1 +2 pumm:concolor occurrence animalia ds_77 val2 diff --git a/src/test/resources/resources/res1/resource_generate_dynamic_properties.xml b/src/test/resources/resources/res1/resource_generate_dynamic_properties.xml new file mode 100644 index 0000000000..3473ae4f14 --- /dev/null +++ b/src/test/resources/resources/res1/resource_generate_dynamic_properties.xml @@ -0,0 +1,71 @@ + + Occurrence + REGISTERED + 3.0 + 2012-08-16 13:15:16.892 UTC + 2 + 54b58d1a-a982-445a-b4c2-5f4e49c899be + f3e8e9a9-df60-40ca-bb71-7f49313b3150 + kbraak@gbif.org + 2012-08-14 17:31:49.546 UTC + + 2012-08-16 14:30:31.779 UTC + PUBLIC_PENDING_PUBLICATION + + 10.5072 + gc8gqc + + + jcuadra@gbif.org + + + + + occurrence + UTF-8 + YYYY-MM-DD + 18 + true + + + 1 + 4262 + 15 + 2012-08-16 12:20:19.819 UTC + + + + + + http://rs.tdwg.org/dwc/terms/Occurrence + + + 2 + http://rs.tdwg.org/dwc/terms/basisOfRecord + + string + + + 1 + http://rs.tdwg.org/dwc/terms/scientificName + + string + + + 0 + http://rs.tdwg.org/dwc/terms/occurrenceID + + string + + + 0 + + + + AfterTranslation + + false + true + + + diff --git a/src/test/resources/resources/res1/resource_standard_dynamic_properties.xml b/src/test/resources/resources/res1/resource_standard_dynamic_properties.xml new file mode 100644 index 0000000000..24e43231d1 --- /dev/null +++ b/src/test/resources/resources/res1/resource_standard_dynamic_properties.xml @@ -0,0 +1,77 @@ + + Occurrence + REGISTERED + 3.0 + 2012-08-16 13:15:16.892 UTC + 2 + 54b58d1a-a982-445a-b4c2-5f4e49c899be + f3e8e9a9-df60-40ca-bb71-7f49313b3150 + kbraak@gbif.org + 2012-08-14 17:31:49.546 UTC + + 2012-08-16 14:30:31.779 UTC + PUBLIC_PENDING_PUBLICATION + + 10.5072 + gc8gqc + + + jcuadra@gbif.org + + + + + occurrence + UTF-8 + YYYY-MM-DD + 18 + true + + + 1 + 4262 + 15 + 2012-08-16 12:20:19.819 UTC + + + + + + http://rs.tdwg.org/dwc/terms/Occurrence + + + 2 + http://rs.tdwg.org/dwc/terms/basisOfRecord + + string + + + 1 + http://rs.tdwg.org/dwc/terms/scientificName + + string + + + 0 + http://rs.tdwg.org/dwc/terms/occurrenceID + + string + + + 5 + http://rs.tdwg.org/dwc/terms/dynamicProperties + + string + + + 0 + + + + AfterTranslation + + false + false + + +