diff --git a/registry-ws/src/main/java/org/gbif/registry/ws/export/CsvWriter.java b/registry-ws/src/main/java/org/gbif/registry/ws/export/CsvWriter.java index 9480890df4..30fc83de5b 100644 --- a/registry-ws/src/main/java/org/gbif/registry/ws/export/CsvWriter.java +++ b/registry-ws/src/main/java/org/gbif/registry/ws/export/CsvWriter.java @@ -11,6 +11,7 @@ import java.io.Writer; import java.util.List; import java.util.UUID; +import java.util.regex.Pattern; import java.util.stream.Collectors; import lombok.Builder; @@ -55,10 +56,13 @@ private CsvPreference csvPreference() { @SneakyThrows public void export(Writer writer) { try (ICsvBeanWriter beanWriter = new CsvBeanWriter(writer, csvPreference())) { + int count = 1; beanWriter.writeHeader(header); for (T o : pager) { beanWriter.write(o, fields, processors); + count++; } + System.out.println("count export " + count); writer.flush(); } } @@ -92,21 +96,21 @@ public static CsvWriter datasetSearchResultCsvWriter(Iterab .fields(new String[]{"key", "title", "doi", "license", "type", "subType", "hostingOrganizationKey", "hostingOrganizationTitle", "hostingCountry", "publishingOrganizationKey", "publishingOrganizationTitle", "publishingCountry","endorsingNodeKey", "networkKeys", "projectIdentifier", "recordCount", "nameUsagesCount"}) .header(new String[]{"dataset_key", "title", "doi", "license", "type", "sub_type", "hosting_organization_Key", "hosting_organization_title", "hosting_country","publishing_organization_key", "publishing_organization_title", "publishing_country", "endorsing_node_key", "network_keys", "project_identifier", "occurrence_records_count", "name_usages_count"}) // "recordCount", "nameUsagesCount" - .processors(new CellProcessor[]{new UUIDProcessor(), //key - null, //title + .processors(new CellProcessor[]{new UUIDProcessor(), //key + new CleanStringProcessor(), //title new DOIProcessor(), //doi new Optional(new ParseEnum(License.class)), //license new Optional(new ParseEnum(DatasetType.class)), //type new Optional(new ParseEnum(DatasetSubtype.class)),//subType new UUIDProcessor(), //hostingOrganizationKey - null, //hostingOrganizationTitle + new CleanStringProcessor(), //hostingOrganizationTitle new CountryProcessor(), //hostingCountry new UUIDProcessor(), //publishingOrganizationKey - null, //publishingOrganizationTitle + new CleanStringProcessor(), //publishingOrganizationTitle new CountryProcessor(), //publishingCountry new UUIDProcessor(), //endorsingNodeKey new ListUUIDProcessor(), //networkKeys - null, //projectIdentifier + new CleanStringProcessor(), //projectIdentifier new Optional(new ParseInt()), //recordCount new Optional(new ParseInt()) //nameUsagesCount }) @@ -156,4 +160,24 @@ public String execute(Object value, CsvContext csvContext) { return value != null ? ((Country) value).getIso2LetterCode() : ""; } } + + + /** + * Produces a String instance clean of delimiter. + * If the value is null an empty string is returned. + * Borrowed from Occurrence Downloads!!. + */ + private static class CleanStringProcessor implements CellProcessor { + + private static final String DELIMETERS_MATCH = + "\\t|\\n|\\r|(?:(?>\\u000D\\u000A)|[\\u000A\\u000B\\u000C\\u000D\\u0085\\u2028\\u2029\\u0000])"; + + public static final Pattern DELIMETERS_MATCH_PATTERN = Pattern.compile(DELIMETERS_MATCH); + + @Override + public String execute(Object value, CsvContext context) { + return value != null ? DELIMETERS_MATCH_PATTERN.matcher((String) value).replaceAll(" ") : ""; + } + + } }