Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add use-default-language option #53

Merged
merged 4 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ public RdfToolkitOptions handleArguments(String[] args)
optionHandler.handleLineEnd();
optionHandler.handleOmitXmlnsNamespace();
optionHandler.handleSuppressNamedIndividuals();
optionHandler.handleUseDefaultLanguage();

rdfToolkitOptions.setRunningMode(RUN_ON_FILE);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.TARGET_DIRECTORY_PATTERN;
import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.TARGET_FORMAT;
import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.TRAILING_COMMENT;
import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.USE_DEFAULT_LANGUAGE;
import static org.edmcouncil.rdf_toolkit.runner.constant.CommandLineOption.USE_DTD_SUBSET;
import static org.edmcouncil.rdf_toolkit.runner.constant.RunningMode.PRINT_USAGE_AND_EXIT;
import static org.edmcouncil.rdf_toolkit.runner.constant.RunningMode.RUN_ON_DIRECTORY;
Expand Down Expand Up @@ -391,4 +392,12 @@ public void handleSuppressNamedIndividuals() {
boolean suppressNamedIndividuals = commandLine.hasOption(SUPPRESS_NAMED_INDIVIDUALS.getShortOpt());
rdfToolkitOptions.setSuppressNamedIndividuals(suppressNamedIndividuals);
}

/**
 * Reads the {@code use-default-language} command-line option and stores its value in the toolkit options.
 *
 * <p>When the option is absent, the default language is explicitly set to {@code null}.
 */
public void handleUseDefaultLanguage() {
  final String optionName = USE_DEFAULT_LANGUAGE.getShortOpt();
  final String language = commandLine.hasOption(optionName)
      ? commandLine.getOptionValue(optionName)
      : null;
  rdfToolkitOptions.setDefaultLanguage(language);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
package org.edmcouncil.rdf_toolkit.runner;

import static org.edmcouncil.rdf_toolkit.util.Constants.BASE_IRI;
import static org.edmcouncil.rdf_toolkit.util.Constants.USE_DEFAULT_LANGUAGE;
import static org.edmcouncil.rdf_toolkit.util.Constants.INDENT;
import static org.edmcouncil.rdf_toolkit.util.Constants.INLINE_BLANK_NODES;
import static org.edmcouncil.rdf_toolkit.util.Constants.LEADING_COMMENTS;
Expand Down Expand Up @@ -77,6 +78,7 @@ public class RdfToolkitOptions {
private String lineEnd;
private boolean omitXmlnsNamespace;
private boolean suppressNamedIndividuals;
private String defaultLanguage;

public RdfToolkitOptions(String[] args) {
this.args = args;
Expand Down Expand Up @@ -108,6 +110,7 @@ public Map<String, Object> getOptions() {
options.put(LINE_END, getLineEnd());
options.put(OMIT_XMLNS_NAMESPACE, getOmitXmlnsNamespace());
options.put(SUPPRESS_NAMED_INDIVIDUALS, getSuppressNamedIndividuals());
options.put(USE_DEFAULT_LANGUAGE, getDefaultLanguage());
return options;
}

Expand Down Expand Up @@ -302,4 +305,12 @@ public boolean getSuppressNamedIndividuals() {
public void setSuppressNamedIndividuals(boolean suppressNamedIndividuals) {
this.suppressNamedIndividuals = suppressNamedIndividuals;
}

/**
 * Returns the default language stored in these options.
 *
 * @return the stored default language, or {@code null} if none has been set
 */
public String getDefaultLanguage() {
  return this.defaultLanguage;
}

/**
 * Stores the default language in these options.
 *
 * @param defaultLanguage the language to store; {@code null} is stored as-is
 */
public void setDefaultLanguage(final String defaultLanguage) {
  this.defaultLanguage = defaultLanguage;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ public enum CommandLineOption {
INDENT("i", "indent", true, "sets the indent string. Default is a single tab character"),
LINE_END("ln", "line-end", true, "sets the end-line character(s); supported characters: \\n (LF), \\r (CR). Default is the LF character"),
OMIT_XMLNS_NAMESPACE("oxn", "omit-xmlns-namespace", false, "omits xmlns namespace"),
SUPPRESS_NAMED_INDIVIDUALS("sni", "suppress-named-individuals", false, "suppresses all instances of owl:NamedIndividual");
SUPPRESS_NAMED_INDIVIDUALS("sni", "suppress-named-individuals", false, "suppresses all instances of owl:NamedIndividual"),
USE_DEFAULT_LANGUAGE("udl", "use-default-language", true, "uses default language when a string is untyped and untagged");

private final String shortOpt;
private final String longOpt;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public class Constants {
public static final String LINE_END = "lineEnd";
public static final String OMIT_XMLNS_NAMESPACE = "omitXmlnsNamespace";
public static final String SUPPRESS_NAMED_INDIVIDUALS = "suppressNamedIndividuals";
public static final String USE_DEFAULT_LANGUAGE = "useDefaultLanguage";

/**
* Factory for generating literal values.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public class IndentingWriter extends Writer {
private String indentationString = "\t";
private boolean indentationWritten = false;
private String lineEnd;
private String defaultLanguage; // TODO remove

public IndentingWriter(Writer out) {
this.out = out;
Expand Down Expand Up @@ -70,6 +71,14 @@ public void setLineEnd(String lineEnd) {
this.lineEnd = lineEnd;
}

/**
 * Returns the default language held by this writer.
 *
 * <p>NOTE: the backing field is marked "TODO remove", so this accessor may be temporary.
 *
 * @return the stored default language, or {@code null} if none has been set
 */
public String getDefaultLanguage() {
  return defaultLanguage;
}

/**
 * Stores the default language on this writer.
 *
 * <p>NOTE: the backing field is marked "TODO remove", so this mutator may be temporary.
 *
 * @param defaultLanguage the language to store; {@code null} is stored as-is
 */
public void setDefaultLanguage(final String defaultLanguage) {
  this.defaultLanguage = defaultLanguage;
}

public void writeEOL() throws IOException {
this.write(getLineEnd());
this.indentationWritten = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@
import static org.edmcouncil.rdf_toolkit.comparator.ComparisonUtils.isCollection;
import static org.edmcouncil.rdf_toolkit.util.Constants.INDENT;
import static org.edmcouncil.rdf_toolkit.util.Constants.LINE_END;
import static org.edmcouncil.rdf_toolkit.util.Constants.xsString;

import java.io.IOException;
import java.util.Optional;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
Expand All @@ -40,7 +43,6 @@
import org.edmcouncil.rdf_toolkit.model.UnsortedTurtleObjectList;
import org.edmcouncil.rdf_toolkit.model.UnsortedTurtlePredicateObjectMap;
import org.edmcouncil.rdf_toolkit.util.Constants;
import org.edmcouncil.rdf_toolkit.util.StringDataTypeOptions;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
Expand Down Expand Up @@ -624,58 +626,29 @@ protected void writeObject(Writer out, IRI iri, boolean isRdfType) throws Except
protected void writeObject(Writer out, Literal literal) throws Exception {
if (literal == null) {
out.write("null<Literal>");
} else if (literal.getLanguage().isPresent() || ((overrideStringLanguage != null)
&& (literal.getDatatype().stringValue().equals(Constants.xsString.stringValue())))) {
out.write("{");
if (out instanceof IndentingWriter) {
var indentingWriter = (IndentingWriter) out;
indentingWriter.writeEOL();
indentingWriter.increaseIndentation();
} else {
out.write("\n");
}

String lang = overrideStringLanguage == null ?
literal.getLanguage().orElse(overrideStringLanguage) :
overrideStringLanguage;
return;
}

out.write("\"@language\" : \"" + lang + "\",");
if (out instanceof IndentingWriter) {
var output = (IndentingWriter) out;
output.writeEOL();
} else {
out.write("\n");
}
IRI datatype = literal.getDatatype();
Optional<String> languageOptional = literal.getLanguage();

out.write("\"@value\" : \"" + escapeString(literal.stringValue()) + "\"");
if (overrideStringLanguage != null && (xsString.equals(datatype) || languageOptional.isPresent())) {
writeLanguage(literal, overrideStringLanguage);
} else if (languageOptional.isPresent()) {
writeLanguage(literal, languageOptional.get());
} else if (useDefaultLanguage != null && xsString.equals(datatype)) {
writeLanguage(literal, useDefaultLanguage);
} else if (datatype != null) {
out.write("{");
if (out instanceof IndentingWriter) {
var indentingWriter = (IndentingWriter) out;
indentingWriter.writeEOL();
indentingWriter.increaseIndentation();
} else {
out.write("\n");
}

if (out instanceof IndentingWriter) {
var indentingWriter = (IndentingWriter) out;
indentingWriter.decreaseIndentation();
out.write("}");
} else {
out.write("}");
}
} else if (literal.getDatatype() != null) {
boolean useExplicit = (stringDataTypeOption == StringDataTypeOptions.EXPLICIT)
|| !(Constants.xsString.equals(literal.getDatatype())
|| Constants.rdfLangString.equals(literal.getDatatype()));
if (useExplicit) {
out.write("{");
if (out instanceof IndentingWriter) {
var indentingWriter = (IndentingWriter) out;
indentingWriter.writeEOL();
indentingWriter.increaseIndentation();
} else {
out.write("\n");
}

if (shouldUseExplicitDatatypes(datatype)) {
out.write("\"@type\" : \"");
writeIri(out, literal.getDatatype());
out.write("\",");
Expand All @@ -685,26 +658,59 @@ protected void writeObject(Writer out, Literal literal) throws Exception {
} else {
out.write("\n");
}
}

out.write("\"@value\" : ");
writeString(out, literal.stringValue());
out.write("\"@value\" : ");
writeString(out, literal.stringValue());

if (out instanceof IndentingWriter) {
var indentingWriter = (IndentingWriter) out;
indentingWriter.decreaseIndentation();
indentingWriter.writeEOL();
out.write("}");
} else {
out.write("\n}");
}
if (out instanceof IndentingWriter) {
var indentingWriter = (IndentingWriter) out;
indentingWriter.decreaseIndentation();
indentingWriter.writeEOL();
out.write("}");
} else {
writeString(out, literal.stringValue());
out.write("\n}");
}
} else {
writeString(out, literal.stringValue());
}
}

/**
 * Writes a language-tagged literal as a JSON-LD value object with an {@code "@language"} member followed by an
 * {@code "@value"} member, each on its own line, enclosed in braces.
 *
 * <p>If the target writer is an {@link IndentingWriter}, line breaks are emitted through {@code writeEOL()} and
 * the members are written one indentation level deeper; otherwise a plain {@code "\n"} is used.
 *
 * <p>NOTE(review): {@code out} is not a parameter of this method, so it resolves to a member of the enclosing
 * class - confirm it is the same writer that {@code writeObject} receives.
 *
 * @param literal literal whose string value is written (escaped) as {@code "@value"}
 * @param lang language tag written as {@code "@language"}; the visible callers never pass {@code null}
 * @throws IOException if writing to the underlying writer fails
 */
private void writeLanguage(Literal literal, String lang) throws IOException {
  // Resolve the writer flavour once instead of re-checking before every line break.
  final IndentingWriter indenting = (out instanceof IndentingWriter) ? (IndentingWriter) out : null;

  out.write("{");
  if (indenting != null) {
    indenting.writeEOL();
    indenting.increaseIndentation();
  } else {
    out.write("\n");
  }

  // The tag may come straight from the command line, so escape it like the value below;
  // an unescaped quote or backslash would otherwise produce malformed JSON.
  out.write("\"@language\" : \"" + escapeString(lang) + "\",");
  if (indenting != null) {
    indenting.writeEOL();
  } else {
    out.write("\n");
  }

  out.write("\"@value\" : \"" + escapeString(literal.stringValue()) + "\"");
  if (indenting != null) {
    indenting.writeEOL();
    // Drop back one level before the closing brace is written.
    indenting.decreaseIndentation();
  } else {
    out.write("\n");
  }

  out.write("}");
}

protected void writeString(Writer out, String str) throws Exception {
// Note that JSON does not support multi-line strings, unlike Turtle
if (str == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import static org.edmcouncil.rdf_toolkit.util.Constants.STRING_DATA_TYPE_OPTION;
import static org.edmcouncil.rdf_toolkit.util.Constants.SUPPRESS_NAMED_INDIVIDUALS;
import static org.edmcouncil.rdf_toolkit.util.Constants.TRAILING_COMMENTS;
import static org.edmcouncil.rdf_toolkit.util.Constants.USE_DEFAULT_LANGUAGE;
import static org.edmcouncil.rdf_toolkit.util.Constants.USE_DTD_SUBSET;

import org.eclipse.rdf4j.model.BNode;
Expand Down Expand Up @@ -207,6 +208,12 @@ public abstract class SortedRdfWriter extends AbstractRDFWriter {
*/
protected boolean suppressNamedIndividuals = false;

/**
* If set, this language is applied to literals whose datatype is xsd:string (explicit or implicit) and that
* carry no language tag of their own. Defaults to <code>null</code>.
*/
protected String useDefaultLanguage = null;

/**
* Output stream for this RDF writer.
*/
Expand Down Expand Up @@ -286,6 +293,9 @@ private void processOptions(Map<String, Object> options) {
if (options.containsKey(SUPPRESS_NAMED_INDIVIDUALS)) {
this.suppressNamedIndividuals = Boolean.parseBoolean(options.get(SUPPRESS_NAMED_INDIVIDUALS).toString());
}
if (options.containsKey(USE_DEFAULT_LANGUAGE)) {
this.useDefaultLanguage = (String) options.get(USE_DEFAULT_LANGUAGE);
}
}

/**
Expand Down Expand Up @@ -720,4 +730,9 @@ protected abstract void writePredicateAndObjectValues(Writer out, IRI predicate,
throws Exception;

protected abstract void writeFooter(Writer out, String[] trailingComments) throws Exception;

/**
 * Decides whether a literal's datatype has to be written out explicitly.
 *
 * <p>The datatype is explicit when the string data type option is {@code EXPLICIT}, or when the datatype is
 * neither {@code xsString} nor {@code rdfLangString}.
 *
 * @param literalDatatype datatype of the literal being written
 * @return {@code true} if the datatype should be written explicitly, {@code false} otherwise
 */
protected boolean shouldUseExplicitDatatypes(IRI literalDatatype) {
  if (stringDataTypeOption == StringDataTypeOptions.EXPLICIT) {
    return true;
  }
  final boolean isStringLike =
      Constants.xsString.equals(literalDatatype) || Constants.rdfLangString.equals(literalDatatype);
  return !isStringLike;
}
}
Loading