Skip to content

Commit

Permalink
Stop HTML encoding unicode characters when serializing XForm (#685)
Browse files Browse the repository at this point in the history
  • Loading branch information
seadowg authored May 31, 2022
1 parent 660331f commit adde512
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 6 deletions.
10 changes: 4 additions & 6 deletions src/main/java/org/javarosa/xform/util/XFormSerializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@
import org.kxml2.io.KXmlSerializer;
import org.kxml2.kdom.Document;
import org.kxml2.kdom.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.charset.StandardCharsets;

/* this is just a big dump of serialization-related code */

Expand Down Expand Up @@ -88,8 +87,7 @@ public static byte[] getUtfBytes(Document doc) {
KXmlSerializer serializer = new KXmlSerializer();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
try {
Writer osw = new OutputStreamWriter(bos, "UTF-8");
serializer.setOutput(osw);
serializer.setOutput(bos, StandardCharsets.UTF_8.name());
doc.write(serializer);
serializer.flush();
return bos.toByteArray();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package org.javarosa.model.xform;

import org.javarosa.core.test.Scenario;
import org.javarosa.core.util.XFormsElement;
import org.junit.Test;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import static org.javarosa.core.util.BindBuilderXFormsElement.bind;
import static org.javarosa.core.util.XFormsElement.body;
import static org.javarosa.core.util.XFormsElement.head;
import static org.javarosa.core.util.XFormsElement.html;
import static org.javarosa.core.util.XFormsElement.input;
import static org.javarosa.core.util.XFormsElement.mainInstance;
import static org.javarosa.core.util.XFormsElement.model;
import static org.javarosa.core.util.XFormsElement.t;
import static org.javarosa.core.util.XFormsElement.title;

/**
 * Tests for {@link XFormSerializingVisitor} instance serialization.
 */
public class XFormSerializingVisitorTest {

    /**
     * Answers a single string question with a character outside the Basic
     * Multilingual Plane and checks that the serialized main instance contains
     * that character verbatim inside the {@code <text>} element, rather than
     * in some escaped form.
     *
     * @throws IOException if building the scenario or serializing the instance fails
     */
    @Test
    public void serializeInstance_preservesUnicodeCharacters() throws IOException {
        // Minimal form: one string-typed node at /data/text with a matching input.
        XFormsElement formDef = html(
            head(
                title("Some form"),
                model(
                    mainInstance(t("data id=\"some-form\"",
                        t("text")
                    )),
                    bind("/data/text").type("string")
                )
            ),
            body(input("/data/text"))
        );

        Scenario scenario = Scenario.init("Some form", formDef);
        scenario.next();
        // U+1F9DB written as a UTF-16 surrogate pair.
        scenario.answer("\uD83E\uDDDB");

        XFormSerializingVisitor visitor = new XFormSerializingVisitor();
        byte[] serializedInstance = visitor.serializeInstance(scenario.getFormDef().getMainInstance());

        // Decode with an explicit charset: new String(byte[]) uses the platform
        // default, which would mangle the multi-byte character on JVMs whose
        // default is not UTF-8 and fail the test for the wrong reason.
        // NOTE(review): assumes serializeInstance emits UTF-8 bytes -- confirm.
        String serializedXml = new String(serializedInstance, StandardCharsets.UTF_8);
        assertThat(serializedXml, containsString("<text>\uD83E\uDDDB</text>"));
    }
}

0 comments on commit adde512

Please sign in to comment.