Skip to content

Commit 10c520a

Browse files
OWL-Varun authored and stevedlawrence committed
Allow <![CDATA[]]> to preserve whitespace in XML text content
Add a new constructor parameter (an enum XMLTextEscapeStyle with Standard and CDATA as values) to XMLTextInfosetOutputter. When CDATA is passed instead of Standard, wrap simple XML elements' text contents in CDATA brackets to preserve any whitespace they contain. DAFFODIL-2346
1 parent d06722c commit 10c520a

File tree

13 files changed

+460
-59
lines changed

13 files changed

+460
-59
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.daffodil.japi.infoset;
19+
20+
/**
21+
* Styles that determine how the text of xs:string elements is escaped in XML output: standard character escaping or wrapping in CDATA tags
22+
*/
23+
public enum XMLTextEscapeStyle {
24+
/**
25+
* Special characters (quotation mark, ampersand, less-than, greater-than) in the
26+
* text of xs:string elements are escaped, while non-special characters are written
27+
* as is.
28+
*/
29+
Standard,
30+
31+
/**
32+
* The text of xs:string elements is wrapped in CDATA tags if the string contains
33+
* special characters (quotation mark, ampersand, less-than, greater-than) or
34+
* whitespace
35+
*/
36+
CDATA,
37+
}

daffodil-japi/src/main/scala/org/apache/daffodil/japi/infoset/Infoset.scala

+21-3
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ import org.apache.daffodil.infoset.DIComplex
4040
import org.apache.daffodil.infoset.DIArray
4141
import org.apache.daffodil.dpath.NodeInfo
4242

43+
import org.apache.daffodil.japi.packageprivate._
44+
4345
/**
4446
* Abstract class used to determine how the infoset representation should be
4547
* input from a call to DataProcessor#unparse. This uses a Cursor API, such
@@ -52,7 +54,7 @@ abstract class InfosetInputter extends SInfosetInputter {
5254
/**
5355
* Return the current infoset inputter event type
5456
*/
55-
def getEventType(): InfosetInputterEventType
57+
def getEventType(): InfosetInputterEventType
5658

5759
/**
5860
* Get the local name of the current event. This will only be called when the
@@ -211,7 +213,7 @@ abstract class InfosetOutputter extends SInfosetOutputter {
211213
* classes, we can document these classes and have a small and clean javadoc.
212214
*/
213215

214-
216+
215217
/**
216218
* [[InfosetOutputter]] to build an infoset represented as a scala.xml.Node
217219
*
@@ -244,7 +246,23 @@ class XMLTextInfosetOutputter private (outputter: SXMLTextInfosetOutputter)
244246
* insert indentation and newlines where it will not affect the
245247
* content of the XML.
246248
*/
247-
def this(os: java.io.OutputStream, pretty: Boolean) = this(new SXMLTextInfosetOutputter(os, pretty))
249+
def this(os: java.io.OutputStream, pretty: Boolean) = this(new SXMLTextInfosetOutputter(os,
250+
pretty, XMLTextEscapeStyleConversions.styleToScala(XMLTextEscapeStyle.Standard)))
251+
252+
/**
253+
* Output the infoset as XML Text, written to a java.io.OutputStream
254+
*
255+
* @param os the java.io.OutputStream to write the XML text to
256+
* @param pretty enable or disable pretty printing. Pretty printing will only
257+
* insert indentation and newlines where it will not affect the
258+
* content of the XML.
259+
* @param xmlTextEscapeStyle determines whether to wrap values of elements of type
260+
* xs:string in CDATA tags in order to preserve
261+
* whitespace.
262+
*/
263+
def this(os: java.io.OutputStream, pretty: Boolean, xmlTextEscapeStyle: XMLTextEscapeStyle) = {
264+
this(new SXMLTextInfosetOutputter(os, pretty, XMLTextEscapeStyleConversions.styleToScala(xmlTextEscapeStyle)))
265+
}
248266

249267
override val infosetOutputter = outputter
250268
}

daffodil-japi/src/main/scala/org/apache/daffodil/japi/packageprivate/Utils.scala

+15
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ import org.apache.daffodil.api.{ ValidationMode => SValidationMode }
3030
import org.apache.daffodil.debugger.{ InteractiveDebugger => SInteractiveDebugger }
3131
import org.apache.daffodil.debugger.{ InteractiveDebuggerRunner => SInteractiveDebuggerRunner }
3232

33+
import org.apache.daffodil.infoset.{ XMLTextEscapeStyle => SXMLTextEscapeStyle }
34+
import org.apache.daffodil.japi.infoset._
35+
3336
private[japi] object ValidationConversions {
3437

3538
def modeToScala(mode: ValidationMode): SValidationMode.Type = {
@@ -42,6 +45,18 @@ private[japi] object ValidationConversions {
4245
}
4346
}
4447

48+
private[japi] object XMLTextEscapeStyleConversions {
49+
50+
def styleToScala(style: XMLTextEscapeStyle): SXMLTextEscapeStyle.Value = {
51+
val sxmlTextEscapeStyle: SXMLTextEscapeStyle.Value = style match {
52+
case XMLTextEscapeStyle.Standard => SXMLTextEscapeStyle.Standard
53+
case XMLTextEscapeStyle.CDATA => SXMLTextEscapeStyle.CDATA
54+
case _ => throw new Exception("Unrecognized value: %s for parameter: xmlTextEscapeStyle. Must be 'Standard' or 'CDATA'.".format(style))
55+
}
56+
sxmlTextEscapeStyle
57+
}
58+
}
59+
4560
/* A wrapper interactive debugger that scala debugging can talk to, which is
4661
* then forwarded onto the java interactive debugger, if a user implements
4762
* their own debugger in java.

daffodil-japi/src/test/java/org/apache/daffodil/example/TestJavaAPI.java

+84-5
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@
5353

5454
import javax.xml.XMLConstants;
5555

56+
import java.nio.charset.StandardCharsets;
57+
import org.apache.daffodil.japi.infoset.*;
58+
5659
public class TestJavaAPI {
5760

5861
/**
@@ -262,7 +265,7 @@ public void testJavaAPI2() throws IOException, ClassNotFoundException {
262265

263266
/**
264267
* Verify that we can detect when the parse did not consume all the data.
265-
*
268+
*
266269
* @throws IOException
267270
*/
268271
@Test
@@ -393,7 +396,7 @@ public void testJavaAPI5() throws IOException, ClassNotFoundException {
393396
/***
394397
* Verify that the compiler throws a FileNotFound exception when fed a list
395398
* of schema files that do not exist.
396-
*
399+
*
397400
* @throws IOException
398401
*/
399402
@Test
@@ -415,7 +418,7 @@ public void testJavaAPI6() throws IOException {
415418
/**
416419
* Tests a user submitted case where the XML appears to be serializing odd
417420
* xml entities into the output.
418-
*
421+
*
419422
* @throws IOException
420423
*/
421424
@Test
@@ -452,7 +455,7 @@ public void testJavaAPI7() throws IOException, ClassNotFoundException {
452455
* that this test uses double newline as a terminator for the first element
453456
* in the sequence rather than double newline as a separator for the
454457
* sequence
455-
*
458+
*
456459
* @throws IOException
457460
*/
458461
@Test
@@ -931,7 +934,6 @@ public void testJavaAPI20() throws IOException, ClassNotFoundException {
931934
assertEquals("42", unparseBos.toString());
932935
}
933936

934-
935937
@Test
936938
public void testJavaAPI21() throws IOException, ClassNotFoundException {
937939
// Test SAX parsing with errors
@@ -1213,4 +1215,81 @@ public void testJavaAPI26() throws IOException, ClassNotFoundException, External
12131215
assertTrue(DaffodilXMLEntityResolver.getXMLEntityResolver() != null);
12141216
assertTrue(DaffodilXMLEntityResolver.getLSResourceResolver() != null);
12151217
}
1218+
1219+
@Test
1220+
public void testJavaAPINullXMLTextEscapeStyle() throws IOException, ClassNotFoundException {
1221+
ByteArrayOutputStream xmlBos = new ByteArrayOutputStream();
1222+
try {
1223+
XMLTextInfosetOutputter outputter = new XMLTextInfosetOutputter(xmlBos, true, null);
1224+
} catch (Exception e) {
1225+
String msg = e.getMessage().toLowerCase();
1226+
assertTrue(msg.contains("unrecognized"));
1227+
assertTrue(msg.contains("null"));
1228+
assertTrue(msg.contains("xmltextescapestyle"));
1229+
}
1230+
}
1231+
1232+
@Test
1233+
public void testJavaAPICDATA1() throws IOException, ClassNotFoundException {
1234+
String expected = "NO_WHITESPACE_OR_SPECIAL_CHARS";
1235+
String data = "NO_WHITESPACE_OR_SPECIAL_CHARS$";
1236+
String schemaType = "string";
1237+
doXMLTextEscapeStyleTest(expected, data, schemaType);
1238+
}
1239+
1240+
@Test
1241+
public void testJavaAPICDATA2() throws IOException, ClassNotFoundException {
1242+
String expected = "<![CDATA[ 'some' stuff here &#xE000; and ]]]]><![CDATA[> even]]>";
1243+
String data = " 'some' stuff here &#xE000; and ]]> even$";
1244+
String schemaType = "string";
1245+
doXMLTextEscapeStyleTest(expected, data, schemaType);
1246+
}
1247+
1248+
@Test
1249+
public void testJavaAPICDATA3() throws IOException, ClassNotFoundException {
1250+
String expected = "6.892";
1251+
String data = "6.892";
1252+
String schemaType = "float";
1253+
doXMLTextEscapeStyleTest(expected, data, schemaType);
1254+
}
1255+
1256+
@Test
1257+
public void testJavaAPICDATA4() throws IOException, ClassNotFoundException {
1258+
String expected = "<![CDATA[this contains a CRLF\nline ending]]>";
1259+
String data = "this contains a CRLF\r\nline ending$";
1260+
String schemaType = "string";
1261+
doXMLTextEscapeStyleTest(expected, data, schemaType);
1262+
}
1263+
1264+
@Test
1265+
public void testJavaAPICDATA5() throws IOException, ClassNotFoundException {
1266+
String expected = "<![CDATA[abcd&gt]]>";
1267+
String data = "abcd&gt$";
1268+
String schemaType = "string";
1269+
doXMLTextEscapeStyleTest(expected, data, schemaType);
1270+
}
1271+
1272+
public void doXMLTextEscapeStyleTest(String expect, String data, String schemaType)
1273+
throws IOException, ClassNotFoundException {
1274+
1275+
org.apache.daffodil.japi.Compiler c = Daffodil.compiler();
1276+
java.io.File schemaFile = getResource("/test/japi/mySchemaCDATA.dfdl.xsd");
1277+
ProcessorFactory pf = c.compileFile(schemaFile, schemaType, null);
1278+
DataProcessor dp = pf.onPath("/");
1279+
1280+
ByteArrayInputStream is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8));
1281+
InputSourceDataInputStream input = new InputSourceDataInputStream(is);
1282+
ByteArrayOutputStream bosDP = new ByteArrayOutputStream();
1283+
XMLTextInfosetOutputter outputter = new XMLTextInfosetOutputter(bosDP, true, XMLTextEscapeStyle.CDATA);
1284+
ParseResult res = dp.parse(input, outputter);
1285+
boolean err = res.isError();
1286+
1287+
String infosetDPString = bosDP.toString();
1288+
int start = infosetDPString.indexOf(".com\">") + 6;
1289+
int end = infosetDPString.indexOf("</tns");
1290+
String value = infosetDPString.substring(start, end);
1291+
1292+
assertFalse(err);
1293+
assertEquals(expect, value);
1294+
}
12161295
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Licensed to the Apache Software Foundation (ASF) under one or more
4+
contributor license agreements. See the NOTICE file distributed with
5+
this work for additional information regarding copyright ownership.
6+
The ASF licenses this file to You under the Apache License, Version 2.0
7+
(the "License"); you may not use this file except in compliance with
8+
the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
-->
18+
19+
<schema xmlns="http://www.w3.org/2001/XMLSchema"
20+
targetNamespace="http://example.com" xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
21+
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
22+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:tns="http://example.com">
23+
24+
<annotation>
25+
<appinfo source="http://www.ogf.org/dfdl/">
26+
<dfdl:format ref="tns:GeneralFormat" />
27+
</appinfo>
28+
</annotation>
29+
30+
<include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
31+
32+
<element name="string" type="xsd:string" dfdl:lengthKind="delimited" dfdl:terminator="$"/>
33+
<element name="float" type="xsd:float"
34+
dfdl:representation="text"
35+
dfdl:textNumberRep="standard"
36+
dfdl:lengthKind="delimited"
37+
dfdl:encoding="UTF-8"
38+
dfdl:textNumberPattern="0.000"
39+
dfdl:textStandardDecimalSeparator="." />
40+
41+
</schema>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.daffodil.infoset
19+
/**
20+
* XMLTextEscapeStyle determines how to wrap values of elements of type xs:string
21+
*
22+
* Standard - Special characters (quotation mark, ampersand, less-than, greater-than)
23+
* in the text of xs:string elements are escaped, while non-special characters are
24+
* written as is.
25+
*
26+
* CDATA - The text of xs:string elements is wrapped in CDATA tags if the string
27+
* contains whitespace or special characters (quotation mark, ampersand, less-than,
28+
* greater-than)
29+
*/
30+
object XMLTextEscapeStyle extends Enumeration {
31+
type XMLTextEscapeStyle = Value
32+
val Standard, CDATA = Value
33+
}

daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala

+23-5
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,16 @@ import org.apache.daffodil.util.Indentable
3131
* @param writer The writer to write the XML text to
3232
* @param pretty Whether or not to enable pretty printing. When set to true, XML
3333
* elements are indented and newlines are inserted.
34+
* @param xmlTextEscapeStyle Determines whether to wrap values of elements of type
35+
* xs:string in CDATA tags in order to preserve whitespace.
3436
*/
35-
class XMLTextInfosetOutputter private (writer: java.io.Writer, pretty: Boolean)
37+
class XMLTextInfosetOutputter private (writer: java.io.Writer, pretty: Boolean,
38+
xmlTextEscapeStyle: XMLTextEscapeStyle.Value)
3639
extends InfosetOutputter with Indentable with XMLInfosetOutputter {
3740

38-
def this(os: java.io.OutputStream, pretty: Boolean) = {
39-
this(new java.io.OutputStreamWriter(os, StandardCharsets.UTF_8), pretty)
41+
def this(os: java.io.OutputStream, pretty: Boolean,
42+
xmlTextEscapeStyle: XMLTextEscapeStyle.Value = XMLTextEscapeStyle.Standard) = {
43+
this(new java.io.OutputStreamWriter(os, StandardCharsets.UTF_8), pretty, xmlTextEscapeStyle)
4044
}
4145

4246
private val sb = new StringBuilder()
@@ -160,7 +164,21 @@ class XMLTextInfosetOutputter private (writer: java.io.Writer, pretty: Boolean)
160164
if (simple.erd.runtimeProperties.get(XMLTextInfoset.stringAsXml) == "true") {
161165
writeStringAsXml(simpleVal)
162166
} else {
163-
writer.write(scala.xml.Utility.escape(remapped(simpleVal)))
167+
val xmlSafe = remapped(simpleVal)
168+
val escaped = xmlTextEscapeStyle match {
169+
case XMLTextEscapeStyle.CDATA => {
170+
val needsCDataEscape = xmlSafe.exists { c =>
171+
scala.xml.Utility.Escapes.escMap.contains(c) || c.isWhitespace
172+
}
173+
if (needsCDataEscape) {
174+
"<![CDATA[%s]]>".format(xmlSafe.replaceAll("]]>", "]]]]><![CDATA[>"))
175+
} else {
176+
xmlSafe
177+
}
178+
}
179+
case XMLTextEscapeStyle.Standard => scala.xml.Utility.escape(xmlSafe)
180+
}
181+
writer.write(escaped)
164182
}
165183
} else {
166184
writer.write(simple.dataValueAsString)
@@ -170,7 +188,7 @@ class XMLTextInfosetOutputter private (writer: java.io.Writer, pretty: Boolean)
170188
outputEndTag(simple)
171189
inScopeComplexElementHasChildren = true
172190
}
173-
191+
174192
override def endSimple(simple: DISimple): Unit = {
175193
// do nothing, everything is done in startSimple
176194
}

0 commit comments

Comments
 (0)