diff --git a/daffodil-cli/src/it/scala/org/apache/daffodil/xml/TestXMLConversionControl.scala b/daffodil-cli/src/it/scala/org/apache/daffodil/xml/TestXMLConversionControl.scala new file mode 100644 index 0000000000..c0b19927db --- /dev/null +++ b/daffodil-cli/src/it/scala/org/apache/daffodil/xml/TestXMLConversionControl.scala @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.xml + +import org.apache.commons.io.FileUtils +import org.junit.Test +import org.apache.daffodil.CLI.Util._ +import org.apache.daffodil.Main.ExitCode +import org.junit.Assert.assertTrue + +import java.nio.charset.StandardCharsets + +class TestXMLConversionControl { + + // + // To run tests conveniently under IntelliJ IDEA, + // rename the src/test dir to src/test1. Rename the src/it dir to src/test. + // Then modify this val to be "test". + // Then you can run these as ordinary junit-style tests under the IDE. 
+ val test = "it" + + @Test def test_CLI_XMLConversionControlConvertCR(): Unit = { + withTempFile { output => + val schema = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/aString.dfdl.xsd") + val config = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/config-convertCR.cfg.xml") + val input = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/input/inputWithCRLFs.bin") + + runCLI(args"parse -s $schema -c $config --root a -o $output $input") { + cli => //ok + }(ExitCode.Success) + + val res = FileUtils.readFileToString(output.toFile, StandardCharsets.UTF_8) + assertTrue(res.contains("abc\ndef\nghi")) + } + } + + @Test def test_CLI_XMLConversionControlPreserveCRParse(): Unit = { + withTempFile { output => + val schema = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/aString.dfdl.xsd") + val config = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/config-preserveCR.cfg.xml") + val input = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/input/inputWithCRLFs.bin") + + runCLI(args"parse -s $schema -c $config --root a -o $output $input") { cli => + //ok + }(ExitCode.Success) + + val res = FileUtils.readFileToString(output.toFile, StandardCharsets.UTF_8) + assertTrue(res.contains("abc\uE00D\ndef\uE00D\nghi")) + } + } + + @Test def test_CLI_XMLConversionControlPreserveCRRoundTrip(): Unit = { + withTempFile { output => + withTempFile { xmlOut => + val schema = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/aString.dfdl.xsd") + val config = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/config-preserveCR.cfg.xml") + val input = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/input/inputWithCRLFs.bin") + + var cmd = args"parse -s $schema -c $config --root a -o $xmlOut $input " + runCLI(cmd) { cli => + // ok + }(ExitCode.Success) + + cmd = args"unparse -s $schema -c $config --root a -o $output $xmlOut" + runCLI(cmd) { cli => + // ok + 
}(ExitCode.Success) + + + val xml = FileUtils.readFileToString(xmlOut.toFile, StandardCharsets.UTF_8) + assertTrue(xml.toString.contains("abc\uE00D\ndef\uE00D\nghi")) + } + + val xml = FileUtils.readFileToString(output.toFile, StandardCharsets.UTF_8) + assertTrue(xml.toString.contains("abc\r\ndef\r\nghi")) + } + } + + @Test def test_CLI_XMLConversionControlPreserveCRUnparseToFile(): Unit = { + withTempFile { output => + val schema = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/aString.dfdl.xsd") + val config = path(s"daffodil-cli/src/$test/resources/org/apache/daffodil/CLI/config-preserveCR.cfg.xml") + + runCLI(args"unparse -s $schema -c $config --root a -o $output ") { cli => + cli.send("abc\uE00D\ndef\uE00D\nghi", inputDone = true) + }(ExitCode.Success) + + val res = FileUtils.readFileToString(output.toFile, StandardCharsets.UTF_8) + assertTrue(res.contains("abc\r\ndef\r\nghi")) + } + } +} diff --git a/daffodil-cli/src/main/scala/org/apache/daffodil/InfosetTypes.scala b/daffodil-cli/src/main/scala/org/apache/daffodil/InfosetTypes.scala index 375286689e..42853dca71 100644 --- a/daffodil-cli/src/main/scala/org/apache/daffodil/InfosetTypes.scala +++ b/daffodil-cli/src/main/scala/org/apache/daffodil/InfosetTypes.scala @@ -276,7 +276,7 @@ case class XMLTextInfosetHandler(dataProcessor: DataProcessor) extends InfosetHandler { def parse(input: InputSourceDataInputStream, os: OutputStream): InfosetParseResult = { - val output = new XMLTextInfosetOutputter(os, pretty = true) + val output = new XMLTextInfosetOutputter(os, pretty = true, dataProcessor.daffodilConfig.xmlConversionControl) val pr = parseWithInfosetOutputter(input, output) new InfosetParseResult(pr) } @@ -286,7 +286,7 @@ case class XMLTextInfosetHandler(dataProcessor: DataProcessor) case bytes: Array[Byte] => new ByteArrayInputStream(bytes) case is: InputStream => is } - val input = new XMLTextInfosetInputter(is) + val input = new XMLTextInfosetInputter(is, 
dataProcessor.daffodilConfig.xmlConversionControl) val ur = unparseWithInfosetInputter(input, output) ur } @@ -330,14 +330,14 @@ case class JDOMInfosetHandler(dataProcessor: DataProcessor) extends InfosetHandler { def parse(input: InputSourceDataInputStream, os: OutputStream): InfosetParseResult = { - val output = new JDOMInfosetOutputter() + val output = new JDOMInfosetOutputter(dataProcessor.daffodilConfig.xmlConversionControl) val pr = parseWithInfosetOutputter(input, output) new JDOMInfosetParseResult(pr, output) } def unparse(data: AnyRef, output: DFDL.Output): UnparseResult = { val doc = data.asInstanceOf[org.jdom2.Document] - val input = new JDOMInfosetInputter(doc) + val input = new JDOMInfosetInputter(doc, dataProcessor.daffodilConfig.xmlConversionControl) val ur = unparseWithInfosetInputter(input, output) ur } @@ -372,14 +372,14 @@ case class ScalaXMLInfosetHandler(dataProcessor: DataProcessor) extends InfosetHandler { def parse(input: InputSourceDataInputStream, os: OutputStream): InfosetParseResult = { - val output = new ScalaXMLInfosetOutputter() + val output = new ScalaXMLInfosetOutputter(dataProcessor.daffodilConfig.xmlConversionControl) val pr = parseWithInfosetOutputter(input, output) new ScalaXMLInfosetParseResult(pr, output) } def unparse(data: AnyRef, output: DFDL.Output): UnparseResult = { val node = data.asInstanceOf[scala.xml.Node] - val input = new ScalaXMLInfosetInputter(node) + val input = new ScalaXMLInfosetInputter(node, dataProcessor.daffodilConfig.xmlConversionControl) val ur = unparseWithInfosetInputter(input, output) ur } @@ -414,15 +414,17 @@ class ScalaXMLInfosetParseResult(parseResult: ParseResult, output: ScalaXMLInfos case class W3CDOMInfosetHandler(dataProcessor: DataProcessor) extends InfosetHandler { + private val xcc = dataProcessor.daffodilConfig.xmlConversionControl + def parse(input: InputSourceDataInputStream, os: OutputStream): InfosetParseResult = { - val output = new W3CDOMInfosetOutputter() + val output = new 
W3CDOMInfosetOutputter(xcc) val pr = parseWithInfosetOutputter(input, output) new W3CDOMInfosetParseResult(pr, output) } def unparse(data: AnyRef, output: DFDL.Output): UnparseResult = { val doc = data.asInstanceOf[ThreadLocal[org.w3c.dom.Document]].get - val input = new W3CDOMInfosetInputter(doc) + val input = new W3CDOMInfosetInputter(doc, xcc) val ur = unparseWithInfosetInputter(input, output) ur } @@ -473,18 +475,18 @@ case class NULLInfosetHandler(dataProcessor: DataProcessor) } def unparse(data: AnyRef, output: DFDL.Output): UnparseResult = { - val events = data.asInstanceOf[Array[NullInfosetInputter.Event]] - val input = new NullInfosetInputter(events) + val is = data match { + case bytes: Array[Byte] => new ByteArrayInputStream(bytes) + case is: InputStream => is + } + val input = new NullInfosetInputter(is, dataProcessor.daffodilConfig.xmlConversionControl) val ur = unparseWithInfosetInputter(input, output) ur } - def dataToInfoset(bytes: Array[Byte]): AnyRef = dataToInfoset(new ByteArrayInputStream(bytes)) + def dataToInfoset(bytes: Array[Byte]): AnyRef = bytes - def dataToInfoset(stream: InputStream): AnyRef = { - val events = NullInfosetInputter.toEvents(stream) - events - } + def dataToInfoset(stream: InputStream): AnyRef = stream } /** diff --git a/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala b/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala index 3afea040a2..9e720703c5 100644 --- a/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala +++ b/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala @@ -874,7 +874,7 @@ object Main { val tunables = DaffodilTunables.configPlusMoreTunablesMap(performanceOpts.tunables, optDafConfig) createProcessorFromSchema(performanceOpts.schema(), performanceOpts.rootNS.toOption, performanceOpts.path.toOption, tunables, validate) } - }.map{ _.withExternalVariables(combineExternalVariables(performanceOpts.vars, optDafConfig)) } + }.map{ 
_.withExternalVariables(combineExternalVariables(performanceOpts.vars, dafConfig)) } .map{ _.withValidationMode(validate) } val rc: ExitCode.Value = processor match { diff --git a/daffodil-cli/src/test/resources/org/apache/daffodil/CLI/aString.dfdl.xsd b/daffodil-cli/src/test/resources/org/apache/daffodil/CLI/aString.dfdl.xsd new file mode 100644 index 0000000000..c64575ea9b --- /dev/null +++ b/daffodil-cli/src/test/resources/org/apache/daffodil/CLI/aString.dfdl.xsd @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + diff --git a/daffodil-cli/src/test/resources/org/apache/daffodil/CLI/input/inputWithCRLFs.bin b/daffodil-cli/src/test/resources/org/apache/daffodil/CLI/input/inputWithCRLFs.bin new file mode 100644 index 0000000000..f5bc74f655 --- /dev/null +++ b/daffodil-cli/src/test/resources/org/apache/daffodil/CLI/input/inputWithCRLFs.bin @@ -0,0 +1,3 @@ +abc +def +ghi \ No newline at end of file diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/dsom/Facets.scala b/daffodil-core/src/main/scala/org/apache/daffodil/dsom/Facets.scala index 473f26c72a..8ce2d663cd 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/dsom/Facets.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/dsom/Facets.scala @@ -21,10 +21,41 @@ import java.math.BigInteger import scala.xml.Node import org.apache.daffodil.exceptions.Assert import org.apache.daffodil.dpath.NodeInfo.PrimType -import org.apache.daffodil.xml.XMLUtils +import org.apache.daffodil.xml.RemapPUAToXMLIllegalChar + +object Facets { + + /** + * Remapper used to convert pattern facet values + * so that they can describe the DFDL infoset (for use + * in our limited Daffodil-itself validation, as well + * as the same pattern being useful for full validation + * by a regular XSD validator. + * + * A regular XML validator (ex: Xerces) will need to look at the + * infoset as we've mapped it to the PUA. 
Hence, if the + * pattern is looking for say, control characters, it cannot + * look for control-A (U+0001), because that will have been + * remapped to U+E001. + * + * So the pattern facet value will have E001 in it, likely + * expressed as `&#xE001;`. That will work fine for + * external validation by Xerces or other. + * + * But Daffodil's internal (aka limited) validation operates + * on the regular DFDL infoset, before any remapping for XML occurs. + * + * So we instead map the pattern facet value itself down + * so that the `&#xE000;` in the pattern turns into an actual + * NUL (\u0000 or \x00) in the regex as is used for limited validation. + */ + private val remapper = + new RemapPUAToXMLIllegalChar() +} trait Facets { self: Restriction => import org.apache.daffodil.dsom.FacetTypes._ + import Facets._ private def retrieveFacetValueFromRestrictionBase(xml: Node, facetName: Facet.Type): String = { val res = xml \\ "restriction" \ facetName.toString() \ "@value" @@ -151,7 +182,17 @@ trait Facets { self: Restriction => // The XSD numeric character entity &#xE000; can be used to match ASCII NUL // (char code 0). // - val remapped: String = XMLUtils.remapPUAToXMLIllegalCharacters(v) + // This remapping is for pattern facets, which are inside a DFDL schema, + // and so will not contain CR characters, since XML reading will convert those + // to LF. To discuss CR in this pattern we can't use `&#xD;` syntax because that + // turns into a CR which gets turned into a LF. Plus the pattern value is + // an XML attribute, the value of which gets its whitespace collapsed, all + // line-ending chars converted to spaces, and adjacent spaces collapsed to one. + // + // So a pattern facet must use `\r` and `\n` to describe line-endings within the pattern. + // And in general one must be careful about whitespace. 
+ // + val remapped: String = remapper.remap(v) (f, remapped.r) } } diff --git a/daffodil-japi/src/test/java/org/apache/daffodil/example/TestJavaAPI.java b/daffodil-japi/src/test/java/org/apache/daffodil/example/TestJavaAPI.java index 8b34d0d770..f71236add8 100644 --- a/daffodil-japi/src/test/java/org/apache/daffodil/example/TestJavaAPI.java +++ b/daffodil-japi/src/test/java/org/apache/daffodil/example/TestJavaAPI.java @@ -127,7 +127,7 @@ public void testJavaAPI1() throws IOException, ClassNotFoundException { java.io.File file = getResource("/test/japi/myData.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -137,7 +137,7 @@ public void testJavaAPI1() throws IOException, ClassNotFoundException { java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); java.nio.channels.WritableByteChannel wbc = java.nio.channels.Channels.newChannel(bos); - JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult()); + JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult(), dp.daffodilConfig().xmlConversionControl()); UnparseResult res2 = dp.unparse(inputter, wbc); err = res2.isError(); assertFalse(err); @@ -174,7 +174,7 @@ public void testJavaAPI1_A() throws Exception { byte[] ba = FileUtils.readFileToByteArray(data); ByteBuffer bb = ByteBuffer.wrap(ba); InputSourceDataInputStream dis = new InputSourceDataInputStream(bb); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = parser.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); 
@@ -184,7 +184,7 @@ public void testJavaAPI1_A() throws Exception { java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); java.nio.channels.WritableByteChannel wbc = java.nio.channels.Channels.newChannel(bos); - JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult()); + JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult(), dp.daffodilConfig().xmlConversionControl()); UnparseResult res2 = dp.unparse(inputter, wbc); err = res2.isError(); assertFalse(err); @@ -236,7 +236,7 @@ public void testJavaAPI2() throws IOException, ClassNotFoundException { // and byte buffer. byte[] ba = FileUtils.readFileToByteArray(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(ba); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); // TODO: NEED a java friendly way to get the status of the outputter. 
@@ -278,7 +278,7 @@ public void testJavaAPI3() throws IOException, ClassNotFoundException { java.io.File file = getResource("/test/japi/myData16.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -287,7 +287,7 @@ public void testJavaAPI3() throws IOException, ClassNotFoundException { java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); java.nio.channels.WritableByteChannel wbc = java.nio.channels.Channels.newChannel(bos); - JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult()); + JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult(), dp.daffodilConfig().xmlConversionControl()); UnparseResult res2 = dp.unparse(inputter, wbc); err = res2.isError(); assertFalse(err); @@ -318,7 +318,7 @@ public void testJavaAPI3_A() throws Exception { java.io.File file = getResource("/test/japi/myData16.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = parser.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -327,7 +327,7 @@ public void testJavaAPI3_A() throws Exception { java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); java.nio.channels.WritableByteChannel wbc = java.nio.channels.Channels.newChannel(bos); - JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult()); + JDOMInfosetInputter inputter = new 
JDOMInfosetInputter(outputter.getResult(), dp.daffodilConfig().xmlConversionControl()); UnparseResult res2 = dp.unparse(inputter, wbc); err = res2.isError(); assertFalse(err); @@ -346,7 +346,7 @@ public void testJavaAPI4b() throws IOException, ClassNotFoundException { java.io.File file = getResource("/test/japi/myData2.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -355,7 +355,7 @@ public void testJavaAPI4b() throws IOException, ClassNotFoundException { java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); java.nio.channels.WritableByteChannel wbc = java.nio.channels.Channels.newChannel(bos); - JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult()); + JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult(), dp.daffodilConfig().xmlConversionControl()); UnparseResult res2 = dp.unparse(inputter, wbc); err = res2.isError(); assertFalse(err); @@ -374,7 +374,7 @@ public void testJavaAPI5() throws IOException, ClassNotFoundException { // bytes java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -383,7 +383,7 @@ public void testJavaAPI5() throws IOException, ClassNotFoundException { java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); java.nio.channels.WritableByteChannel wbc = 
java.nio.channels.Channels.newChannel(bos); - JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult()); + JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult(), dp.daffodilConfig().xmlConversionControl()); UnparseResult res2 = dp.unparse(inputter, wbc); err = res2.isError(); assertFalse(err); @@ -433,14 +433,14 @@ public void testJavaAPI7() throws IOException, ClassNotFoundException { java.io.File file = getResource("/test/japi/01very_simple.txt"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); java.nio.channels.WritableByteChannel wbc = java.nio.channels.Channels.newChannel(bos); - JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult()); + JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult(), dp.daffodilConfig().xmlConversionControl()); UnparseResult res2 = dp.unparse(inputter, wbc); err = res2.isError(); assertFalse(err); @@ -466,14 +466,14 @@ public void testJavaAPI8() throws IOException, ClassNotFoundException { java.io.File file = getResource("/test/japi/01very_simple.txt"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); 
java.nio.channels.WritableByteChannel wbc = java.nio.channels.Channels.newChannel(bos); - JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult()); + JDOMInfosetInputter inputter = new JDOMInfosetInputter(outputter.getResult(), dp.daffodilConfig().xmlConversionControl()); UnparseResult res2 = dp.unparse(inputter, wbc); err = res2.isError(); assertFalse(err); @@ -495,7 +495,7 @@ public void testJavaAPI9() throws IOException, ClassNotFoundException { java.io.File file = getResource("/test/japi/01very_simple.txt"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -504,7 +504,7 @@ public void testJavaAPI9() throws IOException, ClassNotFoundException { java.io.ByteArrayOutputStream bos1 = new java.io.ByteArrayOutputStream(); java.nio.channels.WritableByteChannel wbc1 = java.nio.channels.Channels.newChannel(bos1); - JDOMInfosetInputter inputter1 = new JDOMInfosetInputter(doc1); + JDOMInfosetInputter inputter1 = new JDOMInfosetInputter(doc1, dp.daffodilConfig().xmlConversionControl()); UnparseResult res2 = dp.unparse(inputter1, wbc1); err = res2.isError(); assertFalse(err); @@ -514,7 +514,7 @@ public void testJavaAPI9() throws IOException, ClassNotFoundException { java.io.ByteArrayOutputStream bos2 = new java.io.ByteArrayOutputStream(); java.nio.channels.WritableByteChannel wbc2 = java.nio.channels.Channels.newChannel(bos2); - JDOMInfosetInputter inputter2 = new JDOMInfosetInputter(doc2); + JDOMInfosetInputter inputter2 = new JDOMInfosetInputter(doc2, dp.daffodilConfig().xmlConversionControl()); UnparseResult res3 = dp.unparse(inputter2, wbc2); err = res3.isError(); assertFalse(err); @@ -536,7 +536,7 @@ public 
void testJavaAPI10() throws IOException, ClassNotFoundException { java.io.File file = getResource("/test/japi/myData4.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -563,7 +563,7 @@ public void testJavaAPI11() throws IOException, ClassNotFoundException { java.io.File file = getResource("/test/japi/myData5.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -605,7 +605,7 @@ public void testJavaAPI12() throws IOException, ClassNotFoundException { java.io.File file = getResource("/test/japi/myData.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -635,7 +635,7 @@ public void testJavaAPI13() throws IOException, ClassNotFoundException, External java.io.File file = getResource("/test/japi/myData.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter 
= new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -669,7 +669,7 @@ public void testJavaAPI14() throws IOException, ClassNotFoundException, External java.io.File file = getResource("/test/japi/myData.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -707,7 +707,7 @@ public void testJavaAPI15() throws IOException, ClassNotFoundException { java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); java.nio.channels.WritableByteChannel wbc = java.nio.channels.Channels.newChannel(bos); - JDOMInfosetInputter inputter = new JDOMInfosetInputter(doc); + JDOMInfosetInputter inputter = new JDOMInfosetInputter(doc, dp.daffodilConfig().xmlConversionControl()); UnparseResult res = dp.unparse(inputter, wbc); boolean err = res.isError(); assertTrue(err); @@ -732,7 +732,7 @@ public void testJavaAPI16() throws IOException, InvalidUsageException, ClassNotF java.io.File file = getResource("/test/japi/myData.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); assertTrue(res.isError()); assertFalse(res.isProcessingError()); @@ -758,7 +758,7 @@ public void testJavaAPI17() throws IOException, InvalidUsageException, ClassNotF java.io.File file = getResource("/test/japi/myData.dat"); java.io.FileInputStream fis = new 
java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); assertTrue(res.isError()); assertFalse(res.isProcessingError()); @@ -799,7 +799,7 @@ public void testJavaAPI18() throws IOException, ClassNotFoundException { java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream input = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = null; boolean err = false; @@ -840,7 +840,7 @@ public void testJavaAPI19() throws IOException, ClassNotFoundException { java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream input = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = null; boolean err = false; @@ -873,7 +873,7 @@ public void testJavaAPI20() throws IOException, ClassNotFoundException { InputSourceDataInputStream disDP = new InputSourceDataInputStream(fisDP); InputSourceDataInputStream disSAX = new InputSourceDataInputStream(fisSAX); ByteArrayOutputStream xmlBos = new ByteArrayOutputStream(); - XMLTextInfosetOutputter outputter = new XMLTextInfosetOutputter(xmlBos, true); + XMLTextInfosetOutputter outputter = new XMLTextInfosetOutputter(xmlBos, true, dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(disDP, outputter); String infosetDPString = xmlBos.toString(); @@ -994,7 +994,7 @@ public void testJavaAPI22_withExternalVariablesUsingAbstractMap() throws IOExcep java.io.File file = 
getResource("/test/japi/myData.dat"); java.io.FileInputStream fis = new java.io.FileInputStream(file); InputSourceDataInputStream dis = new InputSourceDataInputStream(fis); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); boolean err = res.isError(); assertFalse(err); @@ -1137,7 +1137,7 @@ public void testJavaAPI24() throws IOException, ClassNotFoundException, External byte[] ba = {}; ByteBuffer bb = ByteBuffer.wrap(ba); InputSourceDataInputStream dis = new InputSourceDataInputStream(bb); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); assertFalse(res.isError()); org.jdom2.Document doc = outputter.getResult(); @@ -1157,7 +1157,7 @@ public void testJavaAPI24() throws IOException, ClassNotFoundException, External byte[] ba = {}; ByteBuffer bb = ByteBuffer.wrap(ba); InputSourceDataInputStream dis = new InputSourceDataInputStream(bb); - JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(); + JDOMInfosetOutputter outputter = new JDOMInfosetOutputter(dp.daffodilConfig().xmlConversionControl()); ParseResult res = dp.parse(dis, outputter); assertFalse(res.isError()); org.jdom2.Document doc = outputter.getResult(); diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/api/DaffodilConfig.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/api/DaffodilConfig.scala index c8de735906..1ee8d294f9 100644 --- a/daffodil-lib/src/main/scala/org/apache/daffodil/api/DaffodilConfig.scala +++ b/daffodil-lib/src/main/scala/org/apache/daffodil/api/DaffodilConfig.scala @@ -16,7 +16,9 @@ */ package org.apache.daffodil.api +import org.apache.daffodil.api.XMLConversionControl.CarriageReturnMapping import org.apache.daffodil.externalvars.Binding +import 
org.apache.daffodil.util.Misc import org.apache.daffodil.xml.DaffodilXMLLoader import org.apache.daffodil.xml.NS import org.apache.daffodil.xml.XMLUtils @@ -26,8 +28,105 @@ import java.net.URI import scala.xml.Elem import scala.xml.Node -object DaffodilConfig { +/** + * Makes it simple to define an enum corresponding to an XSD attribute declaration. + * + * TODO: Move to daffodil-lib xml package. + */ +trait AttrEnum extends Enumeration { + type Type = Value + + final def fromXML(xml: Node): Value = { + var rawtxt = (xml \ ("@" + attributeName)).text + val opt: Option[Value] = + if (rawtxt == "") + None + else + Some(withName(rawtxt)) + opt.getOrElse(default) + } + + def default: Value + + private lazy val nameFromClass = + Misc.toInitialLowerCaseUnlessAllUpperCase( + Misc.getNameFromClass(this) + ) + + /** + * The object's class name must match the config file attribute name, + * except that the class name starts with an upper case letter. + * + * Or you can override. + */ + def attributeName = nameFromClass + + /** + * Appended to diagnostic messages. But these + * should not happen if the XML Loading does validation. + */ + protected def adviceString: String +} + +/** + * Makes it easy to construct structures corresponding + * to sub-elements. + */ +trait SubElement extends Serializable { + + final def parseFromParentXML(parentXML: Node): Option[Node] = { + var optNode = (parentXML \ subElementName).headOption + optNode + } + + private lazy val nameFromClass = + Misc.toInitialLowerCaseUnlessAllUpperCase( + Misc.getNameFromClass(this) + ) + def subElementName: String = nameFromClass +} + +/** + * For use with Config files for defining + * enums for XSD attributes. + */ +trait ConfigAttrEnum extends AttrEnum { + override protected def adviceString = "Config files should be XSD validated before processing them."
+} + + +object XMLConversionControl extends SubElement { + override def subElementName = "xmlConversionControl" + + def apply(parentXML: Node) = + new XMLConversionControl(parseFromParentXML(parentXML)) + def apply() = + new XMLConversionControl(None) + /** + * TODO: for DAFFODIL-2234/DAFFODIL-2346 control of XMLTextEscapeStyle + * just add another object like CarriageReturnMapping, and + * create syntax for it in dafext.xsd as an attribute of + * the xmlConversionControl element. + */ + object CarriageReturnMapping extends ConfigAttrEnum { + val ConvertCR2LF, PreserveCR = Value + + def default = ConvertCR2LF + + def apply(xml: Node) = fromXML(xml) + } +} + +class XMLConversionControl(xml: Node) extends Serializable { + def this(optNode: Option[Node]) = + this(optNode.getOrElse()) + + val crm = CarriageReturnMapping(xml) +} + + +object DaffodilConfig { /** * Create from a dafext:dfdlConfig element, which is often in a file. * Can also create from a tdml:defineConfig element, since the children @@ -36,17 +135,23 @@ object DaffodilConfig { * @param xml * @return */ - def fromXML(xml: Node) = { + def fromXML(xml: Node): DaffodilConfig = { val optBindingsNode = (xml \ "externalVariableBindings").headOption val extVarBindings = optBindingsNode.map{ Binding.getBindings(_) }.getOrElse(Seq()) val optTunablesXML = (xml \ "tunables").headOption /* had to add trim here to get rid of #PCDATA */ val tunablesMap = optTunablesXML.map{ DaffodilTunables.tunablesMap(_) }.getOrElse(Map.empty) - new DaffodilConfig(extVarBindings, tunablesMap) + + // XCC's are done differently as they are not generated (currently) + val xcc = XMLConversionControl(xml) + new DaffodilConfig(extVarBindings, tunablesMap, xcc) } - def fromSchemaSource(source: DaffodilSchemaSource) = { + def fromSchemaSource(source: DaffodilSchemaSource): DaffodilConfig = { val loader = new DaffodilXMLLoader() - var node = loader.load(source, None) // might not be daf:dfdlConfig, so don't validate. 
+ // might not be daf:dfdlConfig, so don't validate. + // configs embedded in TDML have a different root element + // and are pre-validated as part of validating the TDML. + var node = loader.load(source, None) val rootElem = node.asInstanceOf[Elem] if (rootElem.label == "dfdlConfig" && NS(rootElem.namespace) == XMLUtils.EXT_NS_APACHE ) { @@ -57,9 +162,11 @@ object DaffodilConfig { fromXML(node) } - def fromURI(uri: URI) = fromSchemaSource(URISchemaSource(uri)) + def fromURI(uri: URI): DaffodilConfig = fromSchemaSource(URISchemaSource(uri)) + + def fromFile(file: File): DaffodilConfig = fromURI(file.toURI) - def fromFile(file: File) = fromURI(file.toURI) + def apply() = new DaffodilConfig(Seq(), Seq().toMap, XMLConversionControl()) } @@ -71,6 +178,8 @@ object DaffodilConfig { */ final class DaffodilConfig private ( val externalVariableBindings: Seq[Binding], - val tunablesMap: Map[String, String]) { + val tunablesMap: Map[String, String], + val xmlConversionControl: XMLConversionControl) + extends Serializable { // no methods } diff --git a/daffodil-lib/src/test/resources/test/configExample.cfg b/daffodil-lib/src/test/resources/test/configExample.cfg.xml similarity index 77% rename from daffodil-lib/src/test/resources/test/configExample.cfg rename to daffodil-lib/src/test/resources/test/configExample.cfg.xml index d9c00b2927..bf57ec9237 100644 --- a/daffodil-lib/src/test/resources/test/configExample.cfg +++ b/daffodil-lib/src/test/resources/test/configExample.cfg.xml @@ -18,13 +18,14 @@ - - littleEndian - - - - facetExplicitLengthOutOfRange + + littleEndian + + + encodingErrorPolicyError - - + facetExplicitLengthOutOfRange + + + diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/debugger/InteractiveDebugger.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/debugger/InteractiveDebugger.scala index 2b21998b10..78856d76d7 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/debugger/InteractiveDebugger.scala +++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/debugger/InteractiveDebugger.scala @@ -18,8 +18,8 @@ package org.apache.daffodil.debugger import java.io.File - import org.apache.daffodil.BasicComponent +import org.apache.daffodil.api.DaffodilConfig import org.apache.daffodil.api.DaffodilTunables import org.apache.daffodil.dpath.ExpressionEvaluationException import org.apache.daffodil.dpath.NodeInfo @@ -435,9 +435,9 @@ class InteractiveDebugger(runner: InteractiveDebuggerRunner, eCompilers: Express } } - private def infosetToString(ie: InfosetElement): String = { + private def infosetToString(ie: InfosetElement, daffodilConfig: DaffodilConfig): String = { val bos = new java.io.ByteArrayOutputStream() - val xml = new XMLTextInfosetOutputter(bos, true) + val xml = new XMLTextInfosetOutputter(bos, true, daffodilConfig.xmlConversionControl) val iw = InfosetWalker( ie.asInstanceOf[DIElement], xml, @@ -448,8 +448,8 @@ class InteractiveDebugger(runner: InteractiveDebuggerRunner, eCompilers: Express bos.toString("UTF-8") } - private def debugPrettyPrintXML(ie: InfosetElement): Unit = { - val infosetString = infosetToString(ie) + private def debugPrettyPrintXML(ie: InfosetElement, dc: DaffodilConfig): Unit = { + val infosetString = infosetToString(ie, dc) debugPrintln(infosetString) } @@ -987,10 +987,10 @@ class InteractiveDebugger(runner: InteractiveDebuggerRunner, eCompilers: Express debugPrintln(_) } res match { - case ie: InfosetElement => debugPrettyPrintXML(ie) + case ie: InfosetElement => debugPrettyPrintXML(ie, state.dataProc.get.daffodilConfig) case nodeSeq: Seq[Any] => nodeSeq.foreach { a => a match { - case ie: InfosetElement => debugPrettyPrintXML(ie) + case ie: InfosetElement => debugPrettyPrintXML(ie, state.dataProc.get.daffodilConfig) case _ => debugPrintln(a) } } @@ -1024,7 +1024,7 @@ class InteractiveDebugger(runner: InteractiveDebuggerRunner, eCompilers: Express // // Displays the empty element since it has no value. 
// - debugPrettyPrintXML(nd.diElement) + debugPrettyPrintXML(nd.diElement, state.dataProc.get.daffodilConfig) state.suppressDiagnosticAndSucceed(r) } case _ => throw r @@ -1548,7 +1548,7 @@ class InteractiveDebugger(runner: InteractiveDebuggerRunner, eCompilers: Express debugPrintln("No Infoset", " ") } case _ => { - val infosetString = infosetToString(node) + val infosetString = infosetToString(node, state.dataProc.get.daffodilConfig) val lines = infosetString.split("\r?\n") val dropCount = diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JDOMInfosetInputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JDOMInfosetInputter.scala index f6c77afec5..89a7e34a12 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JDOMInfosetInputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JDOMInfosetInputter.scala @@ -17,6 +17,7 @@ package org.apache.daffodil.infoset +import org.apache.daffodil.api.XMLConversionControl import org.apache.daffodil.util.MStackOf import org.apache.daffodil.util.MaybeBoolean import org.apache.daffodil.xml.XMLUtils @@ -37,7 +38,9 @@ object JDOMInfosetInputter { } class JDOMInfosetInputter(doc: Document) - extends InfosetInputter { + override val xmlConversionControl: XMLConversionControl) + extends InfosetInputter + with XMLInfosetInputterMixin { /** * This stack represents the stack of elements that have been visited. 
Each @@ -94,7 +97,7 @@ class JDOMInfosetInputter(doc: Document) case _ => throw new NonTextFoundInSimpleContentException(stack.top._1.getQualifiedName) } if (primType.isInstanceOf[NodeInfo.String.Kind]) { - XMLUtils.remapPUAToXMLIllegalCharacters(text) + remapped(text) } else { text } diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JDOMInfosetOutputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JDOMInfosetOutputter.scala index a7aef3baa4..9a1a7a4868 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JDOMInfosetOutputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JDOMInfosetOutputter.scala @@ -17,14 +17,15 @@ package org.apache.daffodil.infoset +import org.apache.daffodil.api.XMLConversionControl import org.apache.daffodil.util.Maybe import org.apache.daffodil.xml.XMLUtils import org.apache.daffodil.util.MStackOf import org.apache.daffodil.exceptions.Assert import org.apache.daffodil.dpath.NodeInfo -class JDOMInfosetOutputter extends InfosetOutputter - with XMLInfosetOutputter { +class JDOMInfosetOutputter(override val xmlConversionControl: XMLConversionControl) extends InfosetOutputter + with XMLInfosetOutputterMixin { private val stack = new MStackOf[org.jdom2.Parent] private var result: Maybe[org.jdom2.Document] = Maybe.Nope diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/NullInfosetInputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/NullInfosetInputter.scala index a0b939f9ee..8d36630fbe 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/NullInfosetInputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/NullInfosetInputter.scala @@ -17,10 +17,10 @@ package org.apache.daffodil.infoset -import java.io.InputStream +import org.apache.daffodil.api.XMLConversionControl +import java.io.InputStream import scala.collection.mutable.ArrayBuffer - import scala.xml.Elem 
import scala.xml.SAXParser import scala.xml.Text @@ -72,7 +72,7 @@ object NullInfosetInputter { val localName = elem.label val namespaceURI = elem.namespace val (simpleText, isNilled) = if (isSimple) { - val text = XMLUtils.remapPUAToXMLIllegalCharacters(elem.text) + val text = remapped(elem.text) val isNilled = elem.attribute(XMLUtils.XSI_NAMESPACE, "nil").map { attrs => val str = attrs.head.toString val value = str == "true" || str == "1" diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala index 765ffbe5f6..a1ea7d7a85 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala @@ -19,7 +19,6 @@ package org.apache.daffodil.infoset import java.net.URI import java.net.URISyntaxException - import org.apache.daffodil.api.DFDL import org.apache.daffodil.dpath.NodeInfo import org.apache.daffodil.exceptions.Assert @@ -32,7 +31,6 @@ import org.apache.daffodil.util.Maybe.One import org.apache.daffodil.util.Maybe.Nope import org.apache.daffodil.util.MaybeBoolean import org.apache.daffodil.util.Misc -import org.apache.daffodil.xml.XMLUtils /** * The SAXInfosetInputter worker coroutine receives batches of SAXInfosetEvent @@ -54,7 +52,9 @@ class SAXInfosetInputter( dp: DFDL.DataProcessor, output: DFDL.Output, resolveRelativeInfosetBlobURIs: Boolean) - extends InfosetInputter with Coroutine[Array[SAXInfosetEvent]] { + extends InfosetInputter with Coroutine[Array[SAXInfosetEvent]] + with XMLInfosetInputterMixin { + /** * The index into the batchedInfosetEvents array that the InfosetInputter is @@ -89,8 +89,7 @@ class SAXInfosetInputter( throw new NonTextFoundInSimpleContentException(getLocalName()) } if (primType eq NodeInfo.String) { - val remapped = XMLUtils.remapPUAToXMLIllegalCharacters(res) - remapped + 
remapped(res) } else if (resolveRelativeInfosetBlobURIs && (primType eq NodeInfo.AnyURI) && res.nonEmpty) { val absUri = resolveRelativeBlobURIs(res) absUri diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetOutputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetOutputter.scala index 9620a18f4e..af12a27f01 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetOutputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetOutputter.scala @@ -29,7 +29,7 @@ class SAXInfosetOutputter(xmlReader: DFDL.DaffodilParseXMLReader, val namespacesFeature: Boolean, val namespacePrefixesFeature: Boolean) extends InfosetOutputter - with XMLInfosetOutputter { + with XMLInfosetOutputterMixin { /** * Reset the internal state of this InfosetOutputter. This should be called diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/ScalaXMLInfosetInputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/ScalaXMLInfosetInputter.scala index 0fcc68f845..3cfe12bdc6 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/ScalaXMLInfosetInputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/ScalaXMLInfosetInputter.scala @@ -29,7 +29,8 @@ import scala.xml.ProcInstr import scala.xml.Comment class ScalaXMLInfosetInputter(rootNode: Node) - extends InfosetInputter { + extends InfosetInputter + with XMLInfosetInputterMixin { /** * This stack represents the stack of elements that have been visited. 
Each @@ -107,7 +108,7 @@ class ScalaXMLInfosetInputter(rootNode: Node) } val result = { if (primType.isInstanceOf[NodeInfo.String.Kind]) { - XMLUtils.remapPUAToXMLIllegalCharacters(text) + remapped(text) } else { text } diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/ScalaXMLInfosetOutputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/ScalaXMLInfosetOutputter.scala index c45daa38da..2230d5f6fe 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/ScalaXMLInfosetOutputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/ScalaXMLInfosetOutputter.scala @@ -21,7 +21,6 @@ import scala.collection.mutable.ListBuffer import scala.xml.MetaData import scala.xml.Null import scala.xml.UnprefixedAttribute - import org.apache.daffodil.dpath.NodeInfo import org.apache.daffodil.exceptions.Assert import org.apache.daffodil.util.MStackOf @@ -30,7 +29,7 @@ import org.apache.daffodil.xml.XMLUtils class ScalaXMLInfosetOutputter(showFormatInfo: Boolean = false, showFreedInfo: Boolean = false) extends InfosetOutputter - with XMLInfosetOutputter { + with XMLInfosetOutputterMixin { protected val stack = new MStackOf[ListBuffer[scala.xml.Node]] private var resultNode: Maybe[scala.xml.Node] = Maybe.Nope diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/W3CDOMInfosetInputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/W3CDOMInfosetInputter.scala index 92c3e37eea..c610ee0240 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/W3CDOMInfosetInputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/W3CDOMInfosetInputter.scala @@ -17,11 +17,10 @@ package org.apache.daffodil.infoset +import org.apache.daffodil.api.XMLConversionControl import org.apache.daffodil.util.MStackOf import org.apache.daffodil.util.MaybeBoolean -import org.apache.daffodil.xml.XMLUtils import org.apache.daffodil.dpath.NodeInfo - import 
org.w3c.dom.Document import org.w3c.dom.Element import org.w3c.dom.NodeList @@ -29,10 +28,12 @@ import org.w3c.dom.Text import org.w3c.dom.ProcessingInstruction import org.w3c.dom.Comment import org.w3c.dom.Node + import javax.xml.XMLConstants class W3CDOMInfosetInputter(doc: Document) - extends InfosetInputter { + extends InfosetInputter + with XMLInfosetInputterMixin { /** * This stack represents the stack of elements that have been visited. Each @@ -97,7 +98,7 @@ class W3CDOMInfosetInputter(doc: Document) case _ => throw new NonTextFoundInSimpleContentException(stack.top._1.getNodeName) } if (primType.isInstanceOf[NodeInfo.String.Kind]) { - XMLUtils.remapPUAToXMLIllegalCharacters(text) + remapped(text) } else { text } diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/W3CDOMInfosetOutputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/W3CDOMInfosetOutputter.scala index 71b5099569..ae8ba36a91 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/W3CDOMInfosetOutputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/W3CDOMInfosetOutputter.scala @@ -27,7 +27,8 @@ import org.w3c.dom.Element import org.w3c.dom.Node import javax.xml.parsers.DocumentBuilderFactory; -class W3CDOMInfosetOutputter extends InfosetOutputter +class W3CDOMInfosetOutputter() + extends InfosetOutputter with XMLInfosetOutputter { private var document: Document = null diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLInfosetInputterMixin.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLInfosetInputterMixin.scala new file mode 100644 index 0000000000..88169c6330 --- /dev/null +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLInfosetInputterMixin.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.infoset + +import org.apache.daffodil.api.XMLConversionControl +import org.apache.daffodil.xml.RemapPUAToXMLIllegalChar + +/** + * Centralized remapper in case we need to parameterize + * the charset remapping. + * + * Not necessary right now, but this is symmetric + * with XMLInfosetOutputterMixin.
+ */ +trait XMLInfosetInputterMixin { + + def xmlConversionControl: XMLConversionControl + + private val remapper: RemapPUAToXMLIllegalChar = + new RemapPUAToXMLIllegalChar() + + def remapped(dataValueAsString: String) = + remapper.remap(dataValueAsString) +} diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLInfosetOutputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLInfosetOutputterMixin.scala similarity index 71% rename from daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLInfosetOutputter.scala rename to daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLInfosetOutputterMixin.scala index 8d6d209cd8..343e3ef702 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLInfosetOutputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLInfosetOutputterMixin.scala @@ -17,14 +17,31 @@ package org.apache.daffodil.infoset +import org.apache.daffodil.api.XMLConversionControl import org.apache.daffodil.util.Maybe -import org.apache.daffodil.xml.XMLUtils import org.apache.daffodil.equality._ +import org.apache.daffodil.xml.RemapXMLIllegalCharToPUA +import org.apache.daffodil.api.XMLConversionControl.CarriageReturnMapping._ -trait XMLInfosetOutputter { +trait XMLInfosetOutputterMixin { - def remapped(dataValueAsString: String) = XMLUtils.remapXMLIllegalCharactersToPUA(dataValueAsString) + def xmlConversionControl: XMLConversionControl + /** + * Remapper whose replaceCRWithLF setting is taken from + * xmlConversionControl.crm (ConvertCR2LF => true, PreserveCR => false). + */ + private val remapper: RemapXMLIllegalCharToPUA = { + val replaceCRWithLF = + xmlConversionControl.crm match { + case ConvertCR2LF => true + case PreserveCR => false + } + new RemapXMLIllegalCharToPUA(checkForExistingPUA = true, replaceCRWithLF) + } + + def remapped(dataValueAsString: String) = + remapper.remap(dataValueAsString) /** * String suitable for use in the text of a Processing Instruction.
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetInputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetInputter.scala index d78c806b1e..3442bbbb86 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetInputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetInputter.scala @@ -17,6 +17,8 @@ package org.apache.daffodil.infoset +import org.apache.daffodil.api.XMLConversionControl + import java.io.StringWriter import java.nio.charset.StandardCharsets import javax.xml.XMLConstants @@ -26,13 +28,11 @@ import javax.xml.stream.XMLStreamException import javax.xml.stream.XMLStreamReader import javax.xml.stream.XMLStreamWriter import javax.xml.stream.util.XMLEventAllocator - import org.apache.daffodil.dpath.NodeInfo import org.apache.daffodil.exceptions.Assert import org.apache.daffodil.infoset.InfosetInputterEventType._ import org.apache.daffodil.util.MaybeBoolean import org.apache.daffodil.util.Misc -import org.apache.daffodil.xml.XMLUtils object XMLTextInfoset { lazy val xmlInputFactory = { @@ -188,8 +188,9 @@ object XMLTextInfoset { } } -class XMLTextInfosetInputter(input: java.io.InputStream) +class XMLTextInfosetInputter(input: java.io.InputStream, extends InfosetInputter { + with XMLInfosetInputterMixin { /** * evAlloc is only to be used for diagnostic messages. 
It lets us easily @@ -308,7 +309,7 @@ class XMLTextInfosetInputter(input: java.io.InputStream) } } if (primType == NodeInfo.String) { - XMLUtils.remapPUAToXMLIllegalCharacters(elementText) + remapped(elementText) } else { elementText } diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala index 54e1a647d5..0a4338c6f0 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala @@ -17,10 +17,11 @@ package org.apache.daffodil.infoset +import org.apache.daffodil.api.XMLConversionControl + import java.io.StringReader import java.nio.charset.StandardCharsets import javax.xml.stream.XMLStreamConstants._ - import org.apache.daffodil.dpath.NodeInfo import org.apache.daffodil.exceptions.Assert import org.apache.daffodil.util.Indentable @@ -32,8 +33,8 @@ import org.apache.daffodil.util.Indentable * @param pretty Whether or to enable pretty printing. Set to true, XML * elements are indented and newlines are inserted. 
*/ -class XMLTextInfosetOutputter private (writer: java.io.Writer, pretty: Boolean) - extends InfosetOutputter with Indentable with XMLInfosetOutputter { +class XMLTextInfosetOutputter private (writer: java.io.Writer, pretty: Boolean, + extends InfosetOutputter with Indentable with XMLInfosetOutputterMixin { def this(os: java.io.OutputStream, pretty: Boolean) = { this(new java.io.OutputStreamWriter(os, StandardCharsets.UTF_8), pretty) diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala index 6ad367b73a..ab7a71ba6f 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala @@ -152,7 +152,8 @@ class DaffodilParseXMLReader (dp: DataProcessor) extends DFDL.DaffodilParseXMLRe // creates SAXInfosetOutputter object and calls setBlobAttributes on it val sio = new SAXInfosetOutputter(this, saxNamespaceFeatureValue, - saxNamespacePrefixesFeatureValue) + saxNamespacePrefixesFeatureValue, + xmlConversionControl) sio.setBlobAttributes(saxBlobDirectoryPropertyValue, saxBlobPrefixPropertyValue, saxBlobSuffixPropertyValue diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala index 77d8d11997..e8baa94f79 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala @@ -27,6 +27,9 @@ import java.nio.charset.StandardCharsets import java.nio.file.Files import java.util.zip.GZIPOutputStream import org.apache.daffodil.Implicits._ +import org.apache.daffodil.api.DaffodilConfig + +import java.net.URI object INoWarn4 { 
ImplicitsSuppressUnusedImportWarning() } import org.apache.daffodil.api.DFDL @@ -101,8 +104,9 @@ object DataProcessor { ssrd: SchemaSetRuntimeData, tunables: DaffodilTunables, variableMap: VariableMap, // must be explicitly reset by save method + daffodilConfig: DaffodilConfig, validationMode: ValidationMode.Type, // must be explicitly turned off by save method - ) extends DataProcessor(ssrd, tunables, variableMap, validationMode) { + ) extends DataProcessor(ssrd, tunables, variableMap, daffodilConfig, validationMode) { override def withValidationMode(mode: ValidationMode.Type): DataProcessor = { if (mode == ValidationMode.Full) { @@ -326,7 +330,7 @@ class DataProcessor( validationMode match { case ValidationMode.Full | ValidationMode.Custom(_) => val bos = new java.io.ByteArrayOutputStream() - val xmlOutputter = new XMLTextInfosetOutputter(bos, false) + val xmlOutputter = new XMLTextInfosetOutputter(bos, false, xmlConversionControl) val teeOutputter = new TeeInfosetOutputter(output, xmlOutputter) (teeOutputter, One(bos)) case _ => diff --git a/daffodil-schematron/src/test/scala/org/apache/daffodil/validation/schematron/EmbeddedTesting.scala b/daffodil-schematron/src/test/scala/org/apache/daffodil/validation/schematron/EmbeddedTesting.scala index 6a914eb72a..152f6acd39 100644 --- a/daffodil-schematron/src/test/scala/org/apache/daffodil/validation/schematron/EmbeddedTesting.scala +++ b/daffodil-schematron/src/test/scala/org/apache/daffodil/validation/schematron/EmbeddedTesting.scala @@ -52,7 +52,7 @@ trait EmbeddedTesting { val bos = new ByteArrayOutputStream() val r1 = dp.parse( new InputSourceDataInputStream(new ByteArrayInputStream(bytes)), - new XMLTextInfosetOutputter(bos, true)) + new XMLTextInfosetOutputter(bos, true, xmlConversionControl)) verbose match { case Always | AnyError if r1.isError() => r1.getDiagnostics.foreach(println) diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetInputter.scala 
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetInputter.scala index 04aba24bd9..fbb03335d6 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetInputter.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetInputter.scala @@ -17,6 +17,9 @@ package org.apache.daffodil.tdml +import org.apache.daffodil.api.XMLConversionControl.CarriageReturnMapping.ConvertCR2LF +import org.apache.daffodil.api.XMLConversionControl.CarriageReturnMapping.PreserveCR + import java.net.URI import java.net.URISyntaxException import org.apache.daffodil.infoset.InfosetInputter @@ -71,9 +74,16 @@ class TDMLInfosetInputter(val scalaInputter: ScalaXMLInfosetInputter, others: Se val firstVersion = i.getSimpleText(primType, runtimeProperties) val finalVersion = i match { case _ if (firstVersion eq null) => "" - // the json infoset inputter maintains CRLF/CR, but XML converts CRLF/CR to - // LF. So if this is Json, then we want the CRLF/CR converted to LF - case jsonii: JsonInfosetInputter => firstVersion.replaceAll("(\r\n|\r)", "\n") + case jsonii: JsonInfosetInputter => { + this.scalaInputter.xmlConversionControl.crm match { + case ConvertCR2LF => + // the json infoset inputter maintains CRLF/CR, but in this case XML converts CRLF/CR to + // LF. 
So if this is Json, then we want the CRLF/CR converted to LF + firstVersion.replaceAll("(\r\n|\r)", "\n") + case PreserveCR => + firstVersion + } + } case _ => firstVersion } finalVersion diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala index b9f397b8b9..5822313197 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala @@ -211,7 +211,7 @@ class DaffodilTDMLDFDLProcessor private (private var dp: DataProcessor) extends } override def unparse(infosetXML: scala.xml.Node, outStream: java.io.OutputStream): TDMLUnparseResult = { - val scalaInputter = new ScalaXMLInfosetInputter(infosetXML) + val scalaInputter = new ScalaXMLInfosetInputter(infosetXML, xmlConversionControl) // We can't compare against other inputters since we only have scala XML, // but we still need to use the TDMLInfosetInputter since it may make TDML // specific modifications to the input infoset (e.g. 
blob paths) @@ -246,7 +246,7 @@ class DaffodilTDMLDFDLProcessor private (private var dp: DataProcessor) extends // Assert.usage(lengthLimitInBits >= 0) - val outputter = new TDMLInfosetOutputter() + val outputter = new TDMLInfosetOutputter(xmlConversionControl) outputter.setBlobAttributes(blobDir, blobPrefix, blobSuffix) val xri = dp.newXMLReaderInstance diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/xmlConversionControl.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/xmlConversionControl.tdml new file mode 100644 index 0000000000..4b208c8f2b --- /dev/null +++ b/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/xmlConversionControl.tdml @@ -0,0 +1,107 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + abc%#x0d;%#x0a;def%#x0d;%#x0a;ghi + + + + abc def ghi + + + + + + + abc%#x0d;%#x0a;def%#x0d;%#x0a;ghi + + + + abc def ghi + + + + + + + abc%#x0d;%#x0a;def%#x0d;%#x0a;ghi + + + + abc def ghi + + + + + diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestXMLConversionControl.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestXMLConversionControl.scala new file mode 100644 index 0000000000..59d5031904 --- /dev/null +++ b/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestXMLConversionControl.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.section00.general + +/* This section00 is for testing general features of DFDL that are + * not related to any specific requirement + */ + +import org.junit.Test +import org.apache.daffodil.tdml.Runner + +object TestXMLConversionControl { + val testDir = "/org/apache/daffodil/section00/general/" + val runner = Runner(testDir, "xmlConversionControl.tdml") + +} + +class TestXMLConversionControl { + + import TestXMLConversionControl._ + + @Test def preserveCR_1(): Unit = runner.runOneTest("preserveCR_1") + @Test def preserveCR_2(): Unit = runner.runOneTest("preserveCR_2") + + @Test def convertCR_1(): Unit = runner.runOneTest("convertCR_1") + +}