diff --git a/src/main/java/com/siemens/ct/exi/json/EXIforJSONGenerator.java b/src/main/java/com/siemens/ct/exi/json/EXIforJSONGenerator.java index 1caa13a..416de6a 100644 --- a/src/main/java/com/siemens/ct/exi/json/EXIforJSONGenerator.java +++ b/src/main/java/com/siemens/ct/exi/json/EXIforJSONGenerator.java @@ -48,71 +48,69 @@ import com.siemens.ct.exi.values.StringValue; public class EXIforJSONGenerator extends AbstractEXIforJSON { - + public EXIforJSONGenerator() throws EXIException, IOException { super(); } - + public EXIforJSONGenerator(EXIFactory ef) throws EXIException, IOException { super(ef); } - + public EXIforJSONGenerator(String schemaId) throws EXIException, IOException { super(schemaId); } - + public EXIforJSONGenerator(EXIFactory ef, String schemaId) throws EXIException, IOException { super(ef, schemaId); } - + static PrintStream DEBUG = null; // static final boolean DEBUG = false; - static void printDebug(String s) { - if(DEBUG != null) { - DEBUG.println(s); + if (DEBUG != null) { + DEBUG.println(s); } } - + static void printDebugInd(int ind, String s) { - if(DEBUG != null) { - for(int i=0; i jsons = new ArrayList(); // Taki jsons.add("./../../../W3C/EXI/docs/json/V2/personnel_one.json"); jsons.add("./../../../W3C/EXI/docs/json/V2/personnel_two.json"); jsons.add("./../../../W3C/EXI/docs/json/V2/personnel_three.json"); - + // Some other samples jsons.add("./../../../W3C/EXI/docs/json/V2/test/bower.json"); jsons.add("./../../../W3C/EXI/docs/json/V2/test/door.jsonld"); jsons.add("./../../../W3C/EXI/docs/json/V2/test/package.json"); jsons.add("./../../../W3C/EXI/docs/json/V2/test/ui.resizable.jquery.json"); - + // "old" JSON tests - // GPX + // GPX jsons.add("./../../../W3C/Group/EXI/TTFMS/data/JSON/gpx/sample-set-1/gpx-1-1pts.json"); jsons.add("./../../../W3C/Group/EXI/TTFMS/data/JSON/gpx/sample-set-1/gpx-1-100pts.json"); jsons.add("./../../../W3C/Group/EXI/TTFMS/data/JSON/gpx/sample-set-1/gpx-1-200pts.json"); @@ -130,19 +128,19 @@ public static void 
main(String[] args) throws FileNotFoundException, EXIExceptio jsons.add("./../../../W3C/Group/EXI/TTFMS/data/JSON/openweathermap.org/sample-set-1/owm-1-400cities.json"); jsons.add("./../../../W3C/Group/EXI/TTFMS/data/JSON/openweathermap.org/sample-set-1/owm-1-500cities.json"); jsons.add("./../../../W3C/Group/EXI/TTFMS/data/JSON/openweathermap.org/sample-set-1/owm-1-1000cities.json"); - - + System.out.println("Name; JSON; V1; V2"); - for(String json: jsons) { + for (String json : jsons) { test(json); } } else { - // String s = "{\n \"keyNumber\": 123,\n \"keyArrayStrings\": [\n \"s1\",\n \"s2\"\n ]\n}"; - String s = "{\n \"glossary\": {\n \"title\": \"example glossary\",\n \"GlossDiv\": {\n \"title\": \"S\",\n \"GlossList\": {\n \"GlossEntry\": {\n \"ID\": \"SGML\",\n \"SortAs\": \"SGML\",\n \"GlossTerm\": \"Standard Generalized Markup Language\",\n \"Acronym\": \"SGML\",\n \"Abbrev\": \"ISO 8879:1986\",\n \"GlossDef\": {\n \"para\": \"A meta-markup language, used to create markup languages such as DocBook.\",\n \"GlossSeeAlso\": [\n \"GML\",\n \"XML\"\n ]\n },\n \"GlossSee\": \"markup\"\n }\n }\n }\n }\n}"; + // String s = "{\n \"keyNumber\": 123,\n \"keyArrayStrings\": [\n + // \"s1\",\n \"s2\"\n ]\n}"; + String s = "{\n \"glossary\": {\n \"title\": \"example glossary\",\n \"GlossDiv\": {\n \"title\": \"S\",\n \"GlossList\": {\n \"GlossEntry\": {\n \"ID\": \"SGML\",\n \"SortAs\": \"SGML\",\n \"GlossTerm\": \"Standard Generalized Markup Language\",\n \"Acronym\": \"SGML\",\n \"Abbrev\": \"ISO 8879:1986\",\n \"GlossDef\": {\n \"para\": \"A meta-markup language, used to create markup languages such as DocBook.\",\n \"GlossSeeAlso\": [\n \"GML\",\n \"XML\"\n ]\n },\n \"GlossSee\": \"markup\"\n }\n }\n }\n }\n}"; ByteArrayOutputStream baosV2 = new ByteArrayOutputStream(); EXIforJSONGenerator e4jGenerator = new EXIforJSONGenerator(); e4jGenerator.generate(new ByteArrayInputStream(s.getBytes(StandardCharsets.UTF_8)), baosV2); - + File f = File.createTempFile("exi4json", 
"exi"); FileOutputStream fos = new FileOutputStream(f); fos.write(baosV2.toByteArray()); @@ -151,7 +149,7 @@ public static void main(String[] args) throws FileNotFoundException, EXIExceptio } } - + private static void test(String json) throws FileNotFoundException, EXIException, IOException { ByteArrayOutputStream baosV1 = new ByteArrayOutputStream(); { @@ -159,38 +157,39 @@ private static void test(String json) throws FileNotFoundException, EXIException e4jGenerator.generate(new FileInputStream(json), baosV1); // System.out.println("Size V1: " + baosV1.size()); } - + ByteArrayOutputStream baosV2 = new ByteArrayOutputStream(); { EXIforJSONGenerator e4jGenerator = new EXIforJSONGenerator(); e4jGenerator.generate(new FileInputStream(json), baosV2); // System.out.println("Size V2: " + baosV2.size()); } - + System.out.println(json + "; " + (new File(json)).length() + "; " + baosV1.size() + "; " + baosV2.size()); } - + private void generateV1(InputStream isJSON, OutputStream osEXI4JSON) throws EXIException, IOException { EXIStreamEncoder streamEncoder = ef.createEXIStreamEncoder(); - + EXIBodyEncoder bodyEncoder = streamEncoder.encodeHeader(osEXI4JSON); - + JsonParser parser = Json.createParser(isJSON); - + int ind = 0; String key = null; - + bodyEncoder.encodeStartDocument(); - + while (parser.hasNext()) { Event e = parser.next(); - switch(e) { + switch (e) { case KEY_NAME: key = parser.getString(); break; case START_OBJECT: - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_MAP, null); - if(key == null) { + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_MAP, + null); + if (key == null) { printDebugInd(ind, ""); } else { printDebugInd(ind, ""); @@ -205,8 +204,9 @@ private void generateV1(InputStream isJSON, OutputStream osEXI4JSON) throws EXIE bodyEncoder.encodeEndElement(); break; case START_ARRAY: - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, 
EXI4JSONConstants.LOCALNAME_ARRAY, null); - if(key == null) { + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_ARRAY, + null); + if (key == null) { printDebugInd(ind, ""); } else { printDebugInd(ind, ""); @@ -221,9 +221,10 @@ private void generateV1(InputStream isJSON, OutputStream osEXI4JSON) throws EXIE bodyEncoder.encodeEndElement(); break; case VALUE_STRING: - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_STRING, null); + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_STRING, + null); bodyEncoder.encodeCharacters(new StringValue(parser.getString())); - if(key == null) { + if (key == null) { printDebugInd(ind, "" + parser.getString() + ""); } else { printDebugInd(ind, "" + parser.getString() + ""); @@ -233,9 +234,10 @@ private void generateV1(InputStream isJSON, OutputStream osEXI4JSON) throws EXIE bodyEncoder.encodeEndElement(); break; case VALUE_NUMBER: - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_NUMBER, null); + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_NUMBER, + null); bodyEncoder.encodeCharacters(new StringValue(parser.getString())); - if(key == null) { + if (key == null) { printDebugInd(ind, "" + parser.getString() + ""); } else { printDebugInd(ind, "" + parser.getString() + ""); @@ -246,21 +248,24 @@ private void generateV1(InputStream isJSON, OutputStream osEXI4JSON) throws EXIE break; case VALUE_FALSE: case VALUE_TRUE: - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_BOOLEAN, null); + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, + EXI4JSONConstants.LOCALNAME_BOOLEAN, null); bodyEncoder.encodeCharacters(new StringValue((e == Event.VALUE_FALSE ? 
"false" : "true"))); - if(key == null) { + if (key == null) { printDebugInd(ind, "" + parser.getString() + ""); } else { - printDebugInd(ind, "" + (e == Event.VALUE_FALSE ? "false" : "true") + ""); + printDebugInd(ind, "" + (e == Event.VALUE_FALSE ? "false" : "true") + + ""); bodyEncoder.encodeAttribute("", EXI4JSONConstants.LOCALNAME_KEY, null, new StringValue(key)); key = null; } bodyEncoder.encodeEndElement(); break; case VALUE_NULL: - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_NULL, null); - if(key == null) { - printDebugInd(ind, "" ); + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_NULL, + null); + if (key == null) { + printDebugInd(ind, ""); } else { printDebugInd(ind, ""); bodyEncoder.encodeAttribute("", EXI4JSONConstants.LOCALNAME_KEY, null, new StringValue(key)); @@ -272,75 +277,66 @@ private void generateV1(InputStream isJSON, OutputStream osEXI4JSON) throws EXIE throw new RuntimeException("Not supported JSON event: " + e); } } - + bodyEncoder.encodeEndDocument(); bodyEncoder.flush(); } - - private void generateV2(InputStream isJSON, OutputStream osEXI4JSON) throws EXIException, IOException { + + protected void generateV2(InputStream isJSON, OutputStream osEXI4JSON) throws EXIException, IOException { // DEBUG = System.out; - + EXIStreamEncoder streamEncoder = ef.createEXIStreamEncoder(); - + EXIBodyEncoder bodyEncoder = streamEncoder.encodeHeader(osEXI4JSON); - + JsonParser parser = Json.createParser(isJSON); - + bodyEncoder.encodeStartDocument(); - + List events = new ArrayList(); List keys = new ArrayList(); - + while (parser.hasNext()) { Event e = parser.next(); - - switch(e) { + + switch (e) { case KEY_NAME: events.add(e); String key = parser.getString(); - if (EXI4JSONConstants.LOCALNAME_MAP.equals(key) || EXI4JSONConstants.LOCALNAME_ARRAY.equals(key) - || EXI4JSONConstants.LOCALNAME_STRING.equals(key) - || 
EXI4JSONConstants.LOCALNAME_NUMBER.equals(key) - || EXI4JSONConstants.LOCALNAME_BOOLEAN.equals(key) - || EXI4JSONConstants.LOCALNAME_NULL.equals(key) - || EXI4JSONConstants.LOCALNAME_OTHER.equals(key)) { - // Key-name Escaping (https://www.w3.org/TR/2016/WD-exi-for-json-20160823/#keynameEscaping) - // --> Conflict with existing EXI4JSON global schema element name - key = EXI4JSONConstants.ESCAPE_START_CHARACTER + EXI4JSONConstants.ESCAPE_END_CHARACTER + key; - } - // TODO represent '_' itself - // TODO Conflict with NCName character(s) - + key = escapeKey(key); keys.add(key); bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, key, null); - printDebug("<" + key +">"); + printDebug("<" + key + ">"); break; case START_OBJECT: events.add(e); - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_MAP, null); + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_MAP, + null); printDebug(""); break; case END_OBJECT: printDebug(""); - Event eo = events.remove(events.size()-1); - assert(eo == Event.START_OBJECT); + Event eo = events.remove(events.size() - 1); + assert (eo == Event.START_OBJECT); bodyEncoder.encodeEndElement(); checkKeyEnd(events, keys, bodyEncoder); break; case START_ARRAY: events.add(e); - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_ARRAY, null); + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_ARRAY, + null); printDebug(""); break; case END_ARRAY: printDebug(""); - Event ea = events.remove(events.size()-1); - assert(ea == Event.START_ARRAY); + Event ea = events.remove(events.size() - 1); + assert (ea == Event.START_ARRAY); bodyEncoder.encodeEndElement(); checkKeyEnd(events, keys, bodyEncoder); break; case VALUE_STRING: - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_STRING, null); + 
bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_STRING, + null); bodyEncoder.encodeCharacters(new StringValue(parser.getString())); printDebug("" + parser.getString() + ""); bodyEncoder.encodeEndElement(); @@ -348,7 +344,8 @@ private void generateV2(InputStream isJSON, OutputStream osEXI4JSON) throws EXIE break; case VALUE_NUMBER: // TODO use /other/integer if it is an integer value - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_NUMBER, null); + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_NUMBER, + null); bodyEncoder.encodeCharacters(new StringValue(parser.getString())); printDebug("" + parser.getString() + ""); bodyEncoder.encodeEndElement(); @@ -356,7 +353,8 @@ private void generateV2(InputStream isJSON, OutputStream osEXI4JSON) throws EXIE break; case VALUE_FALSE: case VALUE_TRUE: - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_BOOLEAN, null); + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, + EXI4JSONConstants.LOCALNAME_BOOLEAN, null); String sb = e == Event.VALUE_FALSE ? 
"false" : "true"; bodyEncoder.encodeCharacters(new StringValue(sb)); printDebug("" + sb + ""); @@ -364,8 +362,9 @@ private void generateV2(InputStream isJSON, OutputStream osEXI4JSON) throws EXIE checkKeyEnd(events, keys, bodyEncoder); break; case VALUE_NULL: - bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_NULL, null); - printDebug("" ); + bodyEncoder.encodeStartElement(EXI4JSONConstants.NAMESPACE_EXI4JSON, EXI4JSONConstants.LOCALNAME_NULL, + null); + printDebug(""); bodyEncoder.encodeEndElement(); checkKeyEnd(events, keys, bodyEncoder); break; @@ -373,18 +372,222 @@ private void generateV2(InputStream isJSON, OutputStream osEXI4JSON) throws EXIE throw new RuntimeException("Not supported JSON event: " + e); } } - + bodyEncoder.encodeEndDocument(); bodyEncoder.flush(); } - - private static void checkKeyEnd(List events, List keys, EXIBodyEncoder bodyEncoder) throws EXIException, IOException { - if(events.size()>0 && events.get(events.size()-1) == Event.KEY_NAME) { + + protected String escapeKey(String key) { + if (EXI4JSONConstants.LOCALNAME_MAP.equals(key) || EXI4JSONConstants.LOCALNAME_ARRAY.equals(key) + || EXI4JSONConstants.LOCALNAME_STRING.equals(key) || EXI4JSONConstants.LOCALNAME_NUMBER.equals(key) + || EXI4JSONConstants.LOCALNAME_BOOLEAN.equals(key) || EXI4JSONConstants.LOCALNAME_NULL.equals(key) + || EXI4JSONConstants.LOCALNAME_OTHER.equals(key)) { + // Key-name Escaping + // (https://www.w3.org/TR/2016/WD-exi-for-json-20160823/#keynameEscaping) + // --> Conflict with existing EXI4JSON global schema element name + key = String.valueOf(EXI4JSONConstants.ESCAPE_START_CHARACTER) + + String.valueOf(EXI4JSONConstants.ESCAPE_END_CHARACTER) + key; + } + + // TODO represent '_' itself + // TODO Conflict with NCName character(s) + + return key; + } + + private static void checkKeyEnd(List events, List keys, EXIBodyEncoder bodyEncoder) + throws EXIException, IOException { + if (events.size() > 0 && 
events.get(events.size() - 1) == Event.KEY_NAME) { bodyEncoder.encodeEndElement(); // end of key element - printDebug(""); - events.remove(events.size()-1); + printDebug(""); + events.remove(events.size() - 1); } } - + // inspired by + // http://www.java2s.com/Code/Java/XML/CheckswhetherthesuppliedStringisanNCNameNamespaceClassifiedName.htm + // --> escapes also '_' for EXI4JSON + public static String escapeNCNamePlus(String name) { + if (name == null || name.length() == 0) { + throw new RuntimeException("Unsupoorted NCName: " + name); + } + + StringBuilder sb = null; + + for (int i = 0; i < name.length(); i++) { + char c = name.charAt(i); + + if (i == 0) { + // first character (special) + if (isLetter(c)) { + // OK + } else if (c == '_') { + // valid NCName, but needs to be escaped for EXI4JSON + + } else { + if (sb == null) { + sb = new StringBuilder(); + } + } + } else { + // rest of the characters + + if (isNCNameChar(c)) { + // OK + if(c == '_') { + // update + } + } else { + // Not OK, fix + } + } + } + + // All characters have been checked + if (sb == null) { + return name; // as is + } else { + return sb.toString(); + } + } + + public static final boolean isNCNameChar(char c) { + return _isAsciiBaseChar(c) || _isAsciiDigit(c) || c == '.' 
|| c == '-' || c == '_' || _isNonAsciiBaseChar(c) + || _isNonAsciiDigit(c) || isIdeographic(c) || isCombiningChar(c) || isExtender(c); + } + + public static final boolean isLetter(char c) { + return _isAsciiBaseChar(c) || _isNonAsciiBaseChar(c) || isIdeographic(c); + } + + private static final boolean _isAsciiBaseChar(char c) { + return _charInRange(c, 0x0041, 0x005A) || _charInRange(c, 0x0061, 0x007A); + } + + private static final boolean _isNonAsciiBaseChar(char c) { + return _charInRange(c, 0x00C0, 0x00D6) || _charInRange(c, 0x00D8, 0x00F6) || _charInRange(c, 0x00F8, 0x00FF) + || _charInRange(c, 0x0100, 0x0131) || _charInRange(c, 0x0134, 0x013E) || _charInRange(c, 0x0141, 0x0148) + || _charInRange(c, 0x014A, 0x017E) || _charInRange(c, 0x0180, 0x01C3) || _charInRange(c, 0x01CD, 0x01F0) + || _charInRange(c, 0x01F4, 0x01F5) || _charInRange(c, 0x01FA, 0x0217) || _charInRange(c, 0x0250, 0x02A8) + || _charInRange(c, 0x02BB, 0x02C1) || c == 0x0386 || _charInRange(c, 0x0388, 0x038A) || c == 0x038C + || _charInRange(c, 0x038E, 0x03A1) || _charInRange(c, 0x03A3, 0x03CE) || _charInRange(c, 0x03D0, 0x03D6) + || c == 0x03DA || c == 0x03DC || c == 0x03DE || c == 0x03E0 || _charInRange(c, 0x03E2, 0x03F3) + || _charInRange(c, 0x0401, 0x040C) || _charInRange(c, 0x040E, 0x044F) || _charInRange(c, 0x0451, 0x045C) + || _charInRange(c, 0x045E, 0x0481) || _charInRange(c, 0x0490, 0x04C4) || _charInRange(c, 0x04C7, 0x04C8) + || _charInRange(c, 0x04CB, 0x04CC) || _charInRange(c, 0x04D0, 0x04EB) || _charInRange(c, 0x04EE, 0x04F5) + || _charInRange(c, 0x04F8, 0x04F9) || _charInRange(c, 0x0531, 0x0556) || c == 0x0559 + || _charInRange(c, 0x0561, 0x0586) || _charInRange(c, 0x05D0, 0x05EA) || _charInRange(c, 0x05F0, 0x05F2) + || _charInRange(c, 0x0621, 0x063A) || _charInRange(c, 0x0641, 0x064A) || _charInRange(c, 0x0671, 0x06B7) + || _charInRange(c, 0x06BA, 0x06BE) || _charInRange(c, 0x06C0, 0x06CE) || _charInRange(c, 0x06D0, 0x06D3) + || c == 0x06D5 || _charInRange(c, 0x06E5, 0x06E6) || 
_charInRange(c, 0x0905, 0x0939) || c == 0x093D + || _charInRange(c, 0x0958, 0x0961) || _charInRange(c, 0x0985, 0x098C) || _charInRange(c, 0x098F, 0x0990) + || _charInRange(c, 0x0993, 0x09A8) || _charInRange(c, 0x09AA, 0x09B0) || c == 0x09B2 + || _charInRange(c, 0x09B6, 0x09B9) || _charInRange(c, 0x09DC, 0x09DD) || _charInRange(c, 0x09DF, 0x09E1) + || _charInRange(c, 0x09F0, 0x09F1) || _charInRange(c, 0x0A05, 0x0A0A) || _charInRange(c, 0x0A0F, 0x0A10) + || _charInRange(c, 0x0A13, 0x0A28) || _charInRange(c, 0x0A2A, 0x0A30) || _charInRange(c, 0x0A32, 0x0A33) + || _charInRange(c, 0x0A35, 0x0A36) || _charInRange(c, 0x0A38, 0x0A39) || _charInRange(c, 0x0A59, 0x0A5C) + || c == 0x0A5E || _charInRange(c, 0x0A72, 0x0A74) || _charInRange(c, 0x0A85, 0x0A8B) || c == 0x0A8D + || _charInRange(c, 0x0A8F, 0x0A91) || _charInRange(c, 0x0A93, 0x0AA8) || _charInRange(c, 0x0AAA, 0x0AB0) + || _charInRange(c, 0x0AB2, 0x0AB3) || _charInRange(c, 0x0AB5, 0x0AB9) || c == 0x0ABD || c == 0x0AE0 + || _charInRange(c, 0x0B05, 0x0B0C) || _charInRange(c, 0x0B0F, 0x0B10) || _charInRange(c, 0x0B13, 0x0B28) + || _charInRange(c, 0x0B2A, 0x0B30) || _charInRange(c, 0x0B32, 0x0B33) || _charInRange(c, 0x0B36, 0x0B39) + || c == 0x0B3D || _charInRange(c, 0x0B5C, 0x0B5D) || _charInRange(c, 0x0B5F, 0x0B61) + || _charInRange(c, 0x0B85, 0x0B8A) || _charInRange(c, 0x0B8E, 0x0B90) || _charInRange(c, 0x0B92, 0x0B95) + || _charInRange(c, 0x0B99, 0x0B9A) || c == 0x0B9C || _charInRange(c, 0x0B9E, 0x0B9F) + || _charInRange(c, 0x0BA3, 0x0BA4) || _charInRange(c, 0x0BA8, 0x0BAA) || _charInRange(c, 0x0BAE, 0x0BB5) + || _charInRange(c, 0x0BB7, 0x0BB9) || _charInRange(c, 0x0C05, 0x0C0C) || _charInRange(c, 0x0C0E, 0x0C10) + || _charInRange(c, 0x0C12, 0x0C28) || _charInRange(c, 0x0C2A, 0x0C33) || _charInRange(c, 0x0C35, 0x0C39) + || _charInRange(c, 0x0C60, 0x0C61) || _charInRange(c, 0x0C85, 0x0C8C) || _charInRange(c, 0x0C8E, 0x0C90) + || _charInRange(c, 0x0C92, 0x0CA8) || _charInRange(c, 0x0CAA, 0x0CB3) || _charInRange(c, 
0x0CB5, 0x0CB9) + || c == 0x0CDE || _charInRange(c, 0x0CE0, 0x0CE1) || _charInRange(c, 0x0D05, 0x0D0C) + || _charInRange(c, 0x0D0E, 0x0D10) || _charInRange(c, 0x0D12, 0x0D28) || _charInRange(c, 0x0D2A, 0x0D39) + || _charInRange(c, 0x0D60, 0x0D61) || _charInRange(c, 0x0E01, 0x0E2E) || c == 0x0E30 + || _charInRange(c, 0x0E32, 0x0E33) || _charInRange(c, 0x0E40, 0x0E45) || _charInRange(c, 0x0E81, 0x0E82) + || c == 0x0E84 || _charInRange(c, 0x0E87, 0x0E88) || c == 0x0E8A || c == 0x0E8D + || _charInRange(c, 0x0E94, 0x0E97) || _charInRange(c, 0x0E99, 0x0E9F) || _charInRange(c, 0x0EA1, 0x0EA3) + || c == 0x0EA5 || c == 0x0EA7 || _charInRange(c, 0x0EAA, 0x0EAB) || _charInRange(c, 0x0EAD, 0x0EAE) + || c == 0x0EB0 || _charInRange(c, 0x0EB2, 0x0EB3) || c == 0x0EBD || _charInRange(c, 0x0EC0, 0x0EC4) + || _charInRange(c, 0x0F40, 0x0F47) || _charInRange(c, 0x0F49, 0x0F69) || _charInRange(c, 0x10A0, 0x10C5) + || _charInRange(c, 0x10D0, 0x10F6) || c == 0x1100 || _charInRange(c, 0x1102, 0x1103) + || _charInRange(c, 0x1105, 0x1107) || c == 0x1109 || _charInRange(c, 0x110B, 0x110C) + || _charInRange(c, 0x110E, 0x1112) || c == 0x113C || c == 0x113E || c == 0x1140 || c == 0x114C + || c == 0x114E || c == 0x1150 || _charInRange(c, 0x1154, 0x1155) || c == 0x1159 + || _charInRange(c, 0x115F, 0x1161) || c == 0x1163 || c == 0x1165 || c == 0x1167 || c == 0x1169 + || _charInRange(c, 0x116D, 0x116E) || _charInRange(c, 0x1172, 0x1173) || c == 0x1175 || c == 0x119E + || c == 0x11A8 || c == 0x11AB || _charInRange(c, 0x11AE, 0x11AF) || _charInRange(c, 0x11B7, 0x11B8) + || c == 0x11BA || _charInRange(c, 0x11BC, 0x11C2) || c == 0x11EB || c == 0x11F0 || c == 0x11F9 + || _charInRange(c, 0x1E00, 0x1E9B) || _charInRange(c, 0x1EA0, 0x1EF9) || _charInRange(c, 0x1F00, 0x1F15) + || _charInRange(c, 0x1F18, 0x1F1D) || _charInRange(c, 0x1F20, 0x1F45) || _charInRange(c, 0x1F48, 0x1F4D) + || _charInRange(c, 0x1F50, 0x1F57) || c == 0x1F59 || c == 0x1F5B || c == 0x1F5D + || _charInRange(c, 0x1F5F, 0x1F7D) || 
_charInRange(c, 0x1F80, 0x1FB4) || _charInRange(c, 0x1FB6, 0x1FBC) + || c == 0x1FBE || _charInRange(c, 0x1FC2, 0x1FC4) || _charInRange(c, 0x1FC6, 0x1FCC) + || _charInRange(c, 0x1FD0, 0x1FD3) || _charInRange(c, 0x1FD6, 0x1FDB) || _charInRange(c, 0x1FE0, 0x1FEC) + || _charInRange(c, 0x1FF2, 0x1FF4) || _charInRange(c, 0x1FF6, 0x1FFC) || c == 0x2126 + || _charInRange(c, 0x212A, 0x212B) || c == 0x212E || _charInRange(c, 0x2180, 0x2182) + || _charInRange(c, 0x3041, 0x3094) || _charInRange(c, 0x30A1, 0x30FA) || _charInRange(c, 0x3105, 0x312C) + || _charInRange(c, 0xAC00, 0xD7A3); + } + + public static final boolean isIdeographic(char c) { + return _charInRange(c, 0x4E00, 0x9FA5) || c == 0x3007 || _charInRange(c, 0x3021, 0x3029); + } + + public static final boolean isCombiningChar(char c) { + return _charInRange(c, 0x0300, 0x0345) || _charInRange(c, 0x0360, 0x0361) || _charInRange(c, 0x0483, 0x0486) + || _charInRange(c, 0x0591, 0x05A1) || _charInRange(c, 0x05A3, 0x05B9) || _charInRange(c, 0x05BB, 0x05BD) + || c == 0x05BF || _charInRange(c, 0x05C1, 0x05C2) || c == 0x05C4 || _charInRange(c, 0x064B, 0x0652) + || c == 0x0670 || _charInRange(c, 0x06D6, 0x06DC) || _charInRange(c, 0x06DD, 0x06DF) + || _charInRange(c, 0x06E0, 0x06E4) || _charInRange(c, 0x06E7, 0x06E8) || _charInRange(c, 0x06EA, 0x06ED) + || _charInRange(c, 0x0901, 0x0903) || c == 0x093C || _charInRange(c, 0x093E, 0x094C) || c == 0x094D + || _charInRange(c, 0x0951, 0x0954) || _charInRange(c, 0x0962, 0x0963) || _charInRange(c, 0x0981, 0x0983) + || c == 0x09BC || c == 0x09BE || c == 0x09BF || _charInRange(c, 0x09C0, 0x09C4) + || _charInRange(c, 0x09C7, 0x09C8) || _charInRange(c, 0x09CB, 0x09CD) || c == 0x09D7 + || _charInRange(c, 0x09E2, 0x09E3) || c == 0x0A02 || c == 0x0A3C || c == 0x0A3E || c == 0x0A3F + || _charInRange(c, 0x0A40, 0x0A42) || _charInRange(c, 0x0A47, 0x0A48) || _charInRange(c, 0x0A4B, 0x0A4D) + || _charInRange(c, 0x0A70, 0x0A71) || _charInRange(c, 0x0A81, 0x0A83) || c == 0x0ABC + || _charInRange(c, 
0x0ABE, 0x0AC5) || _charInRange(c, 0x0AC7, 0x0AC9) || _charInRange(c, 0x0ACB, 0x0ACD) + || _charInRange(c, 0x0B01, 0x0B03) || c == 0x0B3C || _charInRange(c, 0x0B3E, 0x0B43) + || _charInRange(c, 0x0B47, 0x0B48) || _charInRange(c, 0x0B4B, 0x0B4D) || _charInRange(c, 0x0B56, 0x0B57) + || _charInRange(c, 0x0B82, 0x0B83) || _charInRange(c, 0x0BBE, 0x0BC2) || _charInRange(c, 0x0BC6, 0x0BC8) + || _charInRange(c, 0x0BCA, 0x0BCD) || c == 0x0BD7 || _charInRange(c, 0x0C01, 0x0C03) + || _charInRange(c, 0x0C3E, 0x0C44) || _charInRange(c, 0x0C46, 0x0C48) || _charInRange(c, 0x0C4A, 0x0C4D) + || _charInRange(c, 0x0C55, 0x0C56) || _charInRange(c, 0x0C82, 0x0C83) || _charInRange(c, 0x0CBE, 0x0CC4) + || _charInRange(c, 0x0CC6, 0x0CC8) || _charInRange(c, 0x0CCA, 0x0CCD) || _charInRange(c, 0x0CD5, 0x0CD6) + || _charInRange(c, 0x0D02, 0x0D03) || _charInRange(c, 0x0D3E, 0x0D43) || _charInRange(c, 0x0D46, 0x0D48) + || _charInRange(c, 0x0D4A, 0x0D4D) || c == 0x0D57 || c == 0x0E31 || _charInRange(c, 0x0E34, 0x0E3A) + || _charInRange(c, 0x0E47, 0x0E4E) || c == 0x0EB1 || _charInRange(c, 0x0EB4, 0x0EB9) + || _charInRange(c, 0x0EBB, 0x0EBC) || _charInRange(c, 0x0EC8, 0x0ECD) || _charInRange(c, 0x0F18, 0x0F19) + || c == 0x0F35 || c == 0x0F37 || c == 0x0F39 || c == 0x0F3E || c == 0x0F3F + || _charInRange(c, 0x0F71, 0x0F84) || _charInRange(c, 0x0F86, 0x0F8B) || _charInRange(c, 0x0F90, 0x0F95) + || c == 0x0F97 || _charInRange(c, 0x0F99, 0x0FAD) || _charInRange(c, 0x0FB1, 0x0FB7) || c == 0x0FB9 + || _charInRange(c, 0x20D0, 0x20DC) || c == 0x20E1 || _charInRange(c, 0x302A, 0x302F) || c == 0x3099 + || c == 0x309A; + } + + public static final boolean isDigit(char c) { + return _isAsciiDigit(c) || _isNonAsciiDigit(c); + } + + private static final boolean _isAsciiDigit(char c) { + return _charInRange(c, 0x0030, 0x0039); + } + + private static final boolean _isNonAsciiDigit(char c) { + return _charInRange(c, 0x0660, 0x0669) || _charInRange(c, 0x06F0, 0x06F9) || _charInRange(c, 0x0966, 0x096F) + || 
_charInRange(c, 0x09E6, 0x09EF) || _charInRange(c, 0x0A66, 0x0A6F) || _charInRange(c, 0x0AE6, 0x0AEF) + || _charInRange(c, 0x0B66, 0x0B6F) || _charInRange(c, 0x0BE7, 0x0BEF) || _charInRange(c, 0x0C66, 0x0C6F) + || _charInRange(c, 0x0CE6, 0x0CEF) || _charInRange(c, 0x0D66, 0x0D6F) || _charInRange(c, 0x0E50, 0x0E59) + || _charInRange(c, 0x0ED0, 0x0ED9) || _charInRange(c, 0x0F20, 0x0F29); + } + + public static final boolean isExtender(char c) { + return c == 0x00B7 || c == 0x02D0 || c == 0x02D1 || c == 0x0387 || c == 0x0640 || c == 0x0E46 || c == 0x0EC6 + || c == 0x3005 || _charInRange(c, 0x3031, 0x3035) || _charInRange(c, 0x309D, 0x309E) + || _charInRange(c, 0x30FC, 0x30FE); + } + + private static final boolean _charInRange(char c, int start, int end) { + return c >= start && c <= end; + } + } diff --git a/src/main/java/com/siemens/ct/exi/json/EXIforJSONParser.java b/src/main/java/com/siemens/ct/exi/json/EXIforJSONParser.java index 4a1d3b8..5ea827b 100644 --- a/src/main/java/com/siemens/ct/exi/json/EXIforJSONParser.java +++ b/src/main/java/com/siemens/ct/exi/json/EXIforJSONParser.java @@ -155,7 +155,7 @@ public void parse(InputStream isEXI4JSON, OutputStream osJSON) throws EXIExcepti } } - private void parseV1(InputStream isEXI4JSON, OutputStream osJSON) throws EXIException, IOException { + protected void parseV1(InputStream isEXI4JSON, OutputStream osJSON) throws EXIException, IOException { EXIStreamDecoder streamDecoder = ef.createEXIStreamDecoder(); EXIBodyDecoder bodyDecoder = streamDecoder.decodeHeader(isEXI4JSON); @@ -228,7 +228,7 @@ private void parseV1(InputStream isEXI4JSON, OutputStream osJSON) throws EXIExce } - private void parseV2(InputStream isEXI4JSON, OutputStream osJSON) throws EXIException, IOException { + protected void parseV2(InputStream isEXI4JSON, OutputStream osJSON) throws EXIException, IOException { EXIStreamDecoder streamDecoder = ef.createEXIStreamDecoder(); EXIBodyDecoder bodyDecoder = streamDecoder.decodeHeader(isEXI4JSON); @@ -245,15 
+245,15 @@ private void parseV2(InputStream isEXI4JSON, OutputStream osJSON) throws EXIExce case END_DOCUMENT: bodyDecoder.decodeEndDocument(); break; - case ATTRIBUTE: - QNameContext qncAT = bodyDecoder.decodeAttribute(); - if(!EXI4JSONConstants.LOCALNAME_KEY.equals(qncAT.getLocalName())) { - throw new RuntimeException("Not supported EXI attribute: " + qncAT); - } - Value avalue = bodyDecoder.getAttributeValue(); - key = avalue.toString(); -// checkPendingEvent(generator, jsonEvent, value.toString()); - break; +// case ATTRIBUTE: +// QNameContext qncAT = bodyDecoder.decodeAttribute(); +// if(!EXI4JSONConstants.LOCALNAME_KEY.equals(qncAT.getLocalName())) { +// throw new RuntimeException("Not supported EXI attribute: " + qncAT); +// } +// Value avalue = bodyDecoder.getAttributeValue(); +// key = avalue.toString(); +//// checkPendingEvent(generator, jsonEvent, value.toString()); +// break; case CHARACTERS: value = bodyDecoder.decodeCharacters(); checkPendingEvent(generator); @@ -261,6 +261,7 @@ private void parseV2(InputStream isEXI4JSON, OutputStream osJSON) throws EXIExce case START_ELEMENT_NS: // key element key = bodyDecoder.decodeStartElement().getLocalName(); + key = unescapeKey(key); break; case START_ELEMENT: case START_ELEMENT_GENERIC: @@ -296,8 +297,9 @@ private void parseV2(InputStream isEXI4JSON, OutputStream osJSON) throws EXIExce // TODO other element throw new RuntimeException("'other' element not yet supported!"); } else { - // key element - key = bodyDecoder.decodeStartElement().getLocalName(); + // key element --> not necessary here : MUST BE START_ELEMENT_NS + // key = bodyDecoder.decodeStartElement().getLocalName(); + throw new RuntimeException("Unexpected element " + qncSE); } break; case END_ELEMENT: @@ -317,6 +319,17 @@ private void parseV2(InputStream isEXI4JSON, OutputStream osJSON) throws EXIExce generator.flush(); } + protected String unescapeKey(String key) { + // conflicting names + if(key.length() > 2 && key.charAt(0) == 
EXI4JSONConstants.ESCAPE_START_CHARACTER && key.charAt(1) == EXI4JSONConstants.ESCAPE_END_CHARACTER) { + key = key.substring(2); + } + // TODO represent '_' itself + // TODO Conflict with NCName character(s) + + return key; + } + // public static void main(String[] args) throws EXIException, IOException { // if(args.length == 1) { // EXIforJSONParser e4jParser = new EXIforJSONParser(); diff --git a/src/test/java/com/siemens/ct/exi/json/AbstractJSONDataTests.java b/src/test/java/com/siemens/ct/exi/json/AbstractJSONDataTests.java index 1bf94eb..de95f55 100644 --- a/src/test/java/com/siemens/ct/exi/json/AbstractJSONDataTests.java +++ b/src/test/java/com/siemens/ct/exi/json/AbstractJSONDataTests.java @@ -57,18 +57,34 @@ public void testFancyLed() throws EXIException, IOException, JSONException { _test(readURL(url)); _test(readURL(url), sharedStrings); } - - + @Test public void testJSIssue1() throws EXIException, IOException, JSONException { String jsonTest = "{\n\"type\": \"FeatureCollection\",\n\"totalFeatures\": 2,\n\"features\": [\n{\n\"type\": \"Feature\",\n\"id\": \"poi.1\",\n\"geometry\": {\n\"type\": \"Point\",\n\"coordinates\": [\n40.707587626256554,\n-74.01046109936333\n]\n},\n\"geometry_name\": \"the_geom\",\n\"properties\": {\n\"NAME\": \"museam\",\n\"THUMBNAIL\": \"pics/22037827-Ti.jpg\",\n\"MAINPAGE\": \"pics/22037827-L.jpg\"\n}\n},\n{\n\"type\": \"Feature\",\n\"id\": \"poi.2\",\n\"geometry\": {\n\"type\": \"Point\",\n\"coordinates\": [\n40.70754683896324,\n-74.0108375113659\n]\n},\n\"geometry_name\": \"the_geom\",\n\"properties\": {\n\"NAME\": \"stock\",\n\"THUMBNAIL\": \"pics/22037829-Ti.jpg\",\n\"MAINPAGE\": \"pics/22037829-L.jpg\"\n}\n}\n],\n\"crs\": {\n\"type\": \"EPSG\",\n\"properties\": {\n\"code\": \"4326\"\n}\n}\n}"; _test(jsonTest); } + + // Key-name Escaping: Conflict with existing EXI4JSON global schema element + // name "number" + // {"a number": 1} + @Test + public void testKeynameEscapingConflictEXI4JSONName1() throws EXIException, 
IOException, JSONException { + String jsonTest = "{\r\n" + " \"number\": 1\r\n" + "}"; + _test(jsonTest); + } + // Key-name Escaping: Conflict with NCName character(s) "a number" + // {"a number": 2} + @Test + public void testKeynameEscapingConflictNCName1() throws EXIException, IOException, JSONException { + String jsonTest = "{\r\n" + " \"a number\": 2\r\n" + "}"; + _test(jsonTest); + } abstract protected int _test(String expected) throws EXIException, IOException, JSONException; - - abstract protected int _test(String expected, List sharedStrings) throws EXIException, IOException, JSONException; + + abstract protected int _test(String expected, List sharedStrings) + throws EXIException, IOException, JSONException; static String readFile(String path) throws IOException { byte[] encoded = Files.readAllBytes(Paths.get(path)); diff --git a/src/test/java/com/siemens/ct/exi/json/JSONDataTestsV2.java b/src/test/java/com/siemens/ct/exi/json/JSONDataTestsV2.java index d4c3901..c875dd6 100644 --- a/src/test/java/com/siemens/ct/exi/json/JSONDataTestsV2.java +++ b/src/test/java/com/siemens/ct/exi/json/JSONDataTestsV2.java @@ -7,33 +7,34 @@ import java.util.List; import org.json.JSONException; +import org.junit.Test; import org.skyscreamer.jsonassert.JSONAssert; import com.siemens.ct.exi.EXIFactory; import com.siemens.ct.exi.exceptions.EXIException; import com.siemens.ct.exi.helpers.DefaultEXIFactory; -public class JSONDataTestsV2 extends AbstractJSONDataTests { +public class JSONDataTestsV2 extends AbstractJSONDataTests { protected int _test(String expected) throws EXIException, IOException, JSONException { return _test(expected, null); } - + protected int _test(String expected, List sharedStrings) throws EXIException, IOException, JSONException { EXIforJSONGenerator e4jGenerator; EXIforJSONParser e4jParser; - - if(sharedStrings == null) { + + if (sharedStrings == null) { e4jGenerator = new EXIforJSONGenerator(); e4jParser = new EXIforJSONParser(); } else { EXIFactory ef = 
DefaultEXIFactory.newInstance(); ef.setSharedStrings(sharedStrings); - + e4jGenerator = new EXIforJSONGenerator(ef); e4jParser = new EXIforJSONParser(ef); } - + // generate exi-for-json InputStream is = new ByteArrayInputStream(expected.getBytes()); // ByteArrayOutputStream baosEXI = new ByteArrayOutputStream(); @@ -50,8 +51,68 @@ protected int _test(String expected, List sharedStrings) throws EXIExcep // String expected = readFile(path); // actual = actual.replace("1.0", "1.001"); // test JSONAssert.assertEquals(expected, actual, true); - + return baosEXI.size(); } + @Test + public void testEscapeKey0() throws EXIException, IOException, JSONException { + String key = "normalKey"; + EXIforJSONGenerator e4jGenerator = new EXIforJSONGenerator(); + String ekey = e4jGenerator.escapeKey(key); + + assertTrue(key.equals(ekey)); + + EXIforJSONParser e4jParser = new EXIforJSONParser(); + String ukey = e4jParser.unescapeKey(ekey); + assertTrue(ukey.equals(key)); + } + + @Test + public void testEscapeKey1() throws EXIException, IOException, JSONException { + String key = EXI4JSONConstants.LOCALNAME_NUMBER; // "number" + EXIforJSONGenerator e4jGenerator = new EXIforJSONGenerator(); + String ekey = e4jGenerator.escapeKey(key); + + assertFalse(key.equals(ekey)); + assertTrue((String.valueOf(EXI4JSONConstants.ESCAPE_START_CHARACTER)+String.valueOf(EXI4JSONConstants.ESCAPE_END_CHARACTER)+EXI4JSONConstants.LOCALNAME_NUMBER).equals(ekey)); + + EXIforJSONParser e4jParser = new EXIforJSONParser(); + String ukey = e4jParser.unescapeKey(ekey); + + assertTrue(ukey.equals(key)); + } + + @Test + public void testEscapeKey2() throws EXIException, IOException, JSONException { + String key = "a number"; + EXIforJSONGenerator e4jGenerator = new EXIforJSONGenerator(); + String ekey = e4jGenerator.escapeKey(key); + + assertFalse(key.equals(ekey)); + assertTrue("a_32.number".equals(ekey)); + + // TODO + } + + @Test + public void testEscapeKey3() throws EXIException, IOException, JSONException { + 
String key = "_foo"; + EXIforJSONGenerator e4jGenerator = new EXIforJSONGenerator(); + String ekey = e4jGenerator.escapeKey(key); + + assertFalse(key.equals(ekey)); + assertTrue("_95.foo".equals(ekey)); + } + + @Test + public void testEscapeKey4() throws EXIException, IOException, JSONException { + String key = "foo_.A"; + EXIforJSONGenerator e4jGenerator = new EXIforJSONGenerator(); + String ekey = e4jGenerator.escapeKey(key); + + assertFalse(key.equals(ekey)); + assertTrue("foo_95..A".equals(ekey)); + } + }