diff --git a/Makefile b/Makefile index b2c40367c9c0a..732f49893639e 100644 --- a/Makefile +++ b/Makefile @@ -235,6 +235,7 @@ C_TESTS = \ CC_TESTS = \ tests/pb/test_decoder \ + tests/json/test_json \ tests/test_cpp \ tests/test_table \ @@ -264,6 +265,7 @@ tests/test_handlers: LIBS = lib/libupb.descriptor.a lib/libupb.a tests/pb/test_decoder: LIBS = lib/libupb.pb.a lib/libupb.a tests/test_cpp: LIBS = $(LOAD_DESCRIPTOR_LIBS) lib/libupb.a tests/test_table: LIBS = lib/libupb.a +tests/json/test_json: LIBS = lib/libupb.a lib/libupb.json.a tests/test_def: tests/test.proto.pb diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc new file mode 100644 index 0000000000000..14440817062d4 --- /dev/null +++ b/tests/json/test_json.cc @@ -0,0 +1,244 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2014 Google Inc. See LICENSE for details. + * + * A set of tests for JSON parsing and serialization. + */ + +#include "tests/upb_test.h" +#include "upb/handlers.h" +#include "upb/symtab.h" +#include "upb/json/printer.h" +#include "upb/json/parser.h" +#include "upb/upb.h" + +#include + +// Macros for readability in test case list: allows us to give TEST("...") / +// EXPECT("...") pairs. +#define TEST(x) x +#define EXPECT_SAME NULL +#define EXPECT(x) x +#define TEST_SENTINEL { NULL, NULL } + +struct TestCase { + const char* input; + const char* expected; +}; + +static TestCase kTestRoundtripMessages[] = { + // Test most fields here. + { + TEST("{\"optional_int32\":-42,\"optional_string\":\"Test\\u0001Message\"," + "\"optional_msg\":{\"foo\":42}," + "\"optional_bool\":true,\"repeated_msg\":[{\"foo\":1}," + "{\"foo\":2}]}"), + EXPECT_SAME + }, + // Test special escapes in strings. + { + TEST("{\"repeated_string\":[\"\\b\",\"\\r\",\"\\n\",\"\\f\",\"\\t\"," + "\"\uFFFF\"]}"), + EXPECT_SAME + }, + // Test enum symbolic names. + { + // The common case: parse and print the symbolic name. + TEST("{\"optional_enum\":\"A\"}"), + EXPECT_SAME + }, + { + // Unknown enum value: will be printed as an integer. + TEST("{\"optional_enum\":42}"), + EXPECT_SAME + }, + { + // Known enum value: we're happy to parse an integer but we will re-emit the + // symbolic name. + TEST("{\"optional_enum\":1}"), + EXPECT("{\"optional_enum\":\"B\"}") + }, + // UTF-8 tests: escapes -> literal UTF8 in output. + { + // Note double escape on \uXXXX: we want the escape to be processed by the + // JSON parser, not by the C++ compiler! + TEST("{\"optional_string\":\"\\u007F\"}"), + EXPECT("{\"optional_string\":\"\x7F\"}") + }, + { + TEST("{\"optional_string\":\"\\u0080\"}"), + EXPECT("{\"optional_string\":\"\xC2\x80\"}") + }, + { + TEST("{\"optional_string\":\"\\u07FF\"}"), + EXPECT("{\"optional_string\":\"\xDF\xBF\"}") + }, + { + TEST("{\"optional_string\":\"\\u0800\"}"), + EXPECT("{\"optional_string\":\"\xE0\xA0\x80\"}") + }, + { + TEST("{\"optional_string\":\"\\uFFFF\"}"), + EXPECT("{\"optional_string\":\"\xEF\xBF\xBF\"}") + }, + TEST_SENTINEL +}; + +static void AddField(upb::MessageDef* message, + int number, + const char* name, + upb_fieldtype_t type, + bool is_repeated, + const upb::Def* subdef = NULL) { + upb::reffed_ptr field(upb::FieldDef::New()); + upb::Status st; + field->set_name(name, &st); + field->set_type(type); + field->set_label(is_repeated ? UPB_LABEL_REPEATED : UPB_LABEL_OPTIONAL); + field->set_number(number, &st); + if (subdef) { + field->set_subdef(subdef, &st); + } + message->AddField(field, &st); +} + +static const upb::MessageDef* BuildTestMessage( + upb::reffed_ptr symtab) { + upb::Status st; + + // Create SubMessage. + upb::reffed_ptr submsg(upb::MessageDef::New()); + submsg->set_full_name("SubMessage", &st); + AddField(submsg.get(), 1, "foo", UPB_TYPE_INT32, false); + + // Create MyEnum. + upb::reffed_ptr myenum(upb::EnumDef::New()); + myenum->set_full_name("MyEnum", &st); + myenum->AddValue("A", 0, &st); + myenum->AddValue("B", 1, &st); + myenum->AddValue("C", 2, &st); + + // Create TestMessage. + upb::reffed_ptr md(upb::MessageDef::New()); + md->set_full_name("TestMessage", &st); + + AddField(md.get(), 1, "optional_int32", UPB_TYPE_INT32, false); + AddField(md.get(), 2, "optional_int64", UPB_TYPE_INT64, false); + AddField(md.get(), 3, "optional_uint32", UPB_TYPE_UINT32, false); + AddField(md.get(), 4, "optional_uint64", UPB_TYPE_UINT64, false); + AddField(md.get(), 5, "optional_string", UPB_TYPE_STRING, false); + AddField(md.get(), 6, "optional_bytes", UPB_TYPE_BYTES, false); + AddField(md.get(), 7, "optional_bool" , UPB_TYPE_BOOL, false); + AddField(md.get(), 8, "optional_msg" , UPB_TYPE_MESSAGE, false, + upb::upcast(submsg.get())); + AddField(md.get(), 9, "optional_enum", UPB_TYPE_ENUM, false, + upb::upcast(myenum.get())); + + AddField(md.get(), 11, "repeated_int32", UPB_TYPE_INT32, true); + AddField(md.get(), 12, "repeated_int64", UPB_TYPE_INT64, true); + AddField(md.get(), 13, "repeated_uint32", UPB_TYPE_UINT32, true); + AddField(md.get(), 14, "repeated_uint64", UPB_TYPE_UINT64, true); + AddField(md.get(), 15, "repeated_string", UPB_TYPE_STRING, true); + AddField(md.get(), 16, "repeated_bytes", UPB_TYPE_BYTES, true); + AddField(md.get(), 17, "repeated_bool" , UPB_TYPE_BOOL, true); + AddField(md.get(), 18, "repeated_msg" , UPB_TYPE_MESSAGE, true, + upb::upcast(submsg.get())); + AddField(md.get(), 19, "optional_enum", UPB_TYPE_ENUM, true, + upb::upcast(myenum.get())); + + // Add both to our symtab. + upb::Def* defs[3] = { + upb::upcast(submsg.ReleaseTo(&defs)), + upb::upcast(myenum.ReleaseTo(&defs)), + upb::upcast(md.ReleaseTo(&defs)), + }; + symtab->Add(defs, 3, &defs, &st); + + // Return TestMessage. + return symtab->LookupMessage("TestMessage"); +} + +class StringSink { + public: + StringSink() { + upb_byteshandler_init(&byteshandler_); + upb_byteshandler_setstring(&byteshandler_, &str_handler, NULL); + upb_bytessink_reset(&bytessink_, &byteshandler_, &s_); + } + ~StringSink() { } + + upb_bytessink* Sink() { return &bytessink_; } + + const std::string& Data() { return s_; } + + private: + + static size_t str_handler(void* _closure, const void* hd, + const char* data, size_t len, + const upb_bufhandle* handle) { + UPB_UNUSED(hd); + UPB_UNUSED(handle); + std::string* s = static_cast(_closure); + std::string appended(data, len); + s->append(data, len); + return len; + } + + upb_byteshandler byteshandler_; + upb_bytessink bytessink_; + std::string s_; +}; + +// Starts with a message in JSON format, parses and directly serializes again, +// and compares the result. +void test_json_roundtrip() { + upb::reffed_ptr symtab(upb::SymbolTable::New()); + const upb::MessageDef* md = BuildTestMessage(symtab.get()); + upb::reffed_ptr serialize_handlers( + upb::json::Printer::NewHandlers(md)); + + for (const TestCase* test_case = kTestRoundtripMessages; + test_case->input != NULL; test_case++) { + + const char *json_src = test_case->input; + const char *json_expected = test_case->expected; + if (json_expected == EXPECT_SAME) { + json_expected = json_src; + } + + upb::Status st; + upb::json::Parser parser(&st); + upb::json::Printer printer(serialize_handlers.get()); + StringSink data_sink; + + parser.ResetOutput(printer.input()); + printer.ResetOutput(data_sink.Sink()); + + bool ok = upb::BufferSource::PutBuffer(json_src, strlen(json_src), + parser.input()); + if (!ok) { + fprintf(stderr, "upb parse error: %s\n", st.error_message()); + } + ASSERT(ok); + + if (memcmp(json_expected, + data_sink.Data().data(), + data_sink.Data().size())) { + fprintf(stderr, + "JSON parse/serialize roundtrip result differs:\n" + "Original:\n%s\nParsed/Serialized:\n%s\n", + json_src, data_sink.Data().c_str()); + abort(); + } + } +} + +extern "C" { +int run_tests(int argc, char *argv[]) { + UPB_UNUSED(argc); + UPB_UNUSED(argv); + test_json_roundtrip(); + return 0; +} +} diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c index 2bd78afa51868..17fc0a865ca64 100644 --- a/upb/bindings/lua/upb.c +++ b/upb/bindings/lua/upb.c @@ -1032,7 +1032,7 @@ static int lupb_enumdef_value(lua_State *L) { } else if (type == LUA_TSTRING) { const char *key = lua_tostring(L, 2); int32_t num; - if (upb_enumdef_ntoi(e, key, &num)) { + if (upb_enumdef_ntoiz(e, key, &num)) { lua_pushinteger(L, num); } else { lua_pushnil(L); diff --git a/upb/def.c b/upb/def.c index fde2ee85228b1..aa05618dba132 100644 --- a/upb/def.c +++ b/upb/def.c @@ -457,7 +457,7 @@ bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, if (!upb_isident(name, strlen(name), false, status)) { return false; } - if (upb_enumdef_ntoi(e, name, NULL)) { + if (upb_enumdef_ntoiz(e, name, NULL)) { upb_status_seterrf(status, "name '%s' is already defined", name); return false; } @@ -505,9 +505,10 @@ void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } -bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, int32_t *num) { +bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, + size_t len, int32_t *num) { upb_value v; - if (!upb_strtable_lookup(&def->ntoi, name, &v)) { + if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) { return false; } if (num) *num = upb_value_getint32(v); @@ -595,7 +596,7 @@ static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) { if (f->defaultval.bytes) { // Default was explicitly set as a str; try to lookup corresponding int. str_t *s = f->defaultval.bytes; - if (upb_enumdef_ntoi(e, s->str, val)) { + if (upb_enumdef_ntoiz(e, s->str, val)) { return true; } } else { diff --git a/upb/def.h b/upb/def.h index 2699fbf888ca2..cfa140a16b83e 100644 --- a/upb/def.h +++ b/upb/def.h @@ -943,7 +943,17 @@ bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s); int upb_enumdef_numvals(const upb_enumdef *e); bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, upb_status *status); -bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, int32_t *num); + +// Enum lookups: +// - ntoi: look up a name with specified length. +// - ntoiz: look up a name provided as a null-terminated string. +// - iton: look up an integer, returning the name as a null-terminated string. +bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, size_t len, + int32_t *num); +UPB_INLINE bool upb_enumdef_ntoiz(const upb_enumdef *e, + const char *name, int32_t *num) { + return upb_enumdef_ntoi(e, name, strlen(name), num); +} const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num); // upb_enum_iter i; @@ -1352,7 +1362,7 @@ inline bool EnumDef::AddValue(const std::string& name, int32_t num, return upb_enumdef_addval(this, upb_safecstr(name), num, status); } inline bool EnumDef::FindValueByName(const char* name, int32_t *num) const { - return upb_enumdef_ntoi(this, name, num); + return upb_enumdef_ntoiz(this, name, num); } inline const char* EnumDef::FindValueByNumber(int32_t num) const { return upb_enumdef_iton(this, num); diff --git a/upb/json/parser.c b/upb/json/parser.c index 2687713058907..78fc6c0e21c0b 100644 --- a/upb/json/parser.c +++ b/upb/json/parser.c @@ -288,7 +288,7 @@ static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr, return false; } -static bool end_text(upb_json_parser *p, const char *ptr) { +static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) { assert(!p->accumulated); // TODO: handle this case. p->accumulated = p->text_begin; p->accumulated_len = ptr - p->text_begin; @@ -302,6 +302,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) { upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL); } p->accumulated = NULL; + } else if (p->top->f && + upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM && + !is_num) { + + // Enum case: resolve enum symbolic name to integer value. + const upb_enumdef *enumdef = + (const upb_enumdef*)upb_fielddef_subdef(p->top->f); + + int32_t int_val = 0; + if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len, + &int_val)) { + upb_selector_t sel = getsel(p); + upb_sink_putint32(&p->top->sink, sel, int_val); + } else { + upb_status_seterrmsg(p->status, "Enum value name unknown"); + return false; + } + p->accumulated = NULL; } return true; @@ -310,29 +328,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) { static bool start_stringval(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_isstring(p->top->f)) { + if (upb_fielddef_isstring(p->top->f)) { + if (!check_stack(p)) return false; + + // Start a new parser frame: parser frames correspond one-to-one with + // handler frames, and string events occur in a sub-frame. + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); + inner->m = p->top->m; + inner->f = p->top->f; + p->top = inner; + + return true; + } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { + // Do nothing -- symbolic enum names in quotes remain in the + // current parser frame. + return true; + } else { upb_status_seterrf(p->status, - "String specified for non-string field: %s", + "String specified for non-string/non-enum field: %s", upb_fielddef_name(p->top->f)); return false; } - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow. - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - return true; } static void end_stringval(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(&p->top->sink, sel); + if (upb_fielddef_isstring(p->top->f)) { + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&p->top->sink, sel); + p->top--; + } } static void start_number(upb_json_parser *p, const char *ptr) { @@ -341,7 +368,7 @@ static void start_number(upb_json_parser *p, const char *ptr) { } static void end_number(upb_json_parser *p, const char *ptr) { - end_text(p, ptr); + end_text(p, ptr, true); const char *myend = p->accumulated + p->accumulated_len; char *end; @@ -450,15 +477,15 @@ static void hex(upb_json_parser *p, const char *end) { // emit the codepoint as UTF-8. char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. int length = 0; - if (codepoint < 0x7F) { + if (codepoint <= 0x7F) { utf8[0] = codepoint; length = 1; - } else if (codepoint < 0x07FF) { + } else if (codepoint <= 0x07FF) { utf8[1] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[0] = (codepoint & 0x1F) | 0xC0; length = 2; - } else /* codepoint < 0xFFFF */ { + } else /* codepoint <= 0xFFFF */ { utf8[2] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[1] = (codepoint & 0x3F) | 0x80; @@ -478,11 +505,11 @@ static void hex(upb_json_parser *p, const char *end) { // What follows is the Ragel parser itself. The language is specified in Ragel // and the actions call our C functions above. -#line 568 "upb/json/parser.rl" +#line 595 "upb/json/parser.rl" -#line 486 "upb/json/parser.c" +#line 513 "upb/json/parser.c" static const char _json_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, @@ -635,7 +662,7 @@ static const int json_en_value_machine = 27; static const int json_en_main = 1; -#line 571 "upb/json/parser.rl" +#line 598 "upb/json/parser.rl" size_t parse(void *closure, const void *hd, const char *buf, size_t size, const upb_bufhandle *handle) { @@ -652,7 +679,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, const char *pe = buf + size; -#line 656 "upb/json/parser.c" +#line 683 "upb/json/parser.c" { int _klen; unsigned int _trans; @@ -727,114 +754,114 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, switch ( *_acts++ ) { case 0: -#line 489 "upb/json/parser.rl" +#line 516 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; case 1: -#line 490 "upb/json/parser.rl" +#line 517 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 10; goto _again;} } break; case 2: -#line 494 "upb/json/parser.rl" +#line 521 "upb/json/parser.rl" { start_text(parser, p); } break; case 3: -#line 495 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_text(parser, p)); } +#line 522 "upb/json/parser.rl" + { CHECK_RETURN_TOP(end_text(parser, p, false)); } break; case 4: -#line 501 "upb/json/parser.rl" +#line 528 "upb/json/parser.rl" { start_hex(parser, p); } break; case 5: -#line 502 "upb/json/parser.rl" +#line 529 "upb/json/parser.rl" { hex(parser, p); } break; case 6: -#line 508 "upb/json/parser.rl" +#line 535 "upb/json/parser.rl" { escape(parser, p); } break; case 7: -#line 511 "upb/json/parser.rl" +#line 538 "upb/json/parser.rl" { {cs = stack[--top]; goto _again;} } break; case 8: -#line 512 "upb/json/parser.rl" +#line 539 "upb/json/parser.rl" { {stack[top++] = cs; cs = 19; goto _again;} } break; case 9: -#line 514 "upb/json/parser.rl" +#line 541 "upb/json/parser.rl" { p--; {stack[top++] = cs; cs = 27; goto _again;} } break; case 10: -#line 519 "upb/json/parser.rl" +#line 546 "upb/json/parser.rl" { start_member(parser); } break; case 11: -#line 520 "upb/json/parser.rl" +#line 547 "upb/json/parser.rl" { CHECK_RETURN_TOP(end_member(parser)); } break; case 12: -#line 523 "upb/json/parser.rl" +#line 550 "upb/json/parser.rl" { clear_member(parser); } break; case 13: -#line 529 "upb/json/parser.rl" +#line 556 "upb/json/parser.rl" { start_object(parser); } break; case 14: -#line 532 "upb/json/parser.rl" +#line 559 "upb/json/parser.rl" { end_object(parser); } break; case 15: -#line 538 "upb/json/parser.rl" +#line 565 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_array(parser)); } break; case 16: -#line 542 "upb/json/parser.rl" +#line 569 "upb/json/parser.rl" { end_array(parser); } break; case 17: -#line 547 "upb/json/parser.rl" +#line 574 "upb/json/parser.rl" { start_number(parser, p); } break; case 18: -#line 548 "upb/json/parser.rl" +#line 575 "upb/json/parser.rl" { end_number(parser, p); } break; case 19: -#line 550 "upb/json/parser.rl" +#line 577 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_stringval(parser)); } break; case 20: -#line 551 "upb/json/parser.rl" +#line 578 "upb/json/parser.rl" { end_stringval(parser); } break; case 21: -#line 553 "upb/json/parser.rl" +#line 580 "upb/json/parser.rl" { CHECK_RETURN_TOP(putbool(parser, true)); } break; case 22: -#line 555 "upb/json/parser.rl" +#line 582 "upb/json/parser.rl" { CHECK_RETURN_TOP(putbool(parser, false)); } break; case 23: -#line 557 "upb/json/parser.rl" +#line 584 "upb/json/parser.rl" { /* null value */ } break; case 24: -#line 559 "upb/json/parser.rl" +#line 586 "upb/json/parser.rl" { CHECK_RETURN_TOP(start_subobject(parser)); } break; case 25: -#line 560 "upb/json/parser.rl" +#line 587 "upb/json/parser.rl" { end_subobject(parser); } break; case 26: -#line 565 "upb/json/parser.rl" +#line 592 "upb/json/parser.rl" { p--; {cs = stack[--top]; goto _again;} } break; -#line 838 "upb/json/parser.c" +#line 865 "upb/json/parser.c" } } @@ -847,7 +874,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size, _out: {} } -#line 587 "upb/json/parser.rl" +#line 614 "upb/json/parser.rl" if (p != pe) { upb_status_seterrf(parser->status, "Parse error at %s\n", p); @@ -888,13 +915,13 @@ void upb_json_parser_reset(upb_json_parser *p) { int top; // Emit Ragel initialization of the parser. -#line 892 "upb/json/parser.c" +#line 919 "upb/json/parser.c" { cs = json_start; top = 0; } -#line 627 "upb/json/parser.rl" +#line 654 "upb/json/parser.rl" p->current_state = cs; p->parser_top = top; p->text_begin = NULL; diff --git a/upb/json/parser.rl b/upb/json/parser.rl index 92a1566d9d32e..8ceca77d5fa86 100644 --- a/upb/json/parser.rl +++ b/upb/json/parser.rl @@ -286,7 +286,7 @@ badpadding: return false; } -static bool end_text(upb_json_parser *p, const char *ptr) { +static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) { assert(!p->accumulated); // TODO: handle this case. p->accumulated = p->text_begin; p->accumulated_len = ptr - p->text_begin; @@ -300,6 +300,24 @@ static bool end_text(upb_json_parser *p, const char *ptr) { upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL); } p->accumulated = NULL; + } else if (p->top->f && + upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM && + !is_num) { + + // Enum case: resolve enum symbolic name to integer value. + const upb_enumdef *enumdef = + (const upb_enumdef*)upb_fielddef_subdef(p->top->f); + + int32_t int_val = 0; + if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len, + &int_val)) { + upb_selector_t sel = getsel(p); + upb_sink_putint32(&p->top->sink, sel, int_val); + } else { + upb_status_seterrmsg(p->status, "Enum value name unknown"); + return false; + } + p->accumulated = NULL; } return true; @@ -308,29 +326,38 @@ static bool end_text(upb_json_parser *p, const char *ptr) { static bool start_stringval(upb_json_parser *p) { assert(p->top->f); - if (!upb_fielddef_isstring(p->top->f)) { + if (upb_fielddef_isstring(p->top->f)) { + if (!check_stack(p)) return false; + + // Start a new parser frame: parser frames correspond one-to-one with + // handler frames, and string events occur in a sub-frame. + upb_jsonparser_frame *inner = p->top + 1; + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); + upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); + inner->m = p->top->m; + inner->f = p->top->f; + p->top = inner; + + return true; + } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { + // Do nothing -- symbolic enum names in quotes remain in the + // current parser frame. + return true; + } else { upb_status_seterrf(p->status, - "String specified for non-string field: %s", + "String specified for non-string/non-enum field: %s", upb_fielddef_name(p->top->f)); return false; } - if (!check_stack(p)) return false; - - upb_jsonparser_frame *inner = p->top + 1; // TODO: check for overflow. - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - return true; } static void end_stringval(upb_json_parser *p) { - p->top--; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(&p->top->sink, sel); + if (upb_fielddef_isstring(p->top->f)) { + upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); + upb_sink_endstr(&p->top->sink, sel); + p->top--; + } } static void start_number(upb_json_parser *p, const char *ptr) { @@ -339,7 +366,7 @@ static void start_number(upb_json_parser *p, const char *ptr) { } static void end_number(upb_json_parser *p, const char *ptr) { - end_text(p, ptr); + end_text(p, ptr, true); const char *myend = p->accumulated + p->accumulated_len; char *end; @@ -448,15 +475,15 @@ static void hex(upb_json_parser *p, const char *end) { // emit the codepoint as UTF-8. char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes. int length = 0; - if (codepoint < 0x7F) { + if (codepoint <= 0x7F) { utf8[0] = codepoint; length = 1; - } else if (codepoint < 0x07FF) { + } else if (codepoint <= 0x07FF) { utf8[1] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[0] = (codepoint & 0x1F) | 0xC0; length = 2; - } else /* codepoint < 0xFFFF */ { + } else /* codepoint <= 0xFFFF */ { utf8[2] = (codepoint & 0x3F) | 0x80; codepoint >>= 6; utf8[1] = (codepoint & 0x3F) | 0x80; @@ -492,7 +519,7 @@ static void hex(upb_json_parser *p, const char *end) { text = /[^\\"]/+ >{ start_text(parser, p); } - %{ CHECK_RETURN_TOP(end_text(parser, p)); } + %{ CHECK_RETURN_TOP(end_text(parser, p, false)); } ; unicode_char = diff --git a/upb/json/printer.c b/upb/json/printer.c index 44e6f8316d49a..28f3e4a25a62f 100644 --- a/upb/json/printer.c +++ b/upb/json/printer.c @@ -69,10 +69,10 @@ static inline char* json_nice_escape(char c) { } } -// Write a properly quoted and escaped string. +// Write a properly escaped string chunk. The surrounding quotes are *not* +// printed; this is so that the caller has the option of emitting the string +// content in chunks. static void putstring(upb_json_printer *p, const char *buf, unsigned int len) { - print_data(p, "\"", 1); - const char* unescaped_run = NULL; for (unsigned int i = 0; i < len; i++) { char c = buf[i]; @@ -112,8 +112,6 @@ static void putstring(upb_json_printer *p, const char *buf, unsigned int len) { if (unescaped_run) { print_data(p, unescaped_run, &buf[len] - unescaped_run); } - - print_data(p, "\"", 1); } #define CHKLENGTH(x) if (!(x)) return -1; @@ -158,8 +156,9 @@ static bool putkey(void *closure, const void *handler_data) { upb_json_printer *p = closure; const strpc *key = handler_data; print_comma(p); + print_data(p, "\"", 1); putstring(p, key->ptr, key->len); - print_data(p, ":", 1); + print_data(p, "\":", 2); return true; } @@ -200,6 +199,47 @@ TYPE_HANDLERS(uint64_t, fmt_uint64); #undef TYPE_HANDLERS +typedef struct { + void *keyname; + const upb_enumdef *enumdef; +} EnumHandlerData; + +static bool scalar_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + CHK(putkey(closure, hd->keyname)); + + const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val); + if (symbolic_name) { + print_data(p, "\"", 1); + putstring(p, symbolic_name, strlen(symbolic_name)); + print_data(p, "\"", 1); + } else { + putint32_t(closure, NULL, val); + } + + return true; +} + +static bool repeated_enum(void *closure, const void *handler_data, + int32_t val) { + const EnumHandlerData *hd = handler_data; + upb_json_printer *p = closure; + print_comma(p); + + const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val); + if (symbolic_name) { + print_data(p, "\"", 1); + putstring(p, symbolic_name, strlen(symbolic_name)); + print_data(p, "\"", 1); + } else { + putint32_t(closure, NULL, val); + } + + return true; +} + static void *scalar_startsubmsg(void *closure, const void *handler_data) { return putkey(closure, handler_data) ? closure : UPB_BREAK; } @@ -310,27 +350,60 @@ static size_t putbytes(void *closure, const void *handler_data, const char *str, } size_t bytes = to - data; + print_data(p, "\"", 1); putstring(p, data, bytes); + print_data(p, "\"", 1); return len; } +static void *scalar_startstr(void *closure, const void *handler_data, + size_t size_hint) { + UPB_UNUSED(handler_data); + UPB_UNUSED(size_hint); + upb_json_printer *p = closure; + CHK(putkey(closure, handler_data)); + print_data(p, "\"", 1); + return p; +} + static size_t scalar_str(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { - CHK(putkey(closure, handler_data)); CHK(putstr(closure, handler_data, str, len, handle)); return len; } +static bool scalar_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return true; +} + +static void *repeated_startstr(void *closure, const void *handler_data, + size_t size_hint) { + UPB_UNUSED(handler_data); + UPB_UNUSED(size_hint); + upb_json_printer *p = closure; + print_comma(p); + print_data(p, "\"", 1); + return p; +} + static size_t repeated_str(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { - upb_json_printer *p = closure; - print_comma(p); CHK(putstr(closure, handler_data, str, len, handle)); return len; } +static bool repeated_endstr(void *closure, const void *handler_data) { + UPB_UNUSED(handler_data); + upb_json_printer *p = closure; + print_data(p, "\"", 1); + return true; +} + static size_t scalar_bytes(void *closure, const void *handler_data, const char *str, size_t len, const upb_bufhandle *handle) { @@ -381,21 +454,44 @@ void sethandlers(const void *closure, upb_handlers *h) { TYPE(UPB_TYPE_FLOAT, float, float); TYPE(UPB_TYPE_DOUBLE, double, double); TYPE(UPB_TYPE_BOOL, bool, bool); - TYPE(UPB_TYPE_ENUM, int32, int32_t); TYPE(UPB_TYPE_INT32, int32, int32_t); TYPE(UPB_TYPE_UINT32, uint32, uint32_t); TYPE(UPB_TYPE_INT64, int64, int64_t); TYPE(UPB_TYPE_UINT64, uint64, uint64_t); + case UPB_TYPE_ENUM: { + // For now, we always emit symbolic names for enums. We may want an + // option later to control this behavior, but we will wait for a real + // need first. + EnumHandlerData *hd = malloc(sizeof(EnumHandlerData)); + hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f); + hd->keyname = newstrpc(h, f); + upb_handlers_addcleanup(h, hd, free); + upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&enum_attr, hd); + + if (upb_fielddef_isseq(f)) { + upb_handlers_setint32(h, f, repeated_enum, &enum_attr); + } else { + upb_handlers_setint32(h, f, scalar_enum, &enum_attr); + } + + upb_handlerattr_uninit(&enum_attr); + break; + } case UPB_TYPE_STRING: - // XXX: this doesn't support strings that span buffers yet. if (upb_fielddef_isseq(f)) { + upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr); upb_handlers_setstring(h, f, repeated_str, &empty_attr); + upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr); } else { - upb_handlers_setstring(h, f, scalar_str, &name_attr); + upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr); + upb_handlers_setstring(h, f, scalar_str, &empty_attr); + upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr); } break; case UPB_TYPE_BYTES: - // XXX: this doesn't support strings that span buffers yet. + // XXX: this doesn't support strings that span buffers yet. The base64 + // encoder will need to be made resumable for this to work properly. if (upb_fielddef_isseq(f)) { upb_handlers_setstring(h, f, repeated_bytes, &empty_attr); } else {