From d6f19d96ffb6ffd484c8bc92885c8547a8f967bf Mon Sep 17 00:00:00 2001 From: Andrew Hilger Date: Wed, 22 Jan 2025 07:32:47 -0800 Subject: [PATCH] remove decodes for unicode string constants Summary: Remove `b"".decode()` handling of non-ascii unicode literals by using hex-escaping instead of octal escapes for non-ascii characters. Reviewed By: vitaut Differential Revision: D68465372 fbshipit-source-id: a373078ebcccc1efd7f608d032bab2d81fbcdf3b --- .../generate/t_mstch_python_generator.cc | 22 +++++-------------- .../python/types/constant_value.mustache | 8 ++----- .../gen-python/module/thrift_mutable_types.py | 6 ++--- .../python/gen-python/module/thrift_types.py | 6 ++--- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/thrift/compiler/generate/t_mstch_python_generator.cc b/thrift/compiler/generate/t_mstch_python_generator.cc index 66b659d19f3..67b5dca81ae 100644 --- a/thrift/compiler/generate/t_mstch_python_generator.cc +++ b/thrift/compiler/generate/t_mstch_python_generator.cc @@ -1287,8 +1287,7 @@ class python_mstch_const_value : public mstch_const_value { {"value:py3_enum_value_name", &python_mstch_const_value::py3_enum_value_name}, {"value:py3_binary?", &python_mstch_const_value::is_binary}, - {"value:contains_unicode?", - &python_mstch_const_value::contains_unicode}, + {"value:unicode_value", &python_mstch_const_value::unicode_value}, {"value:const_enum_type", &python_mstch_const_value::const_enum_type}, {"value:value_for_bool?", @@ -1302,21 +1301,12 @@ class python_mstch_const_value : public mstch_const_value { }); } - // A unicode string that actually contains unicode requires special handling - // because compiler can't directly render it as python unicode literal. 
- mstch::node contains_unicode() { - if (!is_nonbinary_string()) { - return false; + mstch::node unicode_value() { + if (type_ != cv::CV_STRING) { + return {}; } - const std::string& str = const_value_->get_string(); - return std::any_of( - str.begin(), str.end(), [](signed char c) { return c < 0; }); - } - - bool is_nonbinary_string() { - auto& ttype = const_value_->ttype(); - return type_ == cv::CV_STRING && ttype && - ttype->get_true_type()->is_string(); + return get_escaped_string( + const_value_->get_string()); } mstch::node is_binary() { diff --git a/thrift/compiler/generate/templates/python/types/constant_value.mustache b/thrift/compiler/generate/templates/python/types/constant_value.mustache index d3285fb8964..6609c54f505 100644 --- a/thrift/compiler/generate/templates/python/types/constant_value.mustache +++ b/thrift/compiler/generate/templates/python/types/constant_value.mustache @@ -47,12 +47,8 @@ file. }}{{/value:const_struct?}}{{! }}{{^value:const_struct?}}{{! }}{{#value:string?}}{{! - }}{{^value:contains_unicode?}}{{! - }}{{#value:py3_binary?}}b{{/value:py3_binary?}}"{{value:string_value}}"{{! - }}{{/value:contains_unicode?}}{{! - }}{{#value:contains_unicode?}}{{! - }}b"{{value:string_value}}".decode(){{! - }}{{/value:contains_unicode?}}{{! + }}{{#value:py3_binary?}}b"{{value:string_value}}"{{/value:py3_binary?}}{{! + }}{{^value:py3_binary?}}"{{value:unicode_value}}"{{/value:py3_binary?}}{{! }}{{/value:string?}}{{! }}{{#value:map?}}_fbthrift_python_types.Map({{! }}{{#value:map_key_type}}{{> types/typeinfo }}{{/value:map_key_type}}, {{! 
diff --git a/thrift/compiler/test/fixtures/constants/out/python/gen-python/module/thrift_mutable_types.py b/thrift/compiler/test/fixtures/constants/out/python/gen-python/module/thrift_mutable_types.py index 68693e23b7e..7b28841131f 100644 --- a/thrift/compiler/test/fixtures/constants/out/python/gen-python/module/thrift_mutable_types.py +++ b/thrift/compiler/test/fixtures/constants/out/python/gen-python/module/thrift_mutable_types.py @@ -780,9 +780,9 @@ def _to_py_deprecated(self): char2ascii = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_i32, { "'": 39, "\"": 34, "\\": 92, "a": 97}) -escaped_strings = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("\001", "\037", " ", "'", "\"", "\n", "\r", "\011", "a", b"\302\253".decode(), "j", b"\302\246".decode(), "ayyy", b"\302\253yyy".decode(), "jyyy", b"\302\246yyy".decode(), "zzza", b"zzz\302\253".decode(), "zzzj", b"zzz\302\246".decode(), "zzzayyy", b"zzz\302\253yyy".decode(), "zzzjyyy", b"zzz\302\246yyy".decode(), )) +escaped_strings = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("\001", "\037", " ", "'", "\"", "\n", "\r", "\011", "a", "«", "j", "¦", "ayyy", "«yyy", "jyyy", "¦yyy", "zzza", "zzz«", "zzzj", "zzz¦", "zzzayyy", "zzz«yyy", "zzzjyyy", "zzz¦yyy", )) -unicode_list = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("Bulgaria", "Benin", b"Saint Barth\303\251lemy".decode(), )) +unicode_list = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("Bulgaria", "Benin", "Saint Barthélemy", )) false_c = False @@ -816,7 +816,7 @@ def _to_py_deprecated(self): empty_string_string_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { }) -unicode_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { "BG": "Bulgaria", "BH": "Bahrain", b"B\303\211".decode(): b"Saint 
Barth\303\251lemy".decode()}) +unicode_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { "BG": "Bulgaria", "BH": "Bahrain", "BÉ": "Saint Barthélemy"}) maxIntDec = 9223372036854775807 diff --git a/thrift/compiler/test/fixtures/constants/out/python/gen-python/module/thrift_types.py b/thrift/compiler/test/fixtures/constants/out/python/gen-python/module/thrift_types.py index a1a66d56b21..241a5640da8 100644 --- a/thrift/compiler/test/fixtures/constants/out/python/gen-python/module/thrift_types.py +++ b/thrift/compiler/test/fixtures/constants/out/python/gen-python/module/thrift_types.py @@ -804,9 +804,9 @@ def _fbthrift_metadata__struct_union2(): char2ascii = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_i32, { "'": 39, "\"": 34, "\\": 92, "a": 97}) -escaped_strings = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("\001", "\037", " ", "'", "\"", "\n", "\r", "\011", "a", b"\302\253".decode(), "j", b"\302\246".decode(), "ayyy", b"\302\253yyy".decode(), "jyyy", b"\302\246yyy".decode(), "zzza", b"zzz\302\253".decode(), "zzzj", b"zzz\302\246".decode(), "zzzayyy", b"zzz\302\253yyy".decode(), "zzzjyyy", b"zzz\302\246yyy".decode(), )) +escaped_strings = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("\001", "\037", " ", "'", "\"", "\n", "\r", "\011", "a", "«", "j", "¦", "ayyy", "«yyy", "jyyy", "¦yyy", "zzza", "zzz«", "zzzj", "zzz¦", "zzzayyy", "zzz«yyy", "zzzjyyy", "zzz¦yyy", )) -unicode_list = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("Bulgaria", "Benin", b"Saint Barth\303\251lemy".decode(), )) +unicode_list = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("Bulgaria", "Benin", "Saint Barthélemy", )) false_c = False @@ -840,7 +840,7 @@ def _fbthrift_metadata__struct_union2(): empty_string_string_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, 
_fbthrift_python_types.typeinfo_string, { }) -unicode_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { "BG": "Bulgaria", "BH": "Bahrain", b"B\303\211".decode(): b"Saint Barth\303\251lemy".decode()}) +unicode_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { "BG": "Bulgaria", "BH": "Bahrain", "BÉ": "Saint Barthélemy"}) maxIntDec = 9223372036854775807