Skip to content

Commit

Permalink
Improve handling node properties in deserialization (#463)
Browse files Browse the repository at this point in the history
* remove redundant string allocations for tag names

* remove redundant string allocations for anchor names

* addressed coverage loss
  • Loading branch information
fktn-k authored Jan 12, 2025
1 parent 77ba5ef commit e930dee
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 73 deletions.
37 changes: 19 additions & 18 deletions include/fkYAML/detail/input/deserializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,15 +254,15 @@ class basic_deserializer {
// If node properties and a followed node are on the different line, the properties belong to the root
// node.
if (m_needs_anchor_impl) {
m_root_anchor_name = std::move(m_anchor_name);
m_root_anchor_name = m_anchor_name;
m_needs_anchor_impl = false;
m_anchor_name.clear();
m_anchor_name = {};
}

if (m_needs_tag_impl) {
m_root_tag_name = std::move(m_tag_name);
m_root_tag_name = m_tag_name;
m_needs_tag_impl = false;
m_tag_name.clear();
m_tag_name = {};
}

line = lexer.get_lines_processed();
Expand Down Expand Up @@ -1037,7 +1037,7 @@ class basic_deserializer {
lexer.get_last_token_begin_pos());
}

m_anchor_name.assign(token.str.begin(), token.str.end());
m_anchor_name = token.str;
m_needs_anchor_impl = true;

if (!m_needs_tag_impl) {
Expand All @@ -1055,7 +1055,7 @@ class basic_deserializer {
lexer.get_last_token_begin_pos());
}

m_tag_name.assign(token.str.begin(), token.str.end());
m_tag_name = token.str;
m_needs_tag_impl = true;

if (!m_needs_anchor_impl) {
Expand Down Expand Up @@ -1233,12 +1233,13 @@ class basic_deserializer {

// apply node properties if any to the root mapping node.
if (!m_root_anchor_name.empty()) {
mp_current_node->add_anchor_name(std::move(m_root_anchor_name));
m_root_anchor_name.clear();
mp_current_node->add_anchor_name(
std::string(m_root_anchor_name.begin(), m_root_anchor_name.end()));
m_root_anchor_name = {};
}
if (!m_root_tag_name.empty()) {
mp_current_node->add_tag_name(std::move(m_root_tag_name));
m_root_tag_name.clear();
mp_current_node->add_tag_name(std::string(m_root_tag_name.begin(), m_root_tag_name.end()));
m_root_tag_name = {};
}
}
}
Expand Down Expand Up @@ -1287,15 +1288,15 @@ class basic_deserializer {
/// @param node A node type object to be set YAML node properties.
void apply_node_properties(basic_node_type& node) {
if (m_needs_anchor_impl) {
node.add_anchor_name(m_anchor_name);
node.add_anchor_name(std::string(m_anchor_name.begin(), m_anchor_name.end()));
m_needs_anchor_impl = false;
m_anchor_name.clear();
m_anchor_name = {};
}

if (m_needs_tag_impl) {
node.add_tag_name(m_tag_name);
node.add_tag_name(std::string(m_tag_name.begin(), m_tag_name.end()));
m_needs_tag_impl = false;
m_tag_name.clear();
m_tag_name = {};
}
}

Expand All @@ -1321,13 +1322,13 @@ class basic_deserializer {
/// A flag to determine the need for a value separator or a flow suffix to follow.
flow_token_state_t m_flow_token_state {flow_token_state_t::NEEDS_VALUE_OR_SUFFIX};
/// The last YAML anchor name.
std::string m_anchor_name;
str_view m_anchor_name;
/// The last tag name.
std::string m_tag_name;
str_view m_tag_name;
/// The root YAML anchor name. (maybe empty and unused)
std::string m_root_anchor_name;
str_view m_root_anchor_name;
/// The root tag name. (maybe empty and unused)
std::string m_root_tag_name;
str_view m_root_tag_name;
};

FK_YAML_DETAIL_NAMESPACE_END
Expand Down
44 changes: 27 additions & 17 deletions include/fkYAML/detail/input/tag_resolver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,18 @@ class tag_resolver {
/// @brief Resolve the input tag name into an expanded tag name prepended with a registered prefix.
/// @param tag The input tag name.
/// @return The type of a node deduced from the given tag name.
static tag_t resolve_tag(const std::string& tag, const std::shared_ptr<doc_metainfo_type>& directives) {
static tag_t resolve_tag(const str_view tag, const std::shared_ptr<doc_metainfo_type>& directives) {
const std::string normalized = normalize_tag_name(tag, directives);
return convert_to_tag_type(normalized);
}

private:
static std::string normalize_tag_name(
const std::string& tag, const std::shared_ptr<doc_metainfo_type>& directives) {
static std::string normalize_tag_name(const str_view tag, const std::shared_ptr<doc_metainfo_type>& directives) {
if FK_YAML_UNLIKELY (tag.empty()) {
throw invalid_tag("tag must not be empty.", "");
}
if FK_YAML_UNLIKELY (tag[0] != '!') {
throw invalid_tag("tag must start with \'!\'", tag.c_str());
throw invalid_tag("tag must start with \'!\'", std::string(tag.begin(), tag.end()).c_str());
}

if (tag.size() == 1) {
Expand All @@ -56,7 +55,7 @@ class tag_resolver {
// * tag:yaml.org,2002:str
// See the "Non-Specific Tags" section in https://yaml.org/spec/1.2.2/#691-node-tags.
// The interpretation cannot take place here because the input lacks the corresponding value.
return tag;
return {tag.begin(), tag.end()};
}

std::string normalized {"!<"};
Expand All @@ -66,28 +65,34 @@ class tag_resolver {
const bool is_null_or_empty = !directives || directives->secondary_handle_prefix.empty();
if (is_null_or_empty) {
normalized.append(default_secondary_handle_prefix.begin(), default_secondary_handle_prefix.end());
normalized += tag.substr(2);
}
else {
normalized += directives->secondary_handle_prefix + tag.substr(2);
normalized += directives->secondary_handle_prefix;
}

const str_view body = tag.substr(2);
normalized.append(body.begin(), body.end());
break;
}
case '<':
if (tag[2] == '!') {
const bool is_null_or_empty = !directives || directives->primary_handle_prefix.empty();
if (is_null_or_empty) {
normalized.append(default_primary_handle_prefix.begin(), default_primary_handle_prefix.end());
return normalized + tag.substr(3);
}
return normalized + directives->primary_handle_prefix + tag.substr(3);
else {
normalized += directives->primary_handle_prefix;
}

const str_view body = tag.substr(3);
return normalized.append(body.begin(), body.end());
}

// verbatim tags must be delivered as-is to the application.
// See https://yaml.org/spec/1.2.2/#691-node-tags for more details.
return tag;
return {tag.begin(), tag.end()};
default: {
auto tag_end_pos = tag.find_first_of('!', 1);
const std::size_t tag_end_pos = tag.find_first_of('!', 1);

// handle a named handle (!tag!suffix -> !<[tag][suffix]>)
if (tag_end_pos != std::string::npos) {
Expand All @@ -96,35 +101,40 @@ class tag_resolver {

const bool is_null_or_empty = !directives || directives->named_handle_map.empty();
if FK_YAML_UNLIKELY (is_null_or_empty) {
throw invalid_tag("named handle has not been registered.", tag.c_str());
throw invalid_tag(
"named handle has not been registered.", std::string(tag.begin(), tag.end()).c_str());
}

// find the extracted named handle in the map.
auto named_handle_itr = directives->named_handle_map.find(tag.substr(0, tag_end_pos + 1));
const str_view named_handle = tag.substr(0, tag_end_pos + 1);
auto named_handle_itr = directives->named_handle_map.find({named_handle.begin(), named_handle.end()});
auto end_itr = directives->named_handle_map.end();
if FK_YAML_UNLIKELY (named_handle_itr == end_itr) {
throw invalid_tag("named handle has not been registered.", tag.c_str());
throw invalid_tag(
"named handle has not been registered.", std::string(tag.begin(), tag.end()).c_str());
}

// The YAML spec prohibits expanding the percent-encoded characters (%xx -> a UTF-8 byte).
// So no conversion takes place.
// See https://yaml.org/spec/1.2.2/#56-miscellaneous-characters for more details.

normalized += named_handle_itr->second;
normalized.append(tag.begin() + (static_cast<std::ptrdiff_t>(tag_end_pos) + 1), tag.end());
const str_view body = tag.substr(tag_end_pos + 1);
normalized.append(body.begin(), body.end());
break;
}

// handle a primary tag handle (!suffix -> !<[primary][suffix]>)
const bool is_null_or_empty = !directives || directives->primary_handle_prefix.empty();
if (is_null_or_empty) {
normalized.append(default_primary_handle_prefix.begin(), default_primary_handle_prefix.end());
normalized += tag.substr(1);
}
else {
normalized += directives->primary_handle_prefix + tag.substr(1);
normalized += directives->primary_handle_prefix;
}

const str_view body = tag.substr(1);
normalized.append(body.begin(), body.end());
break;
}
}
Expand Down
Loading

0 comments on commit e930dee

Please sign in to comment.