From 5cc6552dd589bf0fa898345a5f30f10b07fe416e Mon Sep 17 00:00:00 2001 From: Flammie A Pirinen Date: Tue, 21 Jan 2025 02:26:08 +0100 Subject: [PATCH] format --- src/cgspell.cpp | 186 +++++++++----------- src/cgspell.hpp | 277 ++++++++++++++--------------- src/suggest.cpp | 460 ++++++++++++++++++++++++++++-------------------- 3 files changed, 488 insertions(+), 435 deletions(-) diff --git a/src/cgspell.cpp b/src/cgspell.cpp index 0e83fdb..88b9d81 100644 --- a/src/cgspell.cpp +++ b/src/cgspell.cpp @@ -27,131 +27,119 @@ static const string tag_unknown = "?"; * or 0 if invalid. */ size_t u8_first_codepoint_size(const unsigned char* c) { - if (*c <= 127) { - return 1; - } - else if ( (*c & (128 + 64 + 32 + 16)) == (128 + 64 + 32 + 16) ) { - return 4; - } - else if ( (*c & (128 + 64 + 32 )) == (128 + 64 + 32) ) { - return 3; - } - else if ( (*c & (128 + 64 )) == (128 + 64)) { - return 2; - } - else { - return 0; - } + if (*c <= 127) { + return 1; + } + else if ((*c & (128 + 64 + 32 + 16)) == (128 + 64 + 32 + 16)) { + return 4; + } + else if ((*c & (128 + 64 + 32)) == (128 + 64 + 32)) { + return 3; + } + else if ((*c & (128 + 64)) == (128 + 64)) { + return 2; + } + else { + return 0; + } } -bool is_cg_tag(const string & str) { - // Note: invalid codepoints are also treated as tags; ¯\_(ツ)_/¯ - return str.size() > u8_first_codepoint_size((const unsigned char*)str.c_str()); +bool is_cg_tag(const string& str) { + // Note: invalid codepoints are also treated as tags; ¯\_(ツ)_/¯ + return str.size() > + u8_first_codepoint_size((const unsigned char*)str.c_str()); } -void print_cg_subreading(size_t indent, - const string& form, - const vector::const_iterator beg, - const vector::const_iterator end, - std::ostream & os, - Weight w, - variant mw_a, - const std::string& errtag) -{ +void print_cg_subreading(size_t indent, const string& form, + const vector::const_iterator beg, + const vector::const_iterator end, std::ostream& os, Weight w, + variant mw_a, const std::string& errtag) { os << string(indent, '\t'); bool in_lemma = false; - for(vector::const_iterator it = beg; it != end; ++it) { + for (vector::const_iterator it = beg; it != end; ++it) { bool is_tag = is_cg_tag(*it); - if(in_lemma) { - if(is_tag) { + if (in_lemma) { + if (is_tag) { in_lemma = false; os << "\""; } } else { - if(!is_tag) { + if (!is_tag) { in_lemma = true; os << "\""; } } os << (*it); } - if(in_lemma) { + if (in_lemma) { os << "\""; } - if(indent == 1) { + if (indent == 1) { os << " "; - std::visit([&](auto&& arg){ - using T = std::decay_t; - if constexpr (std::is_same_v) {} - if constexpr (std::is_same_v) { - os << " "; - } - }, mw_a); + std::visit( + [&](auto&& arg) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + } + if constexpr (std::is_same_v) { + os << " "; + } + }, + mw_a); os << " " << errtag; os << " \"" << form << "\"S"; } os << std::endl; } -const void print_readings(const vector& ana, - const string& form, - std::ostream& os, - Weight w, - variant w_a, - const std::string& errtag) -{ +const void print_readings(const vector& ana, const string& form, + std::ostream& os, Weight w, variant w_a, + const std::string& errtag) { size_t indent = 1; auto beg = ana.begin(), end = ana.end(); - while(true) { + while (true) { bool sub_found = false; - for(auto it = end-1; it > ana.begin(); --it) { - if(subreading_separator.compare(*it) == 0) { + for (auto it = end - 1; it > ana.begin(); --it) { + if (subreading_separator.compare(*it) == 0) { // Found a sub-reading mark beg = ++it; sub_found = true; break; } } - if(!sub_found) { + if (!sub_found) { // No remaining sub-marks to the left beg = ana.begin(); } - print_cg_subreading(indent, - form, - beg, - end, - os, - w, - w_a, - errtag); - if(beg == ana.begin()) { + print_cg_subreading(indent, form, beg, end, os, w, w_a, errtag); + if (beg == ana.begin()) { break; } else { ++indent; end = beg; - if(sub_found) { + if (sub_found) { --end; // skip the subreading separator symbol } } } } -void Speller::spell(const string& inform, std::ostream& os) -{ +void Speller::spell(const string& inform, std::ostream& os) { bool do_suggest = real_word || !speller->spell(inform); - if(!do_suggest) { - if(analyse_when_correct) { + if (!do_suggest) { + if (analyse_when_correct) { // This would happen if a correct inform is in the // speller, but not in whatever analyser you used to // create the input to cgspell auto aq = speller->analyseSymbols(inform); - while(!aq.empty()) { + while (!aq.empty()) { const auto ana = aq.top().first; const Weight& w = aq.top().second; // No max_weight for regular words - print_readings(ana, inform, os, w, Nothing(), CGSPELL_CORRECT_TAG); + print_readings( + ana, inform, os, w, Nothing(), CGSPELL_CORRECT_TAG); aq.pop(); } } @@ -163,17 +151,17 @@ void Speller::spell(const string& inform, std::ostream& os) auto cq = speller->suggest(inform); auto slimit = limit; std::ostringstream result; - while(!cq.empty() && (slimit--) > 0) { + while (!cq.empty() && (slimit--) > 0) { const auto& corrform = cq.top().first; const Weight& w = cq.top().second; - if(max_weight > 0.0 && w >= max_weight) { + if (max_weight > 0.0 && w >= max_weight) { break; } auto aq = speller->analyseSymbols(corrform, true); - while(!aq.empty()) { + while (!aq.empty()) { const auto& ana = aq.top().first; const Weight& w_a = (aq.top().second); - if(max_analysis_weight > 0.0 && w_a >= max_analysis_weight) { + if (max_analysis_weight > 0.0 && w_a >= max_analysis_weight) { break; } print_readings(ana, corrform, result, w, w_a, CGSPELL_TAG); @@ -181,7 +169,7 @@ void Speller::spell(const string& inform, std::ostream& os) } cq.pop(); } - if(cache.size() > cache_max) { + if (cache.size() > cache_max) { std::unordered_map().swap(cache); } cache[inform] = result.str(); @@ -191,31 +179,28 @@ void Speller::spell(const string& inform, std::ostream& os) void proc_sent(const SpellSent& sent, std::ostream& os, Speller& s) { - bool do_spell = (sent.cohorts.size() < s.min_sent_max_unknown) - || (sent.n_unknowns <= s.max_sent_unknown_rate * sent.cohorts.size()); - for(const auto& r : sent.cohorts) { - for(const auto& line : r.lines) { + bool do_spell = + (sent.cohorts.size() < s.min_sent_max_unknown) || + (sent.n_unknowns <= s.max_sent_unknown_rate * sent.cohorts.size()); + for (const auto& r : sent.cohorts) { + for (const auto& line : r.lines) { os << line << std::endl; } - if (!r.wf.empty() && (s.real_word || r.unknown)) - { - if(do_spell) { + if (!r.wf.empty() && (s.real_word || r.unknown)) { + if (do_spell) { s.spell(r.wf, os); } else { os << "\t\"" << r.wf << "\" ? " << std::endl; } } - for(const auto& postblank : r.postblank) { + for (const auto& postblank : r.postblank) { os << postblank << std::endl; } } } -void run_cgspell(std::istream& is, - std::ostream& os, - Speller& s) -{ +void run_cgspell(std::istream& is, std::ostream& os, Speller& s) { SpellSent sent = { {}, 0 }; SpellCohort c = { "", {}, {}, false }; for (string line; std::getline(is, line);) { @@ -226,29 +211,28 @@ void run_cgspell(std::istream& is, // Was the previous cohort a sent delimiter? std::match_results del_res; std::regex_match(c.wf.c_str(), del_res, s.sent_delimiters); - if(!del_res.empty() && del_res[0].length() != 0) { + if (!del_res.empty() && del_res[0].length() != 0) { proc_sent(sent, os, s); sent = { {}, 0 }; } - c = SpellCohort({ result[2], {}, {}, false}); + c = SpellCohort({ result[2], {}, {}, false }); c.lines.push_back(line); } - else if (!result.empty() && result[5].length() != 0) - { - std::stringstream ana(result[5]); - std::string tag; - c.unknown = false; - while (ana >> tag) { - if(tag == tag_unknown) { - c.unknown = true; - } - } - if (c.unknown) { - sent.n_unknowns += 1; - } - c.lines.push_back(line); + else if (!result.empty() && result[5].length() != 0) { + std::stringstream ana(result[5]); + std::string tag; + c.unknown = false; + while (ana >> tag) { + if (tag == tag_unknown) { + c.unknown = true; + } + } + if (c.unknown) { + sent.n_unknowns += 1; + } + c.lines.push_back(line); } - else if(!result.empty() && result[7].length() != 0) { + else if (!result.empty() && result[7].length() != 0) { // TODO: Can we ever get a flush in the middle of readings? sent.cohorts.push_back(c); proc_sent(sent, os, s); diff --git a/src/cgspell.hpp b/src/cgspell.hpp index 406a18e..a7ea052 100644 --- a/src/cgspell.hpp +++ b/src/cgspell.hpp @@ -17,29 +17,29 @@ #pragma once #ifndef a1e13de0fc0e1f37_CGSPELL_H -#define a1e13de0fc0e1f37_CGSPELL_H +# define a1e13de0fc0e1f37_CGSPELL_H -#include -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include // divvun-gramcheck: -#include "util.hpp" +# include "util.hpp" // hfst: -#include +# include // variants: -#include +# include namespace divvun { -using std::variant; +using hfst_ospell::Weight; +using std::pair; using std::string; +using std::variant; using std::vector; -using std::pair; -using hfst_ospell::Weight; struct SpellCohort { string wf; @@ -53,140 +53,133 @@ struct SpellSent { }; class Speller { - public: - Speller(const string& zhfstpath, - bool verbose, - Weight max_analysis_weight_, - Weight max_weight_, - bool real_word_, - unsigned long limit_, - hfst_ospell::Weight beam, - float time_cutoff, - float max_sent_unknown_rate_) - : max_analysis_weight(max_analysis_weight_) - , max_weight(max_weight_) - , real_word(real_word_) - , limit(limit_) - , max_sent_unknown_rate(max_sent_unknown_rate_) - , speller(new hfst_ospell::ZHfstOspeller()) - { - speller->read_zhfst(zhfstpath); - if (!speller) { - throw std::runtime_error("libdivvun: ERROR: Couldn't read zhfst archive " + zhfstpath); - } - else { - speller->set_beam(beam); - speller->set_time_cutoff(time_cutoff); - // s.set_queue_limit(limit); // TODO: This seems to choose first three, not top three (same with /usr/bin/hfst-ospell) - // s.set_weight_limit(max_weight); // TODO: Has no effect? (same with /usr/bin/hfst-ospell) - } +public: + Speller(const string& zhfstpath, bool verbose_, + Weight max_analysis_weight_, Weight max_weight_, bool real_word_, + unsigned long limit_, hfst_ospell::Weight beam, float time_cutoff, + float max_sent_unknown_rate_) + : max_analysis_weight(max_analysis_weight_) + , max_weight(max_weight_) + , real_word(real_word_) + , limit(limit_) + , max_sent_unknown_rate(max_sent_unknown_rate_) + , speller(new hfst_ospell::ZHfstOspeller()) + , verbose(verbose_) { + speller->read_zhfst(zhfstpath); + if (!speller) { + throw std::runtime_error( + "libdivvun: ERROR: Couldn't read zhfst archive " + zhfstpath); + } + else { + speller->set_beam(beam); + speller->set_time_cutoff(time_cutoff); + // s.set_queue_limit(limit); // TODO: This seems to choose first three, not top three (same with /usr/bin/hfst-ospell) + // s.set_weight_limit(max_weight); // TODO: Has no effect? (same with /usr/bin/hfst-ospell) + } + } + Speller(const string& errpath, const string& lexpath, bool verbose_, + Weight max_analysis_weight_, Weight max_weight_, bool real_word_, + unsigned long limit_, hfst_ospell::Weight beam, float time_cutoff, + float max_sent_unknown_rate_) + : max_analysis_weight(max_analysis_weight_) + , max_weight(max_weight_) + , real_word(real_word_) + , limit(limit_) + , max_sent_unknown_rate(max_sent_unknown_rate_) + , speller(new hfst_ospell::ZHfstOspeller()) + , verbose(verbose_) { + FILE* err_fp = fopen(errpath.c_str(), "r"); + if (err_fp == nullptr) { + throw std::runtime_error( + "libdivvun: ERROR: Couldn't read error model " + errpath); + } + FILE* lex_fp = fopen(lexpath.c_str(), "r"); + if (lex_fp == NULL) { + throw std::runtime_error( + "libdivvun: ERROR: Couldn't read language model " + lexpath); + } + err = std::unique_ptr( + new hfst_ospell::Transducer(err_fp)); + lex = std::unique_ptr( + new hfst_ospell::Transducer(lex_fp)); + // This one is freed by ZHfstOspeller, but it seems like its acceptor and errmodel are not! + auto lmspeller = new hfst_ospell::Speller(&*err, &*lex); + speller->inject_speller(lmspeller); + if (!speller) { + throw std::runtime_error( + "libdivvun: ERROR: Couldn't read lexicon " + lexpath + + " / errmodel " + errpath); + } + else { + speller->set_beam(beam); + speller->set_time_cutoff(time_cutoff); + // s.set_queue_limit(limit); // TODO: This seems to choose first three, not top three (same with /usr/bin/hfst-ospell) + // s.set_weight_limit(max_weight); // TODO: Has no effect? (same with /usr/bin/hfst-ospell) } - Speller(const string& errpath, - const string& lexpath, - bool verbose, - Weight max_analysis_weight_, - Weight max_weight_, - bool real_word_, - unsigned long limit_, - hfst_ospell::Weight beam, - float time_cutoff, - float max_sent_unknown_rate_) - : max_analysis_weight(max_analysis_weight_) - , max_weight(max_weight_) - , real_word(real_word_) - , limit(limit_) - , max_sent_unknown_rate(max_sent_unknown_rate_) - , speller(new hfst_ospell::ZHfstOspeller()) - { - FILE* err_fp = fopen(errpath.c_str(), "r"); - if (err_fp == nullptr) { - throw std::runtime_error("libdivvun: ERROR: Couldn't read error model " + errpath); - } - FILE* lex_fp = fopen(lexpath.c_str(), "r"); - if (lex_fp == NULL) { - throw std::runtime_error("libdivvun: ERROR: Couldn't read language model " + lexpath); - } - err = std::unique_ptr (new hfst_ospell::Transducer(err_fp)); - lex = std::unique_ptr (new hfst_ospell::Transducer(lex_fp)); - // This one is freed by ZHfstOspeller, but it seems like its acceptor and errmodel are not! - auto lmspeller = new hfst_ospell::Speller(&*err, &*lex); - speller->inject_speller(lmspeller); - if (!speller) { - throw std::runtime_error("libdivvun: ERROR: Couldn't read lexicon " + lexpath+ " / errmodel " + errpath); - } - else { - speller->set_beam(beam); - speller->set_time_cutoff(time_cutoff); - // s.set_queue_limit(limit); // TODO: This seems to choose first three, not top three (same with /usr/bin/hfst-ospell) - // s.set_weight_limit(max_weight); // TODO: Has no effect? (same with /usr/bin/hfst-ospell) - } + } + Speller(hfst_ospell::Transducer* err_, hfst_ospell::Transducer* lex_, + bool verbose_, Weight max_analysis_weight_, Weight max_weight_, + bool real_word_, unsigned long limit_, hfst_ospell::Weight beam, + float time_cutoff, float max_sent_unknown_rate_) + : max_analysis_weight(max_analysis_weight_) + , max_weight(max_weight_) + , real_word(real_word_) + , limit(limit_) + , max_sent_unknown_rate(max_sent_unknown_rate_) + , speller(new hfst_ospell::ZHfstOspeller()) + , err(err_) + , lex(lex_) + , verbose(verbose_) { + // This one is freed by ZHfstOspeller, but it seems like its acceptor and errmodel are not! + auto lmspeller = new hfst_ospell::Speller(&*err, &*lex); + speller->inject_speller(lmspeller); + if (!speller) { + throw std::runtime_error( + "libdivvun: ERROR: Couldn't read lexicon / errmodel"); } - Speller(hfst_ospell::Transducer* err_, - hfst_ospell::Transducer* lex_, - bool verbose, - Weight max_analysis_weight_, - Weight max_weight_, - bool real_word_, - unsigned long limit_, - hfst_ospell::Weight beam, - float time_cutoff, - float max_sent_unknown_rate_) - : max_analysis_weight(max_analysis_weight_) - , max_weight(max_weight_) - , real_word(real_word_) - , limit(limit_) - , max_sent_unknown_rate(max_sent_unknown_rate_) - , speller(new hfst_ospell::ZHfstOspeller()) - , err(err_) - , lex(lex_) - { - // This one is freed by ZHfstOspeller, but it seems like its acceptor and errmodel are not! - auto lmspeller = new hfst_ospell::Speller(&*err, &*lex); - speller->inject_speller(lmspeller); - if (!speller) { - throw std::runtime_error("libdivvun: ERROR: Couldn't read lexicon / errmodel"); - } - else { - speller->set_beam(beam); - speller->set_time_cutoff(time_cutoff); - // s.set_queue_limit(limit); // TODO: This seems to choose first three, not top three (same with /usr/bin/hfst-ospell) - // s.set_weight_limit(max_weight); // TODO: Has no effect? (same with /usr/bin/hfst-ospell) - } + else { + speller->set_beam(beam); + speller->set_time_cutoff(time_cutoff); + // s.set_queue_limit(limit); // TODO: This seems to choose first three, not top three (same with /usr/bin/hfst-ospell) + // s.set_weight_limit(max_weight); // TODO: Has no effect? (same with /usr/bin/hfst-ospell) } - const Weight max_analysis_weight; - const Weight max_weight; - const bool real_word; - const unsigned long limit; - // TODO: Make max_sent_unknown_rate and sent_delimiters configurable in cli? - float max_sent_unknown_rate = 0.4; // Don't spell if >= 40 % of the sentence is unknown. - float min_sent_max_unknown = 7; // For sentences of < 7 cohorts, spell even if most of it is unknown. - std::basic_regex sent_delimiters = std::basic_regex ("^[.!?]$"); - void spell(const string& form, std::ostream& os); - bool analyse_when_correct = false; // Look up the analysis for forms that had an analysis in lex already. - private: - // const void print_readings(const vector& ana, - // const string& form, - // std::ostream& os, - // Weight w, - // variant w_a, - // const std::string& errtag) const; - std::unique_ptr speller; - const string CGSPELL_TAG = ""; - const string CGSPELL_CORRECT_TAG = ""; - // Only used when initialised with errpath/lexpath: - std::unique_ptr err; - std::unique_ptr lex; - // A cache of misspelt words, with suggestions. For server use, where texts are - // requested over and over again with very little change, this makes the UI a lot - // snappier. - std::unordered_map cache; - // TODO: tweak cache max (currently a drop in the ocean compared to what libhfstospell already uses) - size_t cache_max = 10000; + } + const Weight max_analysis_weight; + const Weight max_weight; + const bool real_word; + const unsigned long limit; + // TODO: Make max_sent_unknown_rate and sent_delimiters configurable in cli? + float max_sent_unknown_rate = + 0.4; // Don't spell if >= 40 % of the sentence is unknown. + float min_sent_max_unknown = + 7; // For sentences of < 7 cohorts, spell even if most of it is unknown. + std::basic_regex sent_delimiters = std::basic_regex("^[.!?]$"); + void spell(const string& form, std::ostream& os); + bool analyse_when_correct = + false; // Look up the analysis for forms that had an analysis in lex already. +private: + // const void print_readings(const vector& ana, + // const string& form, + // std::ostream& os, + // Weight w, + // variant w_a, + // const std::string& errtag) const; + std::unique_ptr speller; + const string CGSPELL_TAG = ""; + const string CGSPELL_CORRECT_TAG = ""; + // Only used when initialised with errpath/lexpath: + std::unique_ptr err; + std::unique_ptr lex; + // A cache of misspelt words, with suggestions. For server use, where texts are + // requested over and over again with very little change, this makes the UI a lot + // snappier. + std::unordered_map cache; + // TODO: tweak cache max (currently a drop in the ocean compared to what libhfstospell already uses) + size_t cache_max = 10000; + bool verbose; }; -void run_cgspell(std::istream& is, - std::ostream& os, - Speller& s); +void run_cgspell(std::istream& is, std::ostream& os, Speller& s); } diff --git a/src/suggest.cpp b/src/suggest.cpp index 2a41758..13f7500 100644 --- a/src/suggest.cpp +++ b/src/suggest.cpp @@ -78,7 +78,8 @@ const std::basic_regex CG_TAG_TYPE( const std::basic_regex MSG_TEMPLATE_REL("^[$][0-9]+$"); const std::basic_regex DELETE_REL("^DELETE[0-9]*"); -const std::basic_regex LEFT_RIGHT_DELETE_REL("^(LEFT|RIGHT|DELETE[0-9]*)$"); +const std::basic_regex LEFT_RIGHT_DELETE_REL( + "^(LEFT|RIGHT|DELETE[0-9]*)$"); enum LineType { WordformL, ReadingL, BlankL }; @@ -229,18 +230,23 @@ const Reading proc_subreading(const string& line, bool generate_all_readings) { r.added = NotAdded; r.coerror = false; r.fixedcase = false; - bool delete_self = false; // may be changed by DELETE tag, but need to know id to set the relation + bool delete_self = + false; // may be changed by DELETE tag, but need to know id to set the relation for (auto& tag : allmatches(tags, CG_TAGS_RE)) { // proc_tags std::match_results result; std::regex_match(tag.c_str(), result, CG_TAG_TYPE); if (tag == "COERROR") { // COERROR kept for backward-compatibility r.coerror = true; } - else if (tag == "&SUGGEST" || tag == "SUGGEST") { // &SUGGEST kept for backward-compatibility - r.suggest = true; + else if (tag == "&SUGGEST" || + tag == + "SUGGEST") { // &SUGGEST kept for backward-compatibility + r.suggest = true; } - else if (tag == "&SUGGESTWF" || tag == "SUGGESTWF") { // &SUGGESTWF kept for backward-compatibility - r.suggestwf = true; + else if (tag == "&SUGGESTWF" || + tag == + "SUGGESTWF") { // &SUGGESTWF kept for backward-compatibility + r.suggestwf = true; } else if (result.empty()) { gentags.push_back(tag); @@ -255,21 +261,25 @@ const Reading proc_subreading(const string& line, bool generate_all_readings) { else if (tag == "&ADDED-BEFORE-BLANK") { r.added = AddedBeforeBlank; } - else if (tag == "&LINK" || tag == "&COERROR") { // &LINK kept for backward-compatibility + else if (tag == "&LINK" || + tag == + "&COERROR") { // &LINK kept for backward-compatibility r.coerror = true; } else { r.errtypes.insert(fromUtf8(result[2])); } } - else if (tag == "DELETE") { // Shorthand: the tag DELETE means R:DELETE:id_of_this_cohort + else if ( + tag == + "DELETE") { // Shorthand: the tag DELETE means R:DELETE:id_of_this_cohort delete_self = true; } else if (result[3].length() != 0 && result[4].length() != 0) { try { rel_id target = stoi(result[4]); auto rel_name = result[3]; - r.rels.insert({rel_name, target}); + r.rels.insert({ rel_name, target }); } catch (...) { std::cerr << "divvun-suggest: WARNING: Couldn't parse " @@ -302,8 +312,8 @@ const Reading proc_subreading(const string& line, bool generate_all_readings) { r.coerrtypes.insert(fromUtf8(result[9])); } } - if(delete_self) { - r.rels.insert({"DELETE", r.id}); + if (delete_self) { + r.rels.insert({ "DELETE", r.id }); } const auto& tagsplus = join(gentags, "+"); r.ana = lemma + "+" + tagsplus; @@ -314,8 +324,8 @@ const Reading proc_subreading(const string& line, bool generate_all_readings) { }; -const Reading proc_reading(const hfst::HfstTransducer& generator, const string& line, - bool generate_all_readings) { +const Reading proc_reading(const hfst::HfstTransducer& generator, + const string& line, bool generate_all_readings) { stringstream ss(line); string subline; std::deque subs; @@ -361,8 +371,7 @@ bool cohort_empty(const Cohort& c) { return c.form.empty(); } -const Cohort DEFAULT_COHORT = { {}, 0, 0, {}, {}, {}, NotAdded, {} -}; +const Cohort DEFAULT_COHORT = { {}, 0, 0, {}, {}, {}, NotAdded, {} }; // https://stackoverflow.com/a/1464684/69663 template @@ -381,10 +390,9 @@ Iterator Dedupe(Iterator first, Iterator last) { * vector. */ void rel_on_match(const relations& rels, const std::basic_regex& name, - const Sentence& sentence, - const std::function& fn) { + const Sentence& sentence, + const std::function& fn) { for (const auto& rel : rels) { std::match_results result; std::regex_match(rel.first.c_str(), result, name); @@ -414,30 +422,32 @@ void rel_on_match(const relations& rels, const std::basic_regex& name, * Calculate the left/right bounds of the error underline, as indices into sentence. */ const std::pair squiggle_bounds(const relations& rels, - const Sentence& sentence, - const size_t& i_fallback, - const Cohort& fallback) { + const Sentence& sentence, const size_t& i_fallback, const Cohort& fallback) { size_t left = i_fallback; size_t right = i_fallback; // If we have several relation targets, prefer leftmost if LEFT, rightmost if RIGHT: rel_on_match(rels, LEFT_RIGHT_DELETE_REL, sentence, - [&](const string& relname, size_t i_trg, const Cohort& trg) { - if(trg.id == 0) { - return; // unexpected, CG should always give id's to relation targets - } - if(i_trg < left) { - left = i_trg; - } - if(i_trg > right) { - right = i_trg; - } - }); + [&](const string& relname, size_t i_trg, const Cohort& trg) { + if (trg.id == 0) { + return; // unexpected, CG should always give id's to relation targets + } + if (i_trg < left) { + left = i_trg; + } + if (i_trg > right) { + right = i_trg; + } + }); if (left < 0) { - std::cerr << "divvun-suggest: WARNING: Left underline boundary relation target " << left << " out of bounds " << std::endl; + std::cerr << "divvun-suggest: WARNING: Left underline boundary " + "relation target " + << left << " out of bounds " << std::endl; left = 0; } if (right >= sentence.cohorts.size()) { - std::cerr << "divvun-suggest: WARNING: Right underline relation target " << right << " out of bounds " << std::endl; + std::cerr + << "divvun-suggest: WARNING: Right underline relation target " + << right << " out of bounds " << std::endl; right = sentence.cohorts.size() - 1; } return std::make_pair(left, right); @@ -460,30 +470,31 @@ const std::pair squiggle_bounds(const relations& rels, * * TODO: return references, not copies */ -vector readings_with_errtype(const Cohort& trg, const ErrId& err_id, bool applies_deletion) { +vector readings_with_errtype( + const Cohort& trg, const ErrId& err_id, bool applies_deletion) { vector filtered(trg.readings.size()); auto it = std::copy_if(trg.readings.begin(), trg.readings.end(), - filtered.begin(), - [&](const Reading& tr) { - bool has_our_errtag = tr.errtypes.find(err_id) != tr.errtypes.end() - || tr.coerrtypes.find(err_id) != tr.coerrtypes.end(); - bool applies_change = tr.added != NotAdded - || !tr.sforms.empty() - || applies_deletion; - return has_our_errtag && applies_change; - }); + filtered.begin(), [&](const Reading& tr) { + bool has_our_errtag = + tr.errtypes.find(err_id) != tr.errtypes.end() || + tr.coerrtypes.find(err_id) != tr.coerrtypes.end(); + bool applies_change = + tr.added != NotAdded || !tr.sforms.empty() || applies_deletion; + return has_our_errtag && applies_change; + }); filtered.resize(std::distance(filtered.begin(), it)); if (filtered.empty()) { vector not_just_other_errtype(trg.readings.size()); auto it = std::copy_if(trg.readings.begin(), trg.readings.end(), - not_just_other_errtype.begin(), - [&](const Reading& tr) { - bool has_our_errtag = tr.errtypes.find(err_id) != tr.errtypes.end() - || tr.coerrtypes.find(err_id) != tr.coerrtypes.end(); - bool no_errtags = tr.errtypes.empty() && tr.coerrtypes.empty(); - return no_errtags || has_our_errtag; - }); - not_just_other_errtype.resize(std::distance(not_just_other_errtype.begin(), it)); + not_just_other_errtype.begin(), [&](const Reading& tr) { + bool has_our_errtag = + tr.errtypes.find(err_id) != tr.errtypes.end() || + tr.coerrtypes.find(err_id) != tr.coerrtypes.end(); + bool no_errtags = tr.errtypes.empty() && tr.coerrtypes.empty(); + return no_errtags || has_our_errtag; + }); + not_just_other_errtype.resize( + std::distance(not_just_other_errtype.begin(), it)); return not_just_other_errtype; } else { @@ -499,20 +510,21 @@ vector readings_with_errtype(const Cohort& trg, const ErrId& err_id, bo * the relation applies. (If there's no ambiguity, we can always * delete). */ -bool do_delete(const Cohort& trg, const ErrId& err_id, const std::set& src_errtypes, const std::set& deletions) { - if(deletions.find(trg.id) == deletions.end()) { +bool do_delete(const Cohort& trg, const ErrId& err_id, + const std::set& src_errtypes, const std::set& deletions) { + if (deletions.find(trg.id) == deletions.end()) { // There is no deletion of this target cohort return false; } - if(src_errtypes.size() < 2) { + if (src_errtypes.size() < 2) { // Just one error type, no need to disambiguate which one has the relation return true; } // There are several err_id's on src; we should only delete // trg in err_id replacement if trg has err_id - for(const auto& tr : trg.readings) { - if (tr.errtypes.find(err_id) != tr.errtypes.end() - || tr.coerrtypes.find(err_id) != tr.coerrtypes.end()) { + for (const auto& tr : trg.readings) { + if (tr.errtypes.find(err_id) != tr.errtypes.end() || + tr.coerrtypes.find(err_id) != tr.coerrtypes.end()) { return true; } } @@ -522,9 +534,9 @@ bool do_delete(const Cohort& trg, const ErrId& err_id, const std::set trg_errtypes_w_co.insert(trg.coerrtypes.begin(), trg.coerrtypes.end()); std::set errtypes_isect; std::set_intersection(trg_errtypes_w_co.begin(), trg_errtypes_w_co.end(), - src_errtypes.begin(), src_errtypes.end(), - std::inserter(errtypes_isect, errtypes_isect.begin())); - if(errtypes_isect.empty()) { + src_errtypes.begin(), src_errtypes.end(), + std::inserter(errtypes_isect, errtypes_isect.begin())); + if (errtypes_isect.empty()) { // No matching err types at all on trg, we can't filter on errtype, allow deletion return true; } @@ -543,68 +555,83 @@ bool both_spaces(char16_t lhs, char16_t rhs) { * (underline), along with a replacement suggestion (or Nothing() if * given bad data). */ -variant, UStringVector>> build_squiggle_replacement( - const Reading& r, - const ErrId& err_id, - const size_t i_c, - const Cohort& src, - const Sentence& sentence, - const size_t orig_beg, - const size_t orig_end, - const size_t i_left, - const size_t i_right, - bool verbose) -{ +variant, UStringVector>> +build_squiggle_replacement(const Reading& r, const ErrId& err_id, + const size_t i_c, const Cohort& src, const Sentence& sentence, + const size_t orig_beg, const size_t orig_end, const size_t i_left, + const size_t i_right, bool verbose) { size_t beg = orig_beg; size_t end = orig_end; std::set deletions; bool src_applies_deletion = false; rel_on_match(r.rels, DELETE_REL, sentence, - [&](const string& relname, size_t i_t, const Cohort& trg) { - deletions.insert(trg.id); - if(trg.errtypes.find(err_id) != trg.errtypes.end()) { src_applies_deletion = true; } - if(trg.coerrtypes.find(err_id) != trg.coerrtypes.end()) { src_applies_deletion = true; } - }); - std::map, pair> add; // position in text:cohort in Sentence + [&](const string& relname, size_t i_t, const Cohort& trg) { + deletions.insert(trg.id); + if (trg.errtypes.find(err_id) != trg.errtypes.end()) { + src_applies_deletion = true; + } + if (trg.coerrtypes.find(err_id) != trg.coerrtypes.end()) { + src_applies_deletion = true; + } + }); + std::map, pair> + add; // position in text:cohort in Sentence // Loop from the leftmost to the rightmost of source and target cohorts: -if(verbose) std::cerr << "\033[1;31m=== err_id=\t" << toUtf8(err_id) << " ===\033[0m" << std::endl; -if(verbose) std::cerr << "\033[1;33mr.id=\t" << r.id << "\033[0m" << std::endl; -if(verbose) std::cerr << "\033[1;33msrc.id=\t" << src.id << "\033[0m" << std::endl; -if(verbose) std::cerr << "\033[1;33mi_c=\t" << i_c << "\033[0m" << std::endl; -if(verbose) std::cerr << "\033[1;33mleft=\t" << i_left << "\033[0m" << std::endl; -if(verbose) std::cerr << "\033[1;33mright=\t" << i_right << "\033[0m" << std::endl; - UStringVector reps = {u""}; - UStringVector reps_suggestwf = {}; // If we're doing SUGGESTWF, we ignore reps + if (verbose) + std::cerr << "\033[1;31m=== err_id=\t" << toUtf8(err_id) + << " ===\033[0m" << std::endl; + if (verbose) + std::cerr << "\033[1;33mr.id=\t" << r.id << "\033[0m" << std::endl; + if (verbose) + std::cerr << "\033[1;33msrc.id=\t" << src.id << "\033[0m" << std::endl; + if (verbose) + std::cerr << "\033[1;33mi_c=\t" << i_c << "\033[0m" << std::endl; + if (verbose) + std::cerr << "\033[1;33mleft=\t" << i_left << "\033[0m" << std::endl; + if (verbose) + std::cerr << "\033[1;33mright=\t" << i_right << "\033[0m" << std::endl; + UStringVector reps = { u"" }; + UStringVector + reps_suggestwf = {}; // If we're doing SUGGESTWF, we ignore reps string prev_added_before_blank = ""; std::optional addedcasing = std::nullopt; for (size_t i = i_left; i <= i_right; ++i) { const auto& trg = sentence.cohorts[i]; Casing casing = getCasing(toUtf8(trg.form)); -if(verbose) std::cerr << "\033[1;34mi=\t" << i << "\033[0m" << std::endl; -if(verbose) std::cerr << "\033[1;34mtrg.form=\t'" << toUtf8(trg.form) << "'\033[0m" << std::endl; -if(verbose) std::cerr << "\033[1;34mtrg.id=\t" << trg.id << "\033[0m" << std::endl; -if(verbose) std::cerr << "\033[1;35mtrg.raw_pre_blank=\t'" << trg.raw_pre_blank << "'\033[0m" << std::endl; + if (verbose) + std::cerr << "\033[1;34mi=\t" << i << "\033[0m" << std::endl; + if (verbose) + std::cerr << "\033[1;34mtrg.form=\t'" << toUtf8(trg.form) + << "'\033[0m" << std::endl; + if (verbose) + std::cerr << "\033[1;34mtrg.id=\t" << trg.id << "\033[0m" + << std::endl; + if (verbose) + std::cerr << "\033[1;35mtrg.raw_pre_blank=\t'" << trg.raw_pre_blank + << "'\033[0m" << std::endl; UStringVector rep_this_trg; const bool del = do_delete(trg, err_id, src.errtypes, deletions); if (del) { rep_this_trg.push_back(u""); -if(verbose) std::cerr << "\t\t\033[1;36mdelete=\t" << toUtf8(trg.form) << "\033[0m" << std::endl; + if (verbose) + std::cerr << "\t\t\033[1;36mdelete=\t" << toUtf8(trg.form) + << "\033[0m" << std::endl; } - if(trg.added) { + if (trg.added) { // This word was added, get casing from a non-added word to the right: - for(size_t j = i; j <= i_right; j++) { + for (size_t j = i; j <= i_right; j++) { const auto& right_of_trg = sentence.cohorts[j]; - if(!right_of_trg.added) { + if (!right_of_trg.added) { addedcasing = casing; casing = getCasing(toUtf8(right_of_trg.form)); break; } } } - else if(addedcasing.has_value() && !del) { + else if (addedcasing.has_value() && !del) { // This word was not &ADDED, but is preceded by an added word: casing = addedcasing.value(); addedcasing = std::nullopt; @@ -615,58 +642,83 @@ if(verbose) std::cerr << "\t\t\033[1;36mdelete=\t" << toUtf8(trg.form) << "\03 bool applies_deletion = trg.id == src.id && src_applies_deletion; size_t trg_beg = trg.pos; size_t trg_end = trg.pos + trg.form.size(); - for (const Reading& tr : readings_with_errtype(trg, err_id, applies_deletion)) { -if(verbose) std::cerr << "\033[1;32mtr.line=\t" << tr.line << "\033[0m" << std::endl; + for (const Reading& tr : + readings_with_errtype(trg, err_id, applies_deletion)) { + if (verbose) + std::cerr << "\033[1;32mtr.line=\t" << tr.line << "\033[0m" + << std::endl; // Update beg/end: if (tr.added == AddedBeforeBlank) { if (i == 0) { - std::cerr << "divvun-suggest: WARNING: Saw &ADDED-BEFORE-BLANK on initial word, ignoring" << std::endl; + std::cerr + << "divvun-suggest: WARNING: Saw &ADDED-BEFORE-BLANK on " + "initial word, ignoring" + << std::endl; continue; } const auto& pretrg = sentence.cohorts[i - 1]; trg_beg = pretrg.pos + pretrg.form.size(); added_before_blank = true; } - if(tr.added != NotAdded) { // Don't replace existing form if Added/AddedBeforeBlank + if ( + tr.added != + NotAdded) { // Don't replace existing form if Added/AddedBeforeBlank trg_end = trg_beg; } -if(verbose) std::cerr << "\t\033[1;35mr.wf='" << tr.wf << "'\033[0m"; -if(verbose) std::cerr << "\t\033[0;35mr.coerror=" << tr.coerror << "\033[0m"; -if(verbose) std::cerr << "\t\033[0;35mr.suggestwf=" << tr.suggestwf << "\033[0m"; -if(verbose) std::cerr << "\t\033[0;35mr.suggest=" << tr.suggest << "\033[0m" << "\t" << tr.line; + if (verbose) + std::cerr << "\t\033[1;35mr.wf='" << tr.wf << "'\033[0m"; + if (verbose) + std::cerr << "\t\033[0;35mr.coerror=" << tr.coerror + << "\033[0m"; + if (verbose) + std::cerr << "\t\033[0;35mr.suggestwf=" << tr.suggestwf + << "\033[0m"; + if (verbose) + std::cerr << "\t\033[0;35mr.suggest=" << tr.suggest + << "\033[0m" << "\t" << tr.line; // Collect SUGGEST/SUGGESTWF: - if(!del) for(const auto& sf : tr.sforms) { - const auto cased_sf = fromUtf8(withCasing(tr.fixedcase, casing, sf)); - rep_this_trg.push_back(cased_sf); - if (tr.suggestwf) { - if (i == i_c) { - reps_suggestwf.push_back(cased_sf); - } - else { - std::cerr << "divvun-suggest: WARNING: Saw SUGGESTWF on non-central (co-)cohort, ignoring" << std::endl; + if (!del) + for (const auto& sf : tr.sforms) { + const auto cased_sf = + fromUtf8(withCasing(tr.fixedcase, casing, sf)); + rep_this_trg.push_back(cased_sf); + if (tr.suggestwf) { + if (i == i_c) { + reps_suggestwf.push_back(cased_sf); + } + else { + std::cerr + << "divvun-suggest: WARNING: Saw SUGGESTWF on " + "non-central (co-)cohort, ignoring" + << std::endl; + } } + if (verbose) + std::cerr << "\t\t\033[1;36msform=\t'" << sf + << "'\033[0m" << std::endl; } -if(verbose) std::cerr << "\t\t\033[1;36msform=\t'" << sf << "'\033[0m" << std::endl; - } fixedcase |= tr.fixedcase; // for the surface form } // end for readings of target - if(rep_this_trg.empty()) { - const auto cased_sf = fromUtf8(withCasing(fixedcase, casing, toUtf8(trg.form))); + if (rep_this_trg.empty()) { + const auto cased_sf = + fromUtf8(withCasing(fixedcase, casing, toUtf8(trg.form))); rep_this_trg.push_back(cased_sf); } beg = std::min(beg, trg_beg); end = std::max(end, trg_end); UStringVector reps_next; - for(auto& rep: reps) { + for (auto& rep : reps) { // Prepend blank unless at left edge: - const auto pre_blank = i == i_left || added_before_blank - ? "" - : clean_blank(prev_added_before_blank + trg.raw_pre_blank); + const auto pre_blank = + i == i_left || added_before_blank ? + "" : + clean_blank(prev_added_before_blank + trg.raw_pre_blank); // For &ADDED, enclose in blanks (unneeded blanks will get cleaned later): const auto post_blank = trg.added ? u" " : u""; - for(const auto& sform : rep_this_trg) { - reps_next.push_back(rep + fromUtf8(pre_blank) + sform + post_blank); + for (const auto& sform : rep_this_trg) { + reps_next.push_back( + rep + fromUtf8(pre_blank) + sform + post_blank); } } reps.swap(reps_next); @@ -674,14 +726,19 @@ if(verbose) std::cerr << "\t\t\033[1;36msform=\t'" << sf << "'\033[0m" << std } // end for target cohorts // We never want to add whitespace to ends of suggestions (typically deleted words) // and we never want double spaces in suggestions - for(auto& rep: reps) { - rep.erase(std::unique(rep.begin(), rep.end(), both_spaces), rep.end()); // remove double spaces + for (auto& rep : reps) { + rep.erase(std::unique(rep.begin(), rep.end(), both_spaces), + rep.end()); // remove double spaces rep.erase(1 + rep.find_last_not_of(' ')); rep.erase(0, rep.find_first_not_of(' ')); } -if(verbose) for (const auto& sf : reps) { std::cerr << "\033[1;35mreps sf=\t'" << toUtf8(sf) << "'\033[0m\t" << beg << "," << end << std::endl; } + if (verbose) + for (const auto& sf : reps) { + std::cerr << "\033[1;35mreps sf=\t'" << toUtf8(sf) << "'\033[0m\t" + << beg << "," << end << std::endl; + } return std::make_pair(std::make_pair(beg, end), - reps_suggestwf.empty() ? reps : reps_suggestwf); + reps_suggestwf.empty() ? reps : reps_suggestwf); } variant Suggest::cohort_errs(const ErrId& err_id, size_t i_c, @@ -745,16 +802,17 @@ variant Suggest::cohort_errs(const ErrId& err_id, size_t i_c, std::unordered_map msg_replacements; rel_on_match(r.rels, MSG_TEMPLATE_REL, sentence, [&](const string& relname, size_t i_t, const Cohort& trg) { - if(msg_replacements.find(relname) == msg_replacements.end()) { + if (msg_replacements.find(relname) == msg_replacements.end()) { msg_replacements[relname] = trg.form; } else { - msg_replacements[relname] = msg_replacements[relname] + u", " + trg.form; + msg_replacements[relname] = + msg_replacements[relname] + u", " + trg.form; } }); - for(const auto& rep : msg_replacements) { - replaceAll(msg.first, fromUtf8(rep.first), rep.second); - replaceAll(msg.second, fromUtf8(rep.first), rep.second); + for (const auto& rep : msg_replacements) { + replaceAll(msg.first, fromUtf8(rep.first), rep.second); + replaceAll(msg.second, fromUtf8(rep.first), rep.second); } } // End set msg @@ -763,21 +821,26 @@ variant Suggest::cohort_errs(const ErrId& err_id, size_t i_c, auto end = c.pos + c.form.size(); UStringVector rep; for (const Reading& r : c.readings) { - if(r.errtypes.find(err_id) == r.errtypes.end()) { + if (r.errtypes.find(err_id) == r.errtypes.end()) { continue; // We consider sforms of SUGGEST readings in build_squiggle_replacement } // If there are LEFT/RIGHT added relations, add suggestions with those concatenated to our form // TODO: What about our current suggestions of the same error tag? Currently just using wordform const auto squiggle = squiggle_bounds(r.rels, sentence, i_c, c); - std::visit([&](auto&& arg) { - using T = std::decay_t; - if constexpr (std::is_same_v) {} - if constexpr (std::is_same_v, UStringVector>>) { - beg = arg.first.first; - end = arg.first.second; - rep.insert(rep.end(), arg.second.begin(), arg.second.end()); - } - }, build_squiggle_replacement(r, err_id, i_c, c, sentence, beg, end, squiggle.first, squiggle.second, verbose)); + std::visit( + [&](auto&& arg) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + } + if constexpr (std::is_same_v, UStringVector>>) { + beg = arg.first.first; + end = arg.first.second; + rep.insert(rep.end(), arg.second.begin(), arg.second.end()); + } + }, + build_squiggle_replacement(r, err_id, i_c, c, sentence, beg, end, + squiggle.first, squiggle.second, verbose)); } // Avoid unchanging replacements: auto form = text.substr(beg, end - beg); @@ -800,7 +863,7 @@ variant Suggest::cohort_errs(const ErrId& err_id, size_t i_c, */ const string clean_blank(const string& raw) { bool escaped = false; - bool bol = true; // at beginning of line + bool bol = true; // at beginning of line std::ostringstream text; for (const auto& c : raw) { if (bol && c == ':') { @@ -842,33 +905,39 @@ Sentence Suggest::run_sentence(std::istream& is, FlushOn flush_on) { sentence.runstate = Eof; string line; - string raw_blank; // for CG output format + string raw_blank; // for CG output format string readinglines; std::getline(is, - line); // TODO: Why do I need at least one getline before os<< after flushing? + line); // TODO: Why do I need at least one getline before os<< after flushing? do { std::match_results result; std::regex_match(line.c_str(), result, CG_LINE); if (!readinglines.empty() && // Reached end of readings - (result.empty() || (result[3].length() <= 1 && result[8].length() <= 1))) { + (result.empty() || + (result[3].length() <= 1 && result[8].length() <= 1))) { const auto& reading = proc_reading(*generator, readinglines, generate_all_readings); readinglines = ""; - c.errtypes.insert(reading.errtypes.begin(), reading.errtypes.end()); - c.coerrtypes.insert(reading.coerrtypes.begin(), reading.coerrtypes.end()); + c.errtypes.insert( + reading.errtypes.begin(), reading.errtypes.end()); + c.coerrtypes.insert( + reading.coerrtypes.begin(), reading.coerrtypes.end()); if (reading.id != 0) { c.id = reading.id; } c.added = reading.added == NotAdded ? c.added : reading.added; c.readings.push_back(reading); - if(flush_on == NulAndDelimiters) { + if (flush_on == NulAndDelimiters) { if (delimiters.find(c.form) != delimiters.end()) { sentence.runstate = Flushing; } - if (sentence.cohorts.size () >= hard_limit) { + if (sentence.cohorts.size() >= hard_limit) { // We only respect hard_limit when flushing on delimiters (for the Nul only case we assume the calling API ensures requests are of reasonable size) - std::cerr << "divvun-suggest: WARNING: Hard limit of " << hard_limit << " cohorts reached - forcing break." << std::endl; + std::cerr << "divvun-suggest: WARNING: Hard limit of " + << hard_limit + << " cohorts reached - forcing break." + << std::endl; sentence.runstate = Flushing; } } @@ -908,13 +977,14 @@ Sentence Suggest::run_sentence(std::istream& is, FlushOn flush_on) { else if (!result.empty() && result[7].length() != 0) { // flush sentence.runstate = Flushing; } - else if (!result.empty() && result[8].length() != 0) { // traced removed reading + else if (!result.empty() && + result[8].length() != 0) { // traced removed reading c.trace_removed_readings += line + "\n"; } else { // Blank lines without the prefix don't go into text output! } - if(sentence.runstate == Flushing) { + if (sentence.runstate == Flushing) { break; } } while (std::getline(is, line)); @@ -924,7 +994,8 @@ Sentence Suggest::run_sentence(std::istream& is, FlushOn flush_on) { proc_reading(*generator, readinglines, generate_all_readings); readinglines = ""; c.errtypes.insert(reading.errtypes.begin(), reading.errtypes.end()); - c.coerrtypes.insert(reading.coerrtypes.begin(), reading.coerrtypes.end()); + c.coerrtypes.insert( + reading.coerrtypes.begin(), reading.coerrtypes.end()); if (reading.id != 0) { c.id = reading.id; } @@ -1032,17 +1103,19 @@ void Suggest::mk_errs(Sentence& sentence) { // co&errortag (since the relation source is the "main" error): for (size_t i_c = 0; i_c < sentence.cohorts.size(); i_c++) { Cohort& source = sentence.cohorts[i_c]; - for(const auto& r: source.readings) { + for (const auto& r : source.readings) { std::set targets; rel_on_match(r.rels, LEFT_RIGHT_DELETE_REL, sentence, - [&](const string&, size_t i_trg, const Cohort&) { - targets.insert(i_trg); - }); - for(int i_t : targets) { + [&](const string&, size_t i_trg, const Cohort&) { + targets.insert(i_trg); + }); + for (int i_t : targets) { auto& target = sentence.cohorts.at(i_t); - demote_error_to_coerror(source, target.errtypes, target.coerrtypes); - for(Reading& tr : target.readings) { - demote_error_to_coerror(source, tr.errtypes, tr.coerrtypes); + demote_error_to_coerror( + source, target.errtypes, target.coerrtypes); + for (Reading& tr : target.readings) { + demote_error_to_coerror( + source, tr.errtypes, tr.coerrtypes); } } } @@ -1053,7 +1126,8 @@ void Suggest::mk_errs(Sentence& sentence) { std::set c_errtypes; for (size_t i = 0; i < c.readings.size(); ++i) { const Reading& r = c.readings[i]; - if (r.coerror) { // Needed for backwards-compatibility with `COERROR &errtag` readings + if ( + r.coerror) { // Needed for backwards-compatibility with `COERROR &errtag` readings continue; } c_errtypes.insert(r.errtypes.begin(), r.errtypes.end()); @@ -1063,14 +1137,17 @@ void Suggest::mk_errs(Sentence& sentence) { continue; } - std::visit([&](auto&& arg) { - using T = std::decay_t; - if constexpr (std::is_same_v) {} - if constexpr (std::is_same_v) { - c.errs.push_back(arg); - sentence.errs.push_back(arg); - } - }, cohort_errs(errtype, i_c, c, sentence, text)); + std::visit( + [&](auto&& arg) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + } + if constexpr (std::is_same_v) { + c.errs.push_back(arg); + sentence.errs.push_back(arg); + } + }, + cohort_errs(errtype, i_c, c, sentence, text)); } } // Postprocessing for overlapping errors: @@ -1082,9 +1159,10 @@ vector Suggest::run_errs(std::istream& is) { auto _old = std::locale::global(std::locale("")); } catch (const std::runtime_error& e) { - std::cerr << "divvun-suggest: WARNING: Couldn't set global locale \"\" " - "(locale-specific native environment): " - << e.what() << std::endl; + std::cerr + << "divvun-suggest: WARNING: Couldn't set global locale \"\" " + "(locale-specific native environment): " + << e.what() << std::endl; } return run_sentence(is, FlushOn::Nul).errs; } @@ -1101,15 +1179,10 @@ RunState Suggest::run_json(std::istream& is, std::ostream& os) { if (wantsep) { os << ","; } - os << "[" - << json::str(e.form) << "," - << std::to_string(e.beg) << "," - << std::to_string(e.end) << "," - << json::str(e.err) << "," - << json::str(e.msg.second) << "," - << json::str_arr(e.rep) << "," - << json::str(e.msg.first) - << "]"; + os << "[" << json::str(e.form) << "," << std::to_string(e.beg) << "," + << std::to_string(e.end) << "," << json::str(e.err) << "," + << json::str(e.msg.second) << "," << json::str_arr(e.rep) << "," + << json::str(e.msg.first) << "]"; wantsep = true; } os << "]" @@ -1164,13 +1237,15 @@ RunState Suggest::run_cg(std::istream& is, std::ostream& os) { os << cohort.raw_pre_blank << std::endl; } os << "\"<" << toUtf8(cohort.form) << ">\""; - if(cohort.errs.size() > 0) { os << "\t"; } - for(const Err& err: cohort.errs) { - if(prev_err_start != err.beg) { + if (cohort.errs.size() > 0) { + os << "\t"; + } + for (const Err& err : cohort.errs) { + if (prev_err_start != err.beg) { std::swap(colour_cur, colour_alt); } os << "\t\033[0;31m\033[4m" << toUtf8(err.form) << "\033[0m"; - for(const auto& rep : err.rep) { + for (const auto& rep : err.rep) { os << "\t→ \033[0;32m\033[3m" << toUtf8(rep) << "\033[0m"; } } @@ -1208,9 +1283,10 @@ void Suggest::run(std::istream& is, std::ostream& os, RunMode mode) { auto _old = std::locale::global(std::locale("")); } catch (const std::runtime_error& e) { - std::cerr << "divvun-suggest: WARNING: Couldn't set global locale \"\" " - "(locale-specific native environment): " - << e.what() << std::endl; + std::cerr + << "divvun-suggest: WARNING: Couldn't set global locale \"\" " + "(locale-specific native environment): " + << e.what() << std::endl; } switch (mode) { case RunJson: @@ -1242,7 +1318,7 @@ SortedMsgLangs sortMessageLangs(const MsgMap& msgs, const string& prefer) { } Suggest::Suggest(const hfst::HfstTransducer* generator_, divvun::MsgMap msgs_, - const string& locale_, bool verbose_, bool genall) + const string& locale_, bool verbose_, bool genall) : msgs(msgs_) , locale(locale_) , sortedmsglangs(sortMessageLangs(msgs, locale))