From d7b4b911b0ae5c781c044033a3668e7581504e04 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 10 Jul 2018 17:59:23 +0100 Subject: [PATCH] Abstract interpretation framework: allow selective domain retention This allows users of the `ait` abstract interpretation framework to specify that only certain states are needed after its fixpoint computation is finished. For example, perhaps a particular analysis, or a particular user of that analysis, only needs to know the results at END_FUNCTION instructions. In this case when a state does not need to be retained, and only has a single successor, that state will be altered in place and passed to its successor instruction using std::move, permitting an analysis to optimise away much (potentially all) state copying. In the best case (a long, straight-line function whose state grows with every instruction, such as a constant propagator reading "int x = 1; int y = 2; int z = 3; ...") this reduces analysis space and time complexity from quadratic to linear. --- src/analyses/ai.cpp | 72 +++- src/analyses/ai.h | 29 +- src/goto-programs/goto_program.h | 15 + unit/Makefile | 1 + unit/analyses/ai/ai_selective_domains.cpp | 398 ++++++++++++++++++++++ 5 files changed, 503 insertions(+), 12 deletions(-) create mode 100644 unit/analyses/ai/ai_selective_domains.cpp diff --git a/src/analyses/ai.cpp b/src/analyses/ai.cpp index f0fcb99b1897..69a332b14264 100644 --- a/src/analyses/ai.cpp +++ b/src/analyses/ai.cpp @@ -334,15 +334,36 @@ bool ai_baset::visit( statet ¤t=get_state(l); - for(const auto &to_l : goto_program.get_successors(l)) + // Check if our user wants to be able to inspect this state after the + // fixpoint algorithm terminates. + // Default if the instantiating code hasn't supplied any specific advice is + // to retain all states. + bool must_retain_current_state = must_retain_state(l); + + std::list successors = goto_program.get_successors(l); + for(const auto &to_l : successors) { if(to_l==goto_program.instructions.end()) continue; - std::unique_ptr tmp_state( - make_temporary_state(current)); + // If permissible, alter the existing instruction's state in place; + // otherwise use a temporary copy: + std::unique_ptr tmp_state; - statet &new_values=*tmp_state; + // Note this condition is stricter than `!must_retain_current_state` because + // when we have multiple successors `transform` may do something different + // for different successors, in which case we must use a clean copy of the + // pre-existing state each time. + bool may_work_in_place = + successors.size() == 1 && !must_retain_current_state; + + if(!may_work_in_place) + tmp_state = make_temporary_state(current); + + statet &new_values = + may_work_in_place ? + current : + *tmp_state; bool have_new_values=false; @@ -367,8 +388,7 @@ bool ai_baset::visit( new_values.transform(l, to_l, *this, ns); - if(merge(new_values, l, to_l)) - have_new_values=true; + have_new_values |= merge(std::move(new_values), l, to_l); } if(have_new_values) @@ -378,6 +398,12 @@ bool ai_baset::visit( } } + if(!must_retain_current_state) + { + // If this state isn't needed any longer, destroy it now: + current.make_bottom(); + } + return new_data; } @@ -400,7 +426,7 @@ bool ai_baset::do_function_call( std::unique_ptr tmp_state(make_temporary_state(get_state(l_call))); tmp_state->transform(l_call, l_return, *this, ns); - return merge(*tmp_state, l_call, l_return); + return merge(std::move(*tmp_state), l_call, l_return); } assert(!goto_function.body.instructions.empty()); @@ -420,7 +446,7 @@ bool ai_baset::do_function_call( bool new_data=false; // merge the new stuff - if(merge(*tmp_state, l_call, l_begin)) + if(merge(std::move(*tmp_state), l_call, l_begin)) new_data=true; // do we need to do/re-do the fixedpoint of the body? @@ -445,7 +471,7 @@ bool ai_baset::do_function_call( tmp_state->transform(l_end, l_return, *this, ns); // Propagate those - return merge(*tmp_state, l_end, l_return); + return merge(std::move(*tmp_state), l_end, l_return); } } @@ -523,6 +549,34 @@ bool ai_baset::do_function_call_rec( return new_data; } +/// Determine whether we must retain the state for program point `l` even after +/// fixpoint analysis completes (e.g. for our user to inspect), or if it solely +/// for internal use and can be thrown away as and when suits us. +/// \param l: program point +/// \return true if we must retain the state for that program point +bool ai_baset::must_retain_state(locationt l) const +{ + // If the derived class doesn't specify otherwise, assume the old default + // behaviour: always retain state. + if(!must_retain_state_callback) + return true; + + // Regardless, always keep states with multiple predecessors, which is + // required for termination when loops are present, and saves redundant + // re-investigation of successors for other kinds of convergence. + if(l->incoming_edges.size() > 1) + return true; + + // Similarly retain function head instructions, which may have multiple + // incoming call-graph edges: + if(l->is_function_head()) + return true; + + // Finally, retain states where the particular subclass specifies they + // should be kept: + return must_retain_state_callback(l); +} + void ai_baset::sequential_fixedpoint( const goto_functionst &goto_functions, const namespacet &ns) diff --git a/src/analyses/ai.h b/src/analyses/ai.h index dfe5e183a3d2..771eaac8c41f 100644 --- a/src/analyses/ai.h +++ b/src/analyses/ai.h @@ -96,14 +96,19 @@ class ai_domain_baset // also add // // bool merge(const T &b, locationt from, locationt to); + // and optionally + // bool merge(T &&b, location from, locationt to); // - // This computes the join between "this" and "b". + // These compute the join between "this" and "b". // Return true if "this" has changed. // In the usual case, "b" is the updated state after "from" // and "this" is the state before "to". // // PRECONDITION(from.is_dereferenceable(), "Must not be _::end()") // PRECONDITION(to.is_dereferenceable(), "Must not be _::end()") + // + // The T &&b overload, if any, must leave `b` in such a state that + // `b.make_bottom()` or destruction of `b` could happen next without error. // This method allows an expression to be simplified / evaluated using the // current state. It is used to evaluate assertions and in program @@ -131,7 +136,9 @@ class ai_baset typedef ai_domain_baset statet; typedef goto_programt::const_targett locationt; - ai_baset() + explicit ai_baset( + std::function must_retain_state_callback) : + must_retain_state_callback(must_retain_state_callback) { } @@ -308,6 +315,11 @@ class ai_baset // the work-queue is sorted by location number typedef std::map working_sett; + // Callback that indicates if the user of this instance wants a particular + // state (program point) to be retained for inspection once the fixpoint is + // found. If no callback is supplied we by default retain all states. + std::function must_retain_state_callback; + locationt get_next(working_sett &working_set); void put_in_working_set( @@ -358,8 +370,11 @@ class ai_baset const exprt::operandst &arguments, const namespacet &ns); + bool must_retain_state(locationt) const; + // abstract methods + virtual bool merge(statet &&src, locationt from, locationt to)=0; virtual bool merge(const statet &src, locationt from, locationt to)=0; // for concurrent fixedpoint virtual bool merge_shared( @@ -378,7 +393,8 @@ class ait:public ai_baset { public: // constructor - ait():ai_baset() + ait(std::function must_retain_state_callback = nullptr) : + ai_baset(must_retain_state_callback) { } @@ -436,6 +452,13 @@ class ait:public ai_baset return it->second; } + bool merge(statet &&src, locationt from, locationt to) override + { + statet &dest=get_state(to); + return static_cast(dest).merge( + static_cast(src), from, to); + } + bool merge(const statet &src, locationt from, locationt to) override { statet &dest=get_state(to); diff --git a/src/goto-programs/goto_program.h b/src/goto-programs/goto_program.h index 3232f467672b..17bbe44405e9 100644 --- a/src/goto-programs/goto_program.h +++ b/src/goto-programs/goto_program.h @@ -383,6 +383,21 @@ class goto_programt return false; } + /// Returns true if the instruction is first in its function. + /// Not valid for unreachable instructions -- perform dead-code elimination + /// first if you don't want to confuse those. + bool is_function_head() const + { + if(incoming_edges.size() == 0) + return true; + for(const targett &pred : incoming_edges) + { + if(pred->location_number < location_number) + return true; + } + return false; + } + std::string to_string() const { std::ostringstream instruction_id_builder; diff --git a/unit/Makefile b/unit/Makefile index 496f4cb0143d..42f97c7986f9 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -8,6 +8,7 @@ SRC = unit_tests.cpp \ # Test source files SRC += unit_tests.cpp \ analyses/ai/ai_simplify_lhs.cpp \ + analyses/ai/ai_selective_domains.cpp \ analyses/call_graph.cpp \ analyses/constant_propagator.cpp \ analyses/disconnect_unreachable_nodes_in_graph.cpp \ diff --git a/unit/analyses/ai/ai_selective_domains.cpp b/unit/analyses/ai/ai_selective_domains.cpp new file mode 100644 index 000000000000..9ca6ea4ccbae --- /dev/null +++ b/unit/analyses/ai/ai_selective_domains.cpp @@ -0,0 +1,398 @@ +/*******************************************************************\ + + Module: Unit tests for selectively retaining particular domains in ait + + Author: Diffblue Ltd. + +\*******************************************************************/ + +/// \file +/// Unit tests for selectively retaining particular domains in ait + +#include + +#include + +#include + +#include + +#include + +#include +#include +#include + +/// A very simple analysis that counts executed instructions along a particular +/// path, taking the max at merge points and saturating at 100 instructions. +/// It should indicate that instructions not within a loop have a certain path +/// length, and those reachable from a loop have a path length of 100 +/// (i.e. potentially infinity) +class instruction_counter_domaint : public ai_domain_baset +{ +public: + + std::unique_ptr path_length; + int used_move_merge; + + instruction_counter_domaint() : used_move_merge(0) + { + } + + instruction_counter_domaint(const instruction_counter_domaint &other) : + used_move_merge(other.used_move_merge) + { + if(other.path_length) + path_length.reset(new int(*other.path_length)); + } + + void transform(locationt, locationt, ai_baset &, const namespacet &) override + { + if(*path_length < 100) + ++*path_length; + } + + void make_bottom() override + { + path_length.reset(); + } + void make_top() override + { + UNREACHABLE; + } + void make_entry() override + { + path_length.reset(new int); + *path_length = 0; + } + bool is_bottom() const override + { + return path_length == nullptr; + } + bool is_top() const override + { + UNREACHABLE; + return true; + } + + bool merge(const instruction_counter_domaint &b, locationt from, locationt to) + { + if(b.is_bottom()) + return false; + + if(!path_length) + { + path_length.reset(new int); + *path_length = 0; + } + + int old_count = *path_length; + // Max path length to get here, saturating at 100: + *path_length = + std::max(*path_length, *b.path_length); + return *path_length != old_count; + } + + bool merge(instruction_counter_domaint &&b, locationt from, locationt to) + { + if(is_bottom() && !b.is_bottom()) + { + path_length = std::move(b.path_length); + ++used_move_merge; + return true; + } + else + return merge((const instruction_counter_domaint &)b, from, to); + } +}; + +class instruction_counter_analysist : public ait +{ +public: + + static bool is_y_assignment(const goto_programt::instructiont &i) + { + if(!i.is_assign()) + return false; + const code_assignt &assign = to_code_assign(i.code); + return + assign.lhs().id() == ID_symbol && + id2string(to_symbol_expr(assign.lhs()).get_identifier()).find('y') != + std::string::npos; + } + + static bool is_y_assignment_location(locationt l) + { + // At assignments to variables with a 'y' in their name, keep the domain. + // Otherwise let ait decide whether to keep it. + return is_y_assignment(*l); + } + + instruction_counter_analysist(bool optimise_domains) : + ait( + optimise_domains ? is_y_assignment_location : nullptr) + { + } +}; + +static code_function_callt make_void_call(const symbol_exprt &function) +{ + code_function_callt ret; + ret.function() = function; + return ret; +} + +SCENARIO( + "ait selective domain storage", "[core][analyses][ai][ai_selective_domains]") +{ + // Make a program like: + + // __CPROVER_start() { f(); } + // + // f() { + // int x = 10; + // int y = 20; + // h(); + // while(x != 0) { + // x -= 1; + // y -= 1; + // g(); + // } + // g(); // To ensure that this later call doesn't overwrite the earlier + // // calls from within the loop (i.e. the top of 'g' is respected + // // as a merge point) + // } + // + // Called from within loop, so should have a long possible path + // g() { int gy = 0; } + // + // Only called before loop, so should have a short path + // h() { int hy = 0; } + + register_language(new_ansi_c_language); + config.ansi_c.set_LP64(); + + goto_modelt goto_model; + + // g: + + symbolt gy; + gy.name = "gy"; + gy.mode = ID_C; + gy.type = signed_int_type(); + + symbolt g; + g.name = "g"; + g.mode = ID_C; + g.type = code_typet({}, void_typet()); + g.value = code_assignt(gy.symbol_expr(), from_integer(0, signed_int_type())); + + // h: + + symbolt hy; + hy.name = "hy"; + hy.mode = ID_C; + hy.type = signed_int_type(); + + symbolt h; + h.name = "h"; + h.mode = ID_C; + h.type = code_typet({}, void_typet()); + h.value = code_assignt(hy.symbol_expr(), from_integer(0, signed_int_type())); + + goto_model.symbol_table.add(g); + goto_model.symbol_table.add(gy); + goto_model.symbol_table.add(h); + goto_model.symbol_table.add(hy); + + // f: + + symbolt x; + x.name = "x"; + x.mode = ID_C; + x.type = signed_int_type(); + + symbolt y; + y.name = "y"; + y.mode = ID_C; + y.type = signed_int_type(); + + goto_model.symbol_table.add(x); + goto_model.symbol_table.add(y); + + code_blockt f_body; + + f_body.copy_to_operands(code_declt(x.symbol_expr())); + f_body.copy_to_operands(code_declt(y.symbol_expr())); + + f_body.copy_to_operands( + code_assignt(x.symbol_expr(), from_integer(10, signed_int_type()))); + f_body.copy_to_operands( + code_assignt(y.symbol_expr(), from_integer(20, signed_int_type()))); + + f_body.copy_to_operands(make_void_call(h.symbol_expr())); + + code_whilet loop; + + loop.cond() = + notequal_exprt(x.symbol_expr(), from_integer(0, signed_int_type())); + + code_blockt loop_body; + loop_body.copy_to_operands( + code_assignt( + x.symbol_expr(), + minus_exprt(x.symbol_expr(), from_integer(1, signed_int_type())))); + loop_body.copy_to_operands( + code_assignt( + y.symbol_expr(), + minus_exprt(y.symbol_expr(), from_integer(1, signed_int_type())))); + loop_body.copy_to_operands(make_void_call(g.symbol_expr())); + + loop.body() = loop_body; + + f_body.move_to_operands(loop); + + f_body.copy_to_operands(make_void_call(g.symbol_expr())); + + symbolt f; + f.name = "f"; + f.mode = ID_C; + f.type = code_typet({}, void_typet()); + f.value = f_body; + + goto_model.symbol_table.add(f); + + // __CPROVER_start: + + symbolt start; + start.name = goto_functionst::entry_point(); + start.mode = ID_C; + start.type = code_typet({}, void_typet()); + start.value = make_void_call(f.symbol_expr()); + + goto_model.symbol_table.add(start); + + null_message_handlert nullout; + goto_convert(goto_model, nullout); + + WHEN("The target program is analysed without optimised domain storage") + { + instruction_counter_analysist unopt_analysis(false); + unopt_analysis(goto_model); + + THEN("No state should be bottom") + { + forall_goto_functions(f_it, goto_model.goto_functions) + { + forall_goto_program_instructions(i_it, f_it->second.body) + { + REQUIRE(!unopt_analysis[i_it].is_bottom()); + } + } + } + + THEN("The first y-assignment should have a short path; " + "the second should have a long one") + { + const auto &f_instructions = + goto_model.goto_functions.function_map.at("f").body.instructions; + + std::vector y_assignments; + for(auto l = f_instructions.begin(); l != f_instructions.end(); ++l) + if(instruction_counter_analysist::is_y_assignment_location(l)) + y_assignments.push_back(l); + + REQUIRE(y_assignments.size() == 2); + REQUIRE(!unopt_analysis[y_assignments[0]].is_bottom()); + REQUIRE(*(unopt_analysis[y_assignments[0]].path_length) < 100); + REQUIRE(!unopt_analysis[y_assignments[1]].is_bottom()); + REQUIRE(*(unopt_analysis[y_assignments[1]].path_length) == 100); + } + + THEN("The assignment in function 'g' should have a long path") + { + const auto &g_instructions = + goto_model.goto_functions.function_map.at("g").body.instructions; + REQUIRE(g_instructions.begin()->is_assign()); + REQUIRE(!unopt_analysis[g_instructions.begin()].is_bottom()); + REQUIRE(*unopt_analysis[g_instructions.begin()].path_length == 100); + } + + THEN("The assignment in function 'h' should have a short path") + { + const auto &h_instructions = + goto_model.goto_functions.function_map.at("h").body.instructions; + REQUIRE(h_instructions.begin()->is_assign()); + REQUIRE(!unopt_analysis[h_instructions.begin()].is_bottom()); + REQUIRE(*unopt_analysis[h_instructions.begin()].path_length < 100); + } + + WHEN("The target program is reanalysed with optimised domain storage") + { + instruction_counter_analysist opt_analysis(false); + opt_analysis(goto_model); + + THEN("At every location, the optimised analysis should either agree " + "with the unoptimised one, or have a BOTTOM state") + { + forall_goto_functions(f_it, goto_model.goto_functions) + { + forall_goto_program_instructions(i_it, f_it->second.body) + { + const auto &unopt_state = unopt_analysis[i_it]; + const auto &opt_state = opt_analysis[i_it]; + if(!opt_state.is_bottom()) + { + REQUIRE(!unopt_state.is_bottom()); + REQUIRE(*unopt_state.path_length == *opt_state.path_length); + } + } + } + } + + THEN("The optimised analysis state should be retained at 'y', 'gy' and " + "'hy' assignments") + { + bool found_any_y_assignment; + + forall_goto_functions(f_it, goto_model.goto_functions) + { + forall_goto_program_instructions(i_it, f_it->second.body) + { + if(instruction_counter_analysist::is_y_assignment_location(i_it)) + { + found_any_y_assignment = true; + REQUIRE(!opt_analysis[i_it].is_bottom()); + } + } + } + + REQUIRE(found_any_y_assignment); + } + + THEN("The optimised analysis state should be retained at any " + "control-flow merge point") + { + forall_goto_functions(f_it, goto_model.goto_functions) + { + forall_goto_program_instructions(i_it, f_it->second.body) + { + if(i_it->incoming_edges.size() > 1) + REQUIRE(!opt_analysis[i_it].is_bottom()); + } + } + } + + THEN("The optimised analysis state should be retained at the top of all " + "functions") + { + forall_goto_functions(f_it, goto_model.goto_functions) + { + REQUIRE(!f_it->second.body.instructions.empty()); + REQUIRE( + !opt_analysis[f_it->second.body.instructions.begin()].is_bottom()); + } + } + } + } +}