From 7952f2c21637ad5620f4b295dd0eeaddd1bba630 Mon Sep 17 00:00:00 2001 From: Hannes Steffenhagen Date: Wed, 21 Feb 2018 10:24:51 +0000 Subject: [PATCH] Add option to generate function body to goto-instrument --- CHANGELOG | 7 + doc/cbmc-user-manual.md | 158 ++++++ .../main.c | 14 + .../test.desc | 8 + .../main.c | 6 + .../test.desc | 8 + .../main.c | 9 + .../test.desc | 8 + .../main.c | 45 ++ .../test.desc | 17 + .../main.c | 12 + .../test.desc | 7 + .../main.c | 13 + .../test.desc | 10 + .../main.c | 12 + .../test.desc | 10 + .../main.c | 14 + .../test.desc | 9 + .../main.c | 21 + .../test.desc | 10 + .../main.c | 25 + .../test.desc | 14 + .../generate-function-body/main.c | 21 + .../generate-function-body/test.desc | 8 + .../goto_instrument_parse_options.cpp | 16 +- .../goto_instrument_parse_options.h | 6 +- src/goto-programs/Makefile | 1 + .../generate_function_bodies.cpp | 489 ++++++++++++++++++ src/goto-programs/generate_function_bodies.h | 84 +++ src/util/string_utils.cpp | 7 + src/util/string_utils.h | 2 + 31 files changed, 1069 insertions(+), 2 deletions(-) create mode 100644 regression/goto-instrument/generate-function-body-assert-false-assume-false/main.c create mode 100644 regression/goto-instrument/generate-function-body-assert-false-assume-false/test.desc create mode 100644 regression/goto-instrument/generate-function-body-assert-false/main.c create mode 100644 regression/goto-instrument/generate-function-body-assert-false/test.desc create mode 100644 regression/goto-instrument/generate-function-body-assume-false/main.c create mode 100644 regression/goto-instrument/generate-function-body-assume-false/test.desc create mode 100644 regression/goto-instrument/generate-function-body-complex-struct/main.c create mode 100644 regression/goto-instrument/generate-function-body-complex-struct/test.desc create mode 100644 regression/goto-instrument/generate-function-body-const-pointer-to-non-const/main.c create mode 100644 regression/goto-instrument/generate-function-body-const-pointer-to-non-const/test.desc create mode 100644 regression/goto-instrument/generate-function-body-havoc-globals/main.c create mode 100644 regression/goto-instrument/generate-function-body-havoc-globals/test.desc create mode 100644 regression/goto-instrument/generate-function-body-havoc-params/main.c create mode 100644 regression/goto-instrument/generate-function-body-havoc-params/test.desc create mode 100644 regression/goto-instrument/generate-function-body-pointer-to-pointer-to-const/main.c create mode 100644 regression/goto-instrument/generate-function-body-pointer-to-pointer-to-const/test.desc create mode 100644 regression/goto-instrument/generate-function-body-struct-with-const-member/main.c create mode 100644 regression/goto-instrument/generate-function-body-struct-with-const-member/test.desc create mode 100644 regression/goto-instrument/generate-function-body-union-with-const-member/main.c create mode 100644 regression/goto-instrument/generate-function-body-union-with-const-member/test.desc create mode 100644 regression/goto-instrument/generate-function-body/main.c create mode 100644 regression/goto-instrument/generate-function-body/test.desc create mode 100644 src/goto-programs/generate_function_bodies.cpp create mode 100644 src/goto-programs/generate_function_bodies.h diff --git a/CHANGELOG b/CHANGELOG index 35df4513382..9a884ca05e7 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,10 @@ +5.9 +=== +* GOTO-INSTRUMENT: --generate-function-body can be used to + generate bodies for functions that don't have a body + in the goto code. This supercedes the functionality + of --undefined-function-is-assume-false + 5.8 === diff --git a/doc/cbmc-user-manual.md b/doc/cbmc-user-manual.md index f947705c69d..e23aff11df9 100644 --- a/doc/cbmc-user-manual.md +++ b/doc/cbmc-user-manual.md @@ -2014,6 +2014,164 @@ Flag | Check `--uninitialized-check` | add checks for uninitialized locals (experimental) `--error-label label` | check that given label is unreachable +#### Generating function bodies + +Sometimes implementations for called functions are not available in the goto +program, or it is desirable to replace bodies of functions with certain +predetermined stubs (for example to confirm that these functions are never +called, or to indicate that these functions will never return). For this purpose +goto-instrument provides the `--generate-function-body` option, that takes a +regular expression (in [ECMAScript syntax] +(http://en.cppreference.com/w/cpp/regex/ecmascript)) that describes the names of +the functions to generate. Note that this will only generate bodies for +functions that do not already have one; If one wishes to replace the body of a +function with an existing definition, the `--remove-function-body` option can be +used to remove the body of the function prior to generating a new one. + +The shape of the stub itself can be chosen with the +`--generate-function-body-options` parameter, which can take the following +values: + + Option | Result +-----------------------------|------------------------------------------------------------- + `nondet-return` | Do nothing and return a nondet result (this is the default) + `assert-false` | Make the body contain an assert(false) + `assume-false` | Make the body contain an assume(false) + `assert-false-assume-false` | Combines assert-false and assume-false + `havoc` | Set the contents of parameters and globals to nondet + +The various combinations of assert-false and assume-false can be used to +indicate that functions shouldn't be called, that they will never return or +both. + +Example: We have a program like this: + + // error_example.c + #include + + void api_error(void); + void internal_error(void); + + int main(void) + { + int arr[10] = {1,2,3,4,5, 6, 7, 8, 9, 10}; + int sum = 0; + for(int i = 1; i < 10; ++i) + { + sum += arr[i]; + } + if(sum != 55) + { + // we made a mistake when calculating the sum + internal_error(); + } + if(rand() < 0) + { + // we think this cannot happen + api_error(); + } + return 0; + } + +Now, we can compile the program and detect that the error functions are indeed +called by invoking these commands + + goto-cc error_example.c -o error_example.goto + # Replace all functions ending with _error + # (Excluding those starting with __) + # With ones that have an assert(false) body + goto-instrument error_example.goto error_example_replaced.goto \ + --generate-function-body '(?!__).*_error' \ + --generate-function-body-options assert-false + cbmc error_example_replaced.goto + +Which gets us the output + +> ** Results: +> [internal_error.assertion.1] assertion false: FAILURE +> [api_error.assertion.1] assertion false: FAILURE +> +> +> ** 2 of 2 failed (2 iterations) +> VERIFICATION FAILED + +As opposed to the verification success we would have gotten without the +generation. + + +The havoc option takes further parameters `globals` and `params` with this +syntax: `havoc[,globals:][,params:]` (where the square brackets +indicate an optional part). The regular expressions have the same format as the +those for the `--generate-function-body` option and indicate which globals and +function parameters should be set to nondet. All regular expressions require +exact matches (i.e. the regular expression `a|b` will match 'a' and 'b' but not +'adrian' or 'bertha'). + +Example: With a C program like this + + struct Complex { + double real; + double imag; + }; + + struct Complex AGlobalComplex; + int do_something_with_complex(struct Complex *complex); + +And the command line + + goto-instrument in.goto out.goto + --generate-function-body do_something_with_complex + --generate-function-body-options + 'havoc,params:.*,globals:AGlobalComplex' + +The goto code equivalent of the following will be generated: + + int do_something_with_complex(struct Complex *complex) + { + if(complex) + { + complex->real = nondet_double(); + complex->imag = nondet_double(); + } + AGlobalComplex.real = nondet_double(); + AGlobalComplex.imag = nondet_double(); + return nondet_int(); + } + +A note on limitations: Because only static information is used for code +generation, arrays of unknown size and pointers will not be affected by this; +Which means that for code like this: + + struct Node { + int val; + struct Node *next; + }; + + void do_something_with_node(struct Node *node); + +Code like this will be generated: + + void do_something_with_node(struct Node *node) + { + if(node) + { + node->val = nondet_int(); + node->next = nondet_0(); + } + } + +Note that no attempt to follow the `next` pointer is made. If an array of +unknown (or 0) size is encountered, a diagnostic is emitted and the array is not +further examined. + +Some care must be taken when choosing the regular expressions for globals and +functions. Names starting with `__` are reserved for internal purposes; For +example, replacing functions or setting global variables with the `__CPROVER` +prefix might make analysis impossible. To avoid doing this by accident, negative +lookahead can be used. For example, `(?!__).*` matches all names not starting +with `__`. + + \subsection man_instrumentation-api The CPROVER API Reference The following sections summarize the functions available to programs diff --git a/regression/goto-instrument/generate-function-body-assert-false-assume-false/main.c b/regression/goto-instrument/generate-function-body-assert-false-assume-false/main.c new file mode 100644 index 00000000000..7b27c3728b4 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-assert-false-assume-false/main.c @@ -0,0 +1,14 @@ +#include + +void crashes_program(void); + +int main(void) +{ + int flag; + if(flag) + { + crashes_program(); + assert(0); + } + return 0; +} diff --git a/regression/goto-instrument/generate-function-body-assert-false-assume-false/test.desc b/regression/goto-instrument/generate-function-body-assert-false-assume-false/test.desc new file mode 100644 index 00000000000..dc23de4238c --- /dev/null +++ b/regression/goto-instrument/generate-function-body-assert-false-assume-false/test.desc @@ -0,0 +1,8 @@ +CORE +main.c +--generate-function-body crashes_program --generate-function-body-options assert-false-assume-false +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +^\[main.assertion.1\] assertion 0: SUCCESS$ +^\[crashes_program.assertion.1\] assertion false: FAILURE$ diff --git a/regression/goto-instrument/generate-function-body-assert-false/main.c b/regression/goto-instrument/generate-function-body-assert-false/main.c new file mode 100644 index 00000000000..c0b34376909 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-assert-false/main.c @@ -0,0 +1,6 @@ +void do_not_call_this(void); + +int main(void) +{ + do_not_call_this(); +} diff --git a/regression/goto-instrument/generate-function-body-assert-false/test.desc b/regression/goto-instrument/generate-function-body-assert-false/test.desc new file mode 100644 index 00000000000..eb93c062a86 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-assert-false/test.desc @@ -0,0 +1,8 @@ +CORE +main.c +--generate-function-body do_not_call_this --generate-function-body-options assert-false +^EXIT=10$ +^SIGNAL=0$ +^\[do_not_call_this.assertion.1\] assertion false: FAILURE$ +-- +^warning: ignoring diff --git a/regression/goto-instrument/generate-function-body-assume-false/main.c b/regression/goto-instrument/generate-function-body-assume-false/main.c new file mode 100644 index 00000000000..7db2cc97833 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-assume-false/main.c @@ -0,0 +1,9 @@ +#include + +void will_not_return(void); + +int main(void) +{ + will_not_return(); + assert(0); +} diff --git a/regression/goto-instrument/generate-function-body-assume-false/test.desc b/regression/goto-instrument/generate-function-body-assume-false/test.desc new file mode 100644 index 00000000000..8a0f9d9cc4d --- /dev/null +++ b/regression/goto-instrument/generate-function-body-assume-false/test.desc @@ -0,0 +1,8 @@ +CORE +main.c +--generate-function-body will_not_return --generate-function-body-options assume-false +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/goto-instrument/generate-function-body-complex-struct/main.c b/regression/goto-instrument/generate-function-body-complex-struct/main.c new file mode 100644 index 00000000000..1fe2726376d --- /dev/null +++ b/regression/goto-instrument/generate-function-body-complex-struct/main.c @@ -0,0 +1,45 @@ +#include +#include + +struct ComplexStruct +{ + struct + { + int some_variable; + const int some_constant; + } struct_contents; + union { + int some_integer; + double some_double; + } union_contents; + struct ComplexStruct *pointer_contents; +}; + +void havoc_complex_struct(struct ComplexStruct *p); + +int main(void) +{ + struct ComplexStruct child_struct = {{11, 21}, {.some_integer = 31}, NULL}; + struct ComplexStruct main_struct = { + {10, 20}, {.some_double = 13.0}, &child_struct}; + assert(main_struct.pointer_contents->struct_contents.some_variable == 11); + assert(main_struct.struct_contents.some_variable == 10); + assert(main_struct.struct_contents.some_constant == 20); + assert( + main_struct.union_contents.some_double < 14.0 && + main_struct.union_contents.some_double > 12.0); + + havoc_complex_struct(&main_struct); + + // everything (except constants) in the main struct was havocced + assert(main_struct.pointer_contents->struct_contents.some_variable == 11); + assert(main_struct.struct_contents.some_variable == 10); + assert(main_struct.struct_contents.some_constant == 20); + assert( + main_struct.union_contents.some_double < 14.0 && + main_struct.union_contents.some_double > 12.0); + // the child struct was NOT havocced because we can't follow pointers + assert(child_struct.struct_contents.some_variable == 11); + assert(child_struct.union_contents.some_integer == 31); + assert(!child_struct.pointer_contents); +} diff --git a/regression/goto-instrument/generate-function-body-complex-struct/test.desc b/regression/goto-instrument/generate-function-body-complex-struct/test.desc new file mode 100644 index 00000000000..ca9175398e3 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-complex-struct/test.desc @@ -0,0 +1,17 @@ +CORE +main.c +--generate-function-body 'havoc_complex_struct' --generate-function-body-options 'havoc,params:.*' +^SIGNAL=0$ +^EXIT=10$ +\[main.assertion.1\] assertion main_struct.pointer_contents->struct_contents.some_variable == 11: SUCCESS +\[main.assertion.2\] assertion main_struct.struct_contents.some_variable == 10: SUCCESS +\[main.assertion.3\] assertion main_struct.struct_contents.some_constant == 20: SUCCESS +\[main.assertion.4\] assertion main_struct.union_contents.some_double < 14.0 && main_struct.union_contents.some_double > 12.0: SUCCESS +\[main.assertion.5\] assertion main_struct.pointer_contents->struct_contents.some_variable == 11: FAILURE +\[main.assertion.6\] assertion main_struct.struct_contents.some_variable == 10: FAILURE +\[main.assertion.7\] assertion main_struct.struct_contents.some_constant == 20: SUCCESS +\[main.assertion.8\] assertion main_struct.union_contents.some_double < 14.0 && main_struct.union_contents.some_double > 12.0: FAILURE +\[main.assertion.9\] assertion child_struct.struct_contents.some_variable == 11: SUCCESS +\[main.assertion.10\] assertion child_struct.union_contents.some_integer == 31: SUCCESS +\[main.assertion.11\] assertion !child_struct.pointer_contents: SUCCESS +^VERIFICATION FAILED$ diff --git a/regression/goto-instrument/generate-function-body-const-pointer-to-non-const/main.c b/regression/goto-instrument/generate-function-body-const-pointer-to-non-const/main.c new file mode 100644 index 00000000000..69086bfd24b --- /dev/null +++ b/regression/goto-instrument/generate-function-body-const-pointer-to-non-const/main.c @@ -0,0 +1,12 @@ +#include + +void change_pointer_target_of_const_pointer( + int *const constant_pointer_to_nonconst); + +int main(void) +{ + int x = 10; + change_pointer_target_of_const_pointer(&x); + assert(x == 10); + return 0; +} diff --git a/regression/goto-instrument/generate-function-body-const-pointer-to-non-const/test.desc b/regression/goto-instrument/generate-function-body-const-pointer-to-non-const/test.desc new file mode 100644 index 00000000000..a3777b0ff39 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-const-pointer-to-non-const/test.desc @@ -0,0 +1,7 @@ +CORE +main.c +--generate-function-body change_pointer_target_of_const_pointer --generate-function-body-options havoc,params:.* +^EXIT=10$ +^SIGNAL=0$ +^\[main.assertion.1\] assertion x == 10: FAILURE$ +-- diff --git a/regression/goto-instrument/generate-function-body-havoc-globals/main.c b/regression/goto-instrument/generate-function-body-havoc-globals/main.c new file mode 100644 index 00000000000..307b8498d7b --- /dev/null +++ b/regression/goto-instrument/generate-function-body-havoc-globals/main.c @@ -0,0 +1,13 @@ +#include + +int global = 10; +const int constant_global = 10; + +void touches_globals(void); + +int main(void) +{ + touches_globals(); + assert(global == 10); + assert(constant_global == 10); +} diff --git a/regression/goto-instrument/generate-function-body-havoc-globals/test.desc b/regression/goto-instrument/generate-function-body-havoc-globals/test.desc new file mode 100644 index 00000000000..2b516e3a273 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-havoc-globals/test.desc @@ -0,0 +1,10 @@ +CORE +main.c +--generate-function-body touches_globals --generate-function-body-options 'havoc,globals:(?!__).*' +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +^\[main.assertion.1\] assertion global == 10: FAILURE$ +^\[main.assertion.2\] assertion constant_global == 10: SUCCESS$ +-- +^warning: ignoring diff --git a/regression/goto-instrument/generate-function-body-havoc-params/main.c b/regression/goto-instrument/generate-function-body-havoc-params/main.c new file mode 100644 index 00000000000..cde490c3b20 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-havoc-params/main.c @@ -0,0 +1,12 @@ +#include + +void touches_parameter(int *param, const int *const_param); + +int main(void) +{ + int parameter = 10; + int constant_parameter = 10; + touches_parameter(¶meter, &constant_parameter); + assert(parameter == 10); + assert(constant_parameter == 10); +} diff --git a/regression/goto-instrument/generate-function-body-havoc-params/test.desc b/regression/goto-instrument/generate-function-body-havoc-params/test.desc new file mode 100644 index 00000000000..19d9d05976d --- /dev/null +++ b/regression/goto-instrument/generate-function-body-havoc-params/test.desc @@ -0,0 +1,10 @@ +CORE +main.c +--generate-function-body touches_parameter --generate-function-body-options 'havoc,params:.*' +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +^\[main.assertion.1\] assertion parameter == 10: FAILURE$ +^\[main.assertion.2\] assertion constant_parameter == 10: SUCCESS$ +-- +^warning: ignoring diff --git a/regression/goto-instrument/generate-function-body-pointer-to-pointer-to-const/main.c b/regression/goto-instrument/generate-function-body-pointer-to-pointer-to-const/main.c new file mode 100644 index 00000000000..890ae0482b2 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-pointer-to-pointer-to-const/main.c @@ -0,0 +1,14 @@ +#include + +void change_target_of_pointer_to_pointer_to_const( + const int **pointer_to_pointer_to_const); + +int main(void) +{ + int x = 10; + int *px = &x; + assert(*px == 10); + change_target_of_pointer_to_pointer_to_const(&px); + assert(x == 10); + assert(*px == 10); +} diff --git a/regression/goto-instrument/generate-function-body-pointer-to-pointer-to-const/test.desc b/regression/goto-instrument/generate-function-body-pointer-to-pointer-to-const/test.desc new file mode 100644 index 00000000000..7df3009a3ed --- /dev/null +++ b/regression/goto-instrument/generate-function-body-pointer-to-pointer-to-const/test.desc @@ -0,0 +1,9 @@ +CORE +main.c +--generate-function-body change_target_of_pointer_to_pointer_to_const --generate-function-body-options havoc,params:.* +^EXIT=10$ +^SIGNAL=0$ +^\[main.assertion.1\] assertion \*px == 10: SUCCESS$ +^\[main.assertion.2\] assertion x == 10: SUCCESS$ +^\[main.assertion.3\] assertion \*px == 10: FAILURE$ +-- diff --git a/regression/goto-instrument/generate-function-body-struct-with-const-member/main.c b/regression/goto-instrument/generate-function-body-struct-with-const-member/main.c new file mode 100644 index 00000000000..a02a053c857 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-struct-with-const-member/main.c @@ -0,0 +1,21 @@ +#include + +struct WithConstMember +{ + int non_const; + const int is_const; +}; + +struct WithConstMember globalStruct = {10, 20}; +void havoc_struct(struct WithConstMember *s); + +int main(void) +{ + struct WithConstMember paramStruct = {11, 21}; + havoc_struct(¶mStruct); + assert(globalStruct.non_const == 10); + assert(globalStruct.is_const == 20); + assert(paramStruct.non_const == 11); + assert(paramStruct.is_const == 21); + return 0; +} diff --git a/regression/goto-instrument/generate-function-body-struct-with-const-member/test.desc b/regression/goto-instrument/generate-function-body-struct-with-const-member/test.desc new file mode 100644 index 00000000000..68a7e35e8e7 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-struct-with-const-member/test.desc @@ -0,0 +1,10 @@ +CORE +main.c +--generate-function-body 'havoc_struct' --generate-function-body-options 'havoc,params:.*,globals:(?!__).*' +^SIGNAL=0$ +^EXIT=10$ +^\[main.assertion.1\] assertion globalStruct.non_const == 10: FAILURE$ +^\[main.assertion.2\] assertion globalStruct.is_const == 20: SUCCESS$ +^\[main.assertion.3\] assertion paramStruct.non_const == 11: FAILURE$ +^\[main.assertion.4\] assertion paramStruct.is_const == 21: SUCCESS$ +^VERIFICATION FAILED$ diff --git a/regression/goto-instrument/generate-function-body-union-with-const-member/main.c b/regression/goto-instrument/generate-function-body-union-with-const-member/main.c new file mode 100644 index 00000000000..56913e9baa1 --- /dev/null +++ b/regression/goto-instrument/generate-function-body-union-with-const-member/main.c @@ -0,0 +1,25 @@ +#include + +union WithConstMember { + int non_const; + const int is_const; +}; + +union WithConstMember globalUnion; +void havoc_union(union WithConstMember *u); + +int main(void) +{ + union WithConstMember paramUnion; + globalUnion.non_const = 10; + paramUnion.non_const = 20; + assert(globalUnion.non_const == 10); + assert(globalUnion.is_const == 10); + assert(paramUnion.non_const == 20); + assert(paramUnion.is_const == 20); + havoc_union(¶mUnion); + assert(globalUnion.non_const == 10); + assert(globalUnion.is_const == 10); + assert(paramUnion.non_const == 20); + assert(paramUnion.is_const == 20); +} diff --git a/regression/goto-instrument/generate-function-body-union-with-const-member/test.desc b/regression/goto-instrument/generate-function-body-union-with-const-member/test.desc new file mode 100644 index 00000000000..726ddc45c4a --- /dev/null +++ b/regression/goto-instrument/generate-function-body-union-with-const-member/test.desc @@ -0,0 +1,14 @@ +CORE +main.c +--generate-function-body 'havoc_union' --generate-function-body-options 'havoc,params:.*,globals:(?!__).*' +^SIGNAL=0$ +^EXIT=10$ +^\[main.assertion.1\] assertion globalUnion.non_const == 10: SUCCESS$ +^\[main.assertion.2\] assertion globalUnion.is_const == 10: SUCCESS$ +^\[main.assertion.3\] assertion paramUnion.non_const == 20: SUCCESS$ +^\[main.assertion.4\] assertion paramUnion.is_const == 20: SUCCESS$ +^\[main.assertion.5\] assertion globalUnion.non_const == 10: FAILURE$ +^\[main.assertion.6\] assertion globalUnion.is_const == 10: FAILURE$ +^\[main.assertion.7\] assertion paramUnion.non_const == 20: FAILURE$ +^\[main.assertion.8\] assertion paramUnion.is_const == 20: FAILURE$ +^VERIFICATION FAILED$ diff --git a/regression/goto-instrument/generate-function-body/main.c b/regression/goto-instrument/generate-function-body/main.c new file mode 100644 index 00000000000..b4937a26f4c --- /dev/null +++ b/regression/goto-instrument/generate-function-body/main.c @@ -0,0 +1,21 @@ +#include + +void should_not_be_replaced(void) +{ + __CPROVER_assume(0); +} + +void should_be_generated(void); + +int main(void) +{ + int flag; + int does_not_get_reached = 0; + if(flag) + { + should_not_be_replaced(); + assert(does_not_get_reached); + } + should_be_generated(); + return 0; +} diff --git a/regression/goto-instrument/generate-function-body/test.desc b/regression/goto-instrument/generate-function-body/test.desc new file mode 100644 index 00000000000..d61fa11dc44 --- /dev/null +++ b/regression/goto-instrument/generate-function-body/test.desc @@ -0,0 +1,8 @@ +CORE +main.c +--generate-function-body '(?!__).*' --generate-function-body-options assert-false +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +^\[main.assertion.1\] assertion does_not_get_reached: SUCCESS$ +^\[should_be_generated.assertion.1\] assertion false: FAILURE$ diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index d5224337440..a628391a106 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -1448,6 +1448,19 @@ void goto_instrument_parse_optionst::instrument_goto_program() throw 0; } + if(cmdline.isset("generate-function-body")) + { + auto generate_implementation = generate_function_bodies_factory( + cmdline.get_value("generate-function-body-options"), + goto_model.symbol_table, + *message_handler); + generate_function_bodies( + std::regex(cmdline.get_value("generate-function-body")), + *generate_implementation, + goto_model, + *message_handler); + } + // recalculate numbers, etc. goto_model.goto_functions.update(); } @@ -1522,9 +1535,10 @@ void goto_instrument_parse_optionst::help() " --check-invariant function instruments invariant checking function\n" " --remove-pointers converts pointer arithmetic to base+offset expressions\n" // NOLINT(*) " --splice-call caller,callee prepends a call to callee in the body of caller\n" // NOLINT(*) + " --undefined-function-is-assume-false\n" // NOLINTNEXTLINE(whitespace/line_length) - " --undefined-function-is-assume-false\n" // NOLINTNEXTLINE(whitespace/line_length) " convert each call to an undefined function to assume(false)\n" + HELP_REPLACE_FUNCTION_BODY "\n" "Loop transformations:\n" " --k-induction check loops with k-induction\n" diff --git a/src/goto-instrument/goto_instrument_parse_options.h b/src/goto-instrument/goto_instrument_parse_options.h index 4c9ead8d0dc..a25fba2ea80 100644 --- a/src/goto-instrument/goto_instrument_parse_options.h +++ b/src/goto-instrument/goto_instrument_parse_options.h @@ -24,6 +24,8 @@ Author: Daniel Kroening, kroening@kroening.com #include +#include + // clang-format off #define GOTO_INSTRUMENT_OPTIONS \ "(all)" \ @@ -86,7 +88,9 @@ Author: Daniel Kroening, kroening@kroening.com "(undefined-function-is-assume-false)" \ "(remove-function-body):"\ "(splice-call):" \ - OPT_REMOVE_CALLS_NO_BODY + OPT_REMOVE_CALLS_NO_BODY \ + OPT_REPLACE_FUNCTION_BODY + // clang-format on class goto_instrument_parse_optionst: diff --git a/src/goto-programs/Makefile b/src/goto-programs/Makefile index 6ac8abfae36..378bb4694f8 100644 --- a/src/goto-programs/Makefile +++ b/src/goto-programs/Makefile @@ -53,6 +53,7 @@ SRC = basic_blocks.cpp \ remove_vector.cpp \ remove_virtual_functions.cpp \ replace_java_nondet.cpp \ + generate_function_bodies.cpp \ resolve_inherited_component.cpp \ safety_checker.cpp \ set_properties.cpp \ diff --git a/src/goto-programs/generate_function_bodies.cpp b/src/goto-programs/generate_function_bodies.cpp new file mode 100644 index 00000000000..64d12ce0d4a --- /dev/null +++ b/src/goto-programs/generate_function_bodies.cpp @@ -0,0 +1,489 @@ +/*******************************************************************\ + +Module: Replace bodies of goto functions + +Author: Diffblue Ltd. + +\*******************************************************************/ + +#include "generate_function_bodies.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +void generate_function_bodiest::generate_function_body( + goto_functiont &function, + symbol_tablet &symbol_table, + const irep_idt &function_name) const +{ + PRECONDITION(!function.body_available()); + generate_parameter_names(function, symbol_table, function_name); + generate_function_body_impl(function, symbol_table, function_name); +} + +void generate_function_bodiest::generate_parameter_names( + goto_functiont &function, + symbol_tablet &symbol_table, + const irep_idt &function_name) const +{ + const namespacet ns(symbol_table); + int param_counter = 0; + for(auto ¶meter : function.type.parameters()) + { + if(parameter.get_identifier().empty()) + { + const std::string param_base_name = + parameter.get_base_name().empty() + ? "__param$" + std::to_string(param_counter++) + : id2string(parameter.get_base_name()); + irep_idt new_param_identifier = + id2string(function_name) + "::" + param_base_name; + parameter.set_base_name(param_base_name); + parameter.set_identifier(new_param_identifier); + parameter_symbolt new_param_sym; + new_param_sym.name = new_param_identifier; + new_param_sym.type = parameter.type(); + new_param_sym.base_name = param_base_name; + auto const &function_symbol = symbol_table.lookup_ref(function_name); + new_param_sym.mode = function_symbol.mode; + new_param_sym.module = function_symbol.module; + new_param_sym.location = function_symbol.location; + symbol_table.add(new_param_sym); + } + } + auto &function_symbol = symbol_table.get_writeable_ref(function_name); + function_symbol.type = function.type; +} + +class assume_false_generate_function_bodiest : public generate_function_bodiest +{ +protected: + void generate_function_body_impl( + goto_functiont &function, + const symbol_tablet &symbol_table, + const irep_idt &function_name) const override + { + auto const &function_symbol = symbol_table.lookup_ref(function_name); + // NOLINTNEXTLINE + auto add_instruction = [&]() { + auto instruction = function.body.add_instruction(); + instruction->function = function_name; + instruction->source_location = function_symbol.location; + return instruction; + }; + auto assume_instruction = add_instruction(); + assume_instruction->make_assumption(false_exprt()); + auto end_instruction = add_instruction(); + end_instruction->make_end_function(); + } +}; + +class assert_false_generate_function_bodiest : public generate_function_bodiest +{ +protected: + void generate_function_body_impl( + goto_functiont &function, + const symbol_tablet &symbol_table, + const irep_idt &function_name) const override + { + auto const &function_symbol = symbol_table.lookup_ref(function_name); + // NOLINTNEXTLINE + auto add_instruction = [&]() { + auto instruction = function.body.add_instruction(); + instruction->function = function_name; + instruction->source_location = function_symbol.location; + instruction->source_location.set_function(function_name); + return instruction; + }; + auto assert_instruction = add_instruction(); + assert_instruction->make_assertion(false_exprt()); + const namespacet ns(symbol_table); + std::ostringstream comment_stream; + comment_stream << id2string(ID_assertion) << " " + << format(assert_instruction->guard); + assert_instruction->source_location.set_comment(comment_stream.str()); + assert_instruction->source_location.set_property_class(ID_assertion); + auto end_instruction = add_instruction(); + end_instruction->make_end_function(); + } +}; + +class assert_false_then_assume_false_generate_function_bodiest + : public generate_function_bodiest +{ +protected: + void generate_function_body_impl( + goto_functiont &function, + const symbol_tablet &symbol_table, + const irep_idt &function_name) const override + { + auto const &function_symbol = symbol_table.lookup_ref(function_name); + // NOLINTNEXTLINE + auto add_instruction = [&]() { + auto instruction = function.body.add_instruction(); + instruction->function = function_name; + instruction->source_location = function_symbol.location; + instruction->source_location.set_function(function_name); + return instruction; + }; + auto assert_instruction = add_instruction(); + assert_instruction->make_assertion(false_exprt()); + assert_instruction->source_location.set_function(function_name); + const namespacet ns(symbol_table); + std::ostringstream comment_stream; + comment_stream << id2string(ID_assertion) << " " + << format(assert_instruction->guard); + assert_instruction->source_location.set_comment(comment_stream.str()); + assert_instruction->source_location.set_property_class(ID_assertion); + auto assume_instruction = add_instruction(); + assume_instruction->make_assumption(false_exprt()); + auto end_instruction = add_instruction(); + end_instruction->make_end_function(); + } +}; + +class havoc_generate_function_bodiest : public generate_function_bodiest, + private messaget +{ +public: + havoc_generate_function_bodiest( + std::vector globals_to_havoc, + std::regex parameters_to_havoc, + message_handlert &message_handler) + : generate_function_bodiest(), + messaget(message_handler), + globals_to_havoc(std::move(globals_to_havoc)), + parameters_to_havoc(std::move(parameters_to_havoc)) + { + } + +private: + void havoc_expr_rec( + const exprt &lhs, + const namespacet &ns, + const std::function &add_instruction, + const irep_idt &function_name) const + { + // resolve type symbols + auto const lhs_type = ns.follow(lhs.type()); + // skip constants + if(!lhs_type.get_bool(ID_C_constant)) + { + // expand arrays, structs and union, everything else gets + // assigned nondet + if(lhs_type.id() == ID_struct || lhs_type.id() == ID_union) + { + // Note: In the case of unions it's often enough to assign + // just one member; However consider a case like + // union { struct { const int x; int y; } a; + // struct { int x; const int y; } b;}; + // in this case both a.y and b.x must be assigned + // otherwise these parts stay constant even though + // they could've changed (note that type punning through + // unions is allowed in the C standard but forbidden in C++) + // so we're assigning all members even in the case of + // unions just to be safe + auto const lhs_struct_type = to_struct_union_type(lhs_type); + for(auto const &component : lhs_struct_type.components()) + { + havoc_expr_rec( + member_exprt(lhs, component.get_name(), component.type()), + ns, + add_instruction, + function_name); + } + } + else if(lhs_type.id() == ID_array) + { + auto const lhs_array_type = to_array_type(lhs_type); + if(!lhs_array_type.subtype().get_bool(ID_C_constant)) + { + bool constant_known_array_size = lhs_array_type.size().is_constant(); + if(!constant_known_array_size) + { + warning() << "Cannot havoc non-const array " << format(lhs) + << " in function " << function_name + << ": Unknown array size" << eom; + } + else + { + auto const array_size = + numeric_cast(lhs_array_type.size()); + INVARIANT( + array_size.has_value(), + "Array size should be known constant integer"); + if(array_size.value() == 0) + { + // Pretty much the same thing as a unknown size array + // Technically not allowed by the C standard, but we model + // unknown size arrays in structs this way + warning() << "Cannot havoc non-const array " << format(lhs) + << " in function " << function_name << ": Array size 0" + << eom; + } + else + { + for(mp_integer i = 0; i < array_size.value(); ++i) + { + auto const index = + from_integer(i, lhs_array_type.size().type()); + havoc_expr_rec( + index_exprt(lhs, index, lhs_array_type.subtype()), + ns, + add_instruction, + function_name); + } + } + } + } + } + else + { + auto assign_instruction = add_instruction(); + assign_instruction->make_assignment( + code_assignt(lhs, side_effect_expr_nondett(lhs_type))); + } + } + } + +protected: + void generate_function_body_impl( + goto_functiont &function, + const symbol_tablet &symbol_table, + const irep_idt &function_name) const override + { + auto const &function_symbol = symbol_table.lookup_ref(function_name); + // NOLINTNEXTLINE + auto add_instruction = [&]() { + auto instruction = function.body.add_instruction(); + instruction->function = function_name; + instruction->source_location = function_symbol.location; + return instruction; + }; + const namespacet ns(symbol_table); + for(auto const ¶meter : function.type.parameters()) + { + if( + parameter.type().id() == ID_pointer && + std::regex_match( + id2string(parameter.get_base_name()), parameters_to_havoc)) + { + // if (param != nullptr) { *param = nondet(); } + auto goto_instruction = add_instruction(); + havoc_expr_rec( + dereference_exprt( + symbol_exprt(parameter.get_identifier(), parameter.type()), + parameter.type().subtype()), + ns, + add_instruction, + function_name); + auto label_instruction = add_instruction(); + goto_instruction->make_goto( + label_instruction, + equal_exprt( + symbol_exprt(parameter.get_identifier(), parameter.type()), + null_pointer_exprt(to_pointer_type(parameter.type())))); + label_instruction->make_skip(); + } + } + + for(auto const &global_id : globals_to_havoc) + { + auto const &global_sym = symbol_table.lookup_ref(global_id); + havoc_expr_rec( + symbol_exprt(global_sym.name, global_sym.type), + ns, + add_instruction, + function_name); + } + if(function.type.return_type() != void_typet()) + { + auto return_instruction = add_instruction(); + return_instruction->make_return(); + return_instruction->code = + code_returnt(side_effect_expr_nondett(function.type.return_type())); + } + auto end_function_instruction = add_instruction(); + end_function_instruction->make_end_function(); + } + +private: + const std::vector globals_to_havoc; + std::regex parameters_to_havoc; +}; + +class generate_function_bodies_errort : public std::runtime_error +{ +public: + explicit generate_function_bodies_errort(const std::string &reason) + : runtime_error(reason) + { + } +}; + +/// Create the type that actually generates the functions. +/// \see generate_function_bodies for the syntax of the options +/// parameter +std::unique_ptr generate_function_bodies_factory( + const std::string &options, + const symbol_tablet &symbol_table, + message_handlert &message_handler) +{ + if(options.empty() || options == "nondet-return") + { + return util_make_unique( + std::vector{}, std::regex{}, message_handler); + } + + if(options == "assume-false") + { + return util_make_unique(); + } + + if(options == "assert-false") + { + return util_make_unique(); + } + + if(options == "assert-false-assume-false") + { + return util_make_unique< + assert_false_then_assume_false_generate_function_bodiest>(); + } + + const std::vector option_components = split_string(options, ','); + if(!option_components.empty() && option_components[0] == "havoc") + { + std::regex globals_regex; + std::regex params_regex; + for(std::size_t i = 1; i < option_components.size(); ++i) + { + const std::vector key_value_pair = + split_string(option_components[i], ':'); + if(key_value_pair.size() != 2) + { + throw generate_function_bodies_errort( + "Expected key_value_pair of form argument:value"); + } + if(key_value_pair[0] == "globals") + { + globals_regex = key_value_pair[1]; + } + else if(key_value_pair[0] == "params") + { + params_regex = key_value_pair[1]; + } + else + { + throw generate_function_bodies_errort( + "Unknown option \"" + key_value_pair[0] + "\""); + } + } + std::vector globals_to_havoc; + namespacet ns(symbol_table); + messaget messages(message_handler); + const std::regex cprover_prefix = std::regex("__CPROVER.*"); + for(auto const &symbol : symbol_table.symbols) + { + if( + symbol.second.is_lvalue && symbol.second.is_static_lifetime && + std::regex_match(id2string(symbol.first), globals_regex)) + { + if(std::regex_match(id2string(symbol.first), cprover_prefix)) + { + messages.warning() << "generate function bodies: " + << "matched global '" << id2string(symbol.first) + << "' begins with __CPROVER, " + << "havoc-ing this global may interfere" + << " with analysis" << messaget::eom; + } + globals_to_havoc.push_back(symbol.first); + } + } + return util_make_unique( + std::move(globals_to_havoc), std::move(params_regex), message_handler); + } + throw generate_function_bodies_errort("Can't parse \"" + options + "\""); +} + +/// Generate function bodies with some default behavior +/// A list of currently accepted command line arguments +/// + the type of bodies generated by them +/// +/// assert-false: { assert(false); } +/// +/// assume-false: { assume(false); } +/// +/// assert-false-assume-false: { +/// assert(false); +/// assume(false); } +/// +/// havoc[,params:regex][,globals:regex]: +/// Assign nondet to the targets of pointer-to-non-const parameters +/// matching regex, and non-const globals matching regex and then +/// return nondet for non-void functions, e.g.: +/// +/// int global; const int c_global; +/// int function(int *param, const int *const_param); +/// +/// havoc,params:(?!__).*,globals:(?!__).* (where (?!__) means +/// "not preceded by __", which is recommended to avoid overwrite +/// internal symbols), leads to +/// +/// int function(int *param, consnt int *const_param) { +/// *param = nondet_int(); +/// global = nondet_int(); +/// return nondet_int(); +/// } +/// +/// nondet-return: return nondet for non-void functions +/// +/// \param functions_regex Specifies the functions whose body should +/// be generated +/// \param generate_function_body Specifies what kind of body to generate +/// \param model The goto-model in which to generate the function bodies +/// \param message_handler Destination for status/warning messages +void generate_function_bodies( + const std::regex &functions_regex, + const generate_function_bodiest &generate_function_body, + goto_modelt &model, + message_handlert &message_handler) +{ + messaget messages(message_handler); + const std::regex cprover_prefix = std::regex("__CPROVER.*"); + bool did_generate_body = false; + for(auto &function : model.goto_functions.function_map) + { + if( + !function.second.body_available() && + std::regex_match(id2string(function.first), functions_regex)) + { + if(std::regex_match(id2string(function.first), cprover_prefix)) + { + messages.warning() << "generate function bodies: matched function '" + << id2string(function.first) + << "' begins with __CPROVER " + << "the generated body for this function " + << "may interfere with analysis" << messaget::eom; + } + did_generate_body = true; + generate_function_body.generate_function_body( + function.second, model.symbol_table, function.first); + } + } + if(!did_generate_body) + { + messages.warning() + << "generate function bodies: No function name matched regex" + << messaget::eom; + } +} diff --git a/src/goto-programs/generate_function_bodies.h b/src/goto-programs/generate_function_bodies.h new file mode 100644 index 00000000000..7889b847bf3 --- /dev/null +++ b/src/goto-programs/generate_function_bodies.h @@ -0,0 +1,84 @@ +/*******************************************************************\ + +Module: Replace bodies of goto functions + +Author: Diffblue Ltd. + +\*******************************************************************/ + +#ifndef CPROVER_GOTO_PROGRAMS_GENERATE_FUNCTION_BODIES_H +#define CPROVER_GOTO_PROGRAMS_GENERATE_FUNCTION_BODIES_H + +#include +#include + +#include +#include +#include +#include +#include +#include + +/// Base class for replace_function_body implementations +class generate_function_bodiest +{ +protected: + /// Produce a body for the passed function + /// At this point the body of function is always empty, + /// and all function parameters have identifiers + /// \param function whose body to generate + /// \param symbol_table of the current goto program + /// \param function_name Identifier of function + virtual void generate_function_body_impl( + goto_functiont &function, + const symbol_tablet &symbol_table, + const irep_idt &function_name) const = 0; + +public: + virtual ~generate_function_bodiest() = default; + + /// Replace the function body with one based on the implementation + /// This will work the same whether or not the function already has a body + /// \param function whose body to replace + /// \param symbol_table of the current goto program + /// \param function_name Identifier of function + void generate_function_body( + goto_functiont &function, + symbol_tablet &symbol_table, + const irep_idt &function_name) const; + +private: + /// Generate parameter names for unnamed parameters. + /// CBMC expect functions to have parameter names + /// if they are called and have a body + void generate_parameter_names( + goto_functiont &function, + symbol_tablet &symbol_table, + const irep_idt &function_name) const; +}; + +std::unique_ptr generate_function_bodies_factory( + const std::string &options, + const symbol_tablet &symbol_table, + message_handlert &message_handler); + +void generate_function_bodies( + const std::regex &functions_regex, + const generate_function_bodiest &generate_function_body, + goto_modelt &model, + message_handlert &message_handler); + +#define OPT_REPLACE_FUNCTION_BODY \ + "(generate-function-body):" \ + "(generate-function-body-options):" + +#define HELP_REPLACE_FUNCTION_BODY \ + " --generate-function-body \n" \ + " Generate bodies for functions matching regex" \ + " --generate-function-body-options