From e383dd1d2451fe3df9acb42118c5b0bf5311fe37 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Mon, 9 Dec 2024 18:34:34 -0500 Subject: [PATCH 1/3] Give one data_object_point for referred string --- src/datalog/symbolization.dl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/datalog/symbolization.dl b/src/datalog/symbolization.dl index 160b46fb..0dbd37fe 100644 --- a/src/datalog/symbolization.dl +++ b/src/datalog/symbolization.dl @@ -821,6 +821,12 @@ data_object_point(EA,Size,"string",-2,"string in jump table"):- preferred_data_access(EA,_,EA_ref), jump_table_start(_,_,EA_ref,_,_). +data_object_point(EA,Size,"string",1,"string that has reference"):- + data_object_candidate(EA,Size,"string"), + may_have_symbolic_immediate(Code,EA), + code_in_block(Code,_), + data_limit(EA). + // data access negative heuristic data_object_point(EA,Size,"other",4,"data access"):- data_object_candidate(EA,Size,"other"), From 2c77730f4e95f5b9463ff8061c1980dc25a11864 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Tue, 10 Dec 2024 10:00:40 -0500 Subject: [PATCH 2/3] Add a unit test and change log --- CHANGELOG.md | 1 + .../asm_examples/ex_referred_string/Makefile | 10 +++++ .../ex_referred_string/ex_original.s | 37 +++++++++++++++++++ tests/linux-elf-x64.yaml | 7 ++++ 4 files changed, 55 insertions(+) create mode 100644 examples/asm_examples/ex_referred_string/Makefile create mode 100644 examples/asm_examples/ex_referred_string/ex_original.s diff --git a/CHANGELOG.md b/CHANGELOG.md index f388b359..a5e35361 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ * Generate alignments for function entry blocks depending on address * Fix bug that could result in missed symbolic expressions (`symbol_minus_symbol`) in LEA +* Fix bug that could result in false-positive symbolic data conflicting with real strings # 1.9.0 diff --git a/examples/asm_examples/ex_referred_string/Makefile b/examples/asm_examples/ex_referred_string/Makefile new file mode 100644 index 
00000000..816603ac --- /dev/null +++ b/examples/asm_examples/ex_referred_string/Makefile @@ -0,0 +1,10 @@ + +all: ex_original.s + gcc ex_original.s -no-pie -Wl,-Ttext=0x512350 -o ex + @ ./ex > out.txt +clean: + rm -f ex out.txt ex.gtirb + rm -fr ex.unstripped ex.s *.old* dl_files *.gtirb +check: + @ ./ex > /tmp/res.txt + @ diff out.txt /tmp/res.txt && echo TEST OK diff --git a/examples/asm_examples/ex_referred_string/ex_original.s b/examples/asm_examples/ex_referred_string/ex_original.s new file mode 100644 index 00000000..1e53e576 --- /dev/null +++ b/examples/asm_examples/ex_referred_string/ex_original.s @@ -0,0 +1,37 @@ + .text + + .globl main + .type main, @function +main: +.LFB6: + pushq %rbp + movq %rsp, %rbp + movl $mystring, %edi + call puts + movl $0, %eax + popq %rbp + ret + +.LFE6: + .size main, .-main + + .data + .align 8 # align the data itself: the 8 bytes starting at "P" must sit on a quad boundary + + .quad mydata + .zero 8 + # 0x512350 happens to be the address of `_start` (see the Makefile). + # This example is to test that this address is not symbolized as `_start` + # This program is supposed to print out "#Q". + # If this address is symbolized as `_start`, + # it will print out something else. + .ascii "P" # 0x50 +mystring: + .string "#Q" # 0x23 0x51 + .byte 0x0 + .byte 0x0 + .byte 0x0 + .byte 0x0 + +mydata: + .zero 16 diff --git a/tests/linux-elf-x64.yaml b/tests/linux-elf-x64.yaml index c8676eb0..69cc0569 100644 --- a/tests/linux-elf-x64.yaml +++ b/tests/linux-elf-x64.yaml @@ -524,6 +524,9 @@ tests: <<: *default-test cfg_checks: [] + - name: ex_referred_string + <<: *assembly + # ---------------------------------------------------------------------------- # Assembly examples. (stripped) # ---------------------------------------------------------------------------- @@ -624,6 +627,10 @@ tests: <<: *default-test cfg_checks: [] + - name: ex_referred_string + <<: *assembly + <<: *test-strip-default + # ---------------------------------------------------------------------------- # Relocatable ELF objects (.o). 
# ---------------------------------------------------------------------------- From 54ab2134b94d1a80ef7364f5b4f17a6f135a4c26 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Wed, 11 Dec 2024 11:34:58 -0500 Subject: [PATCH 3/3] Use code(Code) instead of code_in_block(Code,_) --- src/datalog/symbolization.dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datalog/symbolization.dl b/src/datalog/symbolization.dl index 0dbd37fe..147b01ef 100644 --- a/src/datalog/symbolization.dl +++ b/src/datalog/symbolization.dl @@ -824,7 +824,7 @@ data_object_point(EA,Size,"string",-2,"string in jump table"):- data_object_point(EA,Size,"string",1,"string that has reference"):- data_object_candidate(EA,Size,"string"), may_have_symbolic_immediate(Code,EA), - code_in_block(Code,_), + code(Code), data_limit(EA). // data access negative heuristic