Skip to content

Commit

Permalink
Create a known-patchable sequence for rdtsc trapping
Browse files Browse the repository at this point in the history
I was looking at some disassembly that looked like this:
```
   0x00007f5d7d0e7ffd <+29>:	48 89 7c 24 10	mov    %rdi,0x10(%rsp)
   0x00007f5d7d0e8002 <+34>:	4c 8d 64 24 20	lea    0x20(%rsp),%r12
   0x00007f5d7d0e8007 <+39>:	eb 37	jmp    0x7f5d7d0e8040 <_ZN5tracy8Profiler14CalibrateDelayEv+96>
   0x00007f5d7d0e8009 <+41>:	0f 1f 80 00 00 00 00	nopl   0x0(%rax)
   0x00007f5d7d0e8010 <+48>:	e9 03 8b 1a 3c	jmpq   0x7f5db9290b18
   0x00007f5d7d0e8015 <+53>:	90	nop
   0x00007f5d7d0e8016 <+54>:	4c 8d 2c 02	lea    (%rdx,%rax,1),%r13
   0x00007f5d7d0e801a <+58>:	e8 31 60 fc ff	callq  0x7f5d7d0ae050 <_ZN5tracy28HardwareSupportsInvariantTSCEv@plt>
   0x00007f5d7d0e801f <+63>:	84 c0	test   %al,%al
   0x00007f5d7d0e8021 <+65>:	74 4a	je     0x7f5d7d0e806d <_ZN5tracy8Profiler14CalibrateDelayEv+141>
=> 0x00007f5d7d0e8023 <+67>:	0f 31	rdtsc
   0x00007f5d7d0e8025 <+69>:	48 c1 e2 20	shl    $0x20,%rdx
```
We do have a syscallbuf template for `shl $0x20,%rdx`.
Irritatingly, however, the `7c 24` at the top triggers rr's interfering branch heuristic
by false-positive coincidence. Even more unfortunately, this is a calibration loop
for a tracing library, so it does actually just call `rdtsc` in a loop a bunch.
Additionally this is shipped as a binary, so every user is hitting this unfortunate coincidence.
Of course, since we're shipping the library, I can suggest we modify the source to make
it more friendly to rr patching. That said, I wasn't quite sure what would be best to suggest here.

At first I thought, I could simply insert a bunch of nops after.
Unfortunately, that doesn't actually do anything, because the branch could still
be interfering into the middle of the nop sled.
Then I thought maybe a singular large `nop` would be sufficient, but of course,
there could be something that intentionally conditionally skips the rdtsc.

In this PR, I propose that we make the following sequence known-safe:
```
nopl 0(%ax, %ax, 1) # single instruction, 5-byte nop
rdtsc
```

This currently wouldn't quite work, because an interfering jump to the
`rdtsc` would simply hit the trailing nop padding, ignoring the rdtsc.
However, if we slightly tweak the patch to instead use:

```
1: jmp %hook
[usual nop padding here]
jmp 1b
2:
```

Then everything works out well. Our return address is past the entire patch
region, so we return to the correct place and it doesn't matter whether
we jump to the nop or to the instruction itself. Of course, an actual
interfering branch over the nop is unlikely (though I suppose not
impossible, depending on what comes before); since interfering branches
are supported, this nicely takes care of the spurious branch problem
above (assuming the extra 5-byte nop is inserted).
  • Loading branch information
Keno committed Aug 18, 2023
1 parent 9363924 commit 2a49b30
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 57 deletions.
144 changes: 87 additions & 57 deletions src/Monkeypatcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,11 @@ remote_code_ptr Monkeypatcher::get_jump_stub_exit_breakpoint(remote_code_ptr ip,
return nullptr;
}

static bool hook_can_ignore_interfering_branches(const syscall_patch_hook& hook, size_t jump_patch_size) {
return hook.patch_region_length >= jump_patch_size &&
(hook.flags & (PATCH_IS_MULTIPLE_INSTRUCTIONS | PATCH_IS_NOP_INSTRUCTIONS)) == PATCH_IS_NOP_INSTRUCTIONS;
}

/**
* Some functions make system calls while storing local variables in memory
* below the stack pointer. We need to decrement the stack pointer by
Expand Down Expand Up @@ -522,7 +527,8 @@ static bool patch_syscall_with_hook_x86ish(Monkeypatcher& patcher,
remote_code_ptr ip_of_instruction,
size_t instruction_length,
uint32_t fake_syscall_number) {
uint8_t jump_patch[instruction_length + hook.patch_region_length];
size_t patch_region_size = instruction_length + hook.patch_region_length;
uint8_t jump_patch[patch_region_size];
// We're patching in a relative jump, so we need to compute the offset from
// the end of the jump to our actual destination.
remote_ptr<uint8_t> jump_patch_start = ip_of_instruction.to_data_ptr<uint8_t>();
Expand All @@ -531,7 +537,7 @@ static bool patch_syscall_with_hook_x86ish(Monkeypatcher& patcher,
}
remote_ptr<uint8_t> jump_patch_end = jump_patch_start + JumpPatch::size;
remote_ptr<uint8_t> return_addr =
jump_patch_start + instruction_length + hook.patch_region_length;
jump_patch_start + patch_region_size;

remote_ptr<uint8_t> extended_jump_start;
if (fake_syscall_number) {
Expand Down Expand Up @@ -575,6 +581,15 @@ static bool patch_syscall_with_hook_x86ish(Monkeypatcher& patcher,
// pad with NOPs to the next instruction
static const uint8_t NOP = 0x90;
memset(jump_patch, NOP, sizeof(jump_patch));
if (hook_can_ignore_interfering_branches(hook, JumpPatch::size)) {
// If the preceding instruction is long enough to contain the entire jump,
// and is a nop, replace the original instruction by a jump back to the
// start of the patch region. This allows us to ignore (likely spurious,
// but nevertheless), interfering branches, because whether we jump to the
// instruction or the start of the patch region, the effect is the same.
jump_patch[patch_region_size-2] = 0xeb; // jmp rel
jump_patch[patch_region_size-1] = (int8_t)-patch_region_size;
}
JumpPatch::substitute(jump_patch, jump_offset32);
bool ok = true;
write_and_record_bytes(t, jump_patch_start, sizeof(jump_patch), jump_patch, &ok);
Expand Down Expand Up @@ -951,6 +966,18 @@ bool Monkeypatcher::try_patch_vsyscall_caller(RecordTask* t, remote_code_ptr ret
return true;
}

static uint64_t jump_patch_size(SupportedArch arch)
{
switch (arch) {
case x86: return X86SysenterVsyscallSyscallHook::size;
case x86_64: return X64JumpMonkeypatch::size;
case aarch64: return 2*rr::syscall_instruction_length(arch);
default:
FATAL() << "Unimplemented for this architecture";
return 0;
}
}

const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t,
remote_code_ptr ip,
bool entering_syscall,
Expand Down Expand Up @@ -1020,69 +1047,72 @@ const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t,
continue;
}

// Search for a following short-jump instruction that targets an
// instruction
// after the syscall. False positives are OK.
// glibc-2.23.1-8.fc24.x86_64's __clock_nanosleep needs this.
bool found_potential_interfering_branch = false;
for (size_t i = buf_valid_start_offset; i + 2 <= buf_valid_end_offset; ++i) {
uint8_t b = bytes[i];
// Check for short conditional or unconditional jump
if (b == 0xeb || (b >= 0x70 && b < 0x80)) {
int offset_from_instruction_end = (int)i + 2 + (int8_t)bytes[i + 1] -
(LOOK_BACK + instruction_length);
if (hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) {
if (!(hook.flags & PATCH_IS_MULTIPLE_INSTRUCTIONS)) {
found_potential_interfering_branch = offset_from_instruction_end == -(ssize_t)instruction_length;
} else {
found_potential_interfering_branch =
offset_from_instruction_end <= -(ssize_t)instruction_length &&
offset_from_instruction_end > -(ssize_t)(instruction_length + hook.patch_region_length);
}
} else {
if (!(hook.flags & PATCH_IS_MULTIPLE_INSTRUCTIONS)) {
found_potential_interfering_branch = offset_from_instruction_end == 0;
if (!hook_can_ignore_interfering_branches(hook, jump_patch_size(t->arch()))) {
// Search for a following short-jump instruction that targets an
// instruction
// after the syscall. False positives are OK.
// glibc-2.23.1-8.fc24.x86_64's __clock_nanosleep needs this.
bool found_potential_interfering_branch = false;
for (size_t i = buf_valid_start_offset; i + 2 <= buf_valid_end_offset; ++i) {
uint8_t b = bytes[i];
// Check for short conditional or unconditional jump
if (b == 0xeb || (b >= 0x70 && b < 0x80)) {
int offset_from_instruction_end = (int)i + 2 + (int8_t)bytes[i + 1] -
(LOOK_BACK + instruction_length);
if (hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) {
if (!(hook.flags & PATCH_IS_MULTIPLE_INSTRUCTIONS)) {
found_potential_interfering_branch = offset_from_instruction_end == -(ssize_t)instruction_length;
} else {
found_potential_interfering_branch =
offset_from_instruction_end <= -(ssize_t)instruction_length &&
offset_from_instruction_end > -(ssize_t)(instruction_length + hook.patch_region_length);
}
} else {
found_potential_interfering_branch =
offset_from_instruction_end >= 0 && offset_from_instruction_end < hook.patch_region_length;
if (!(hook.flags & PATCH_IS_MULTIPLE_INSTRUCTIONS)) {
found_potential_interfering_branch = offset_from_instruction_end == 0;
} else {
found_potential_interfering_branch =
offset_from_instruction_end >= 0 && offset_from_instruction_end < hook.patch_region_length;
}
}
}
if (found_potential_interfering_branch) {
LOG(debug) << "Found potential interfering branch at "
<< ip.to_data_ptr<uint8_t>() - LOOK_BACK + i;
break;
}
}

if (found_potential_interfering_branch) {
LOG(debug) << "Found potential interfering branch at "
<< ip.to_data_ptr<uint8_t>() - LOOK_BACK + i;
break;
continue;
}
}

if (!found_potential_interfering_branch) {
remote_code_ptr start_range, end_range;
if (hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) {
start_range = ip - hook.patch_region_length;
// if a thread has its RIP at the end of our range,
// it could be immediately after a syscall instruction that
// will need to be restarted. Patching out that instruction will
// prevent the kernel from restarting it. So, extend our range by
// one byte to detect such threads.
end_range = ip + instruction_length + 1;
} else {
start_range = ip;
end_range = ip + instruction_length + hook.patch_region_length;
}
if (!safe_for_syscall_patching(start_range, end_range, t)) {
LOG(debug)
<< "Temporarily declining to patch syscall at " << ip
<< " because a different task has its ip in the patched range";
return nullptr;
}
LOG(debug) << "Trying to patch bytes "
<< bytes_to_string(
following_bytes,
min<size_t>(following_bytes_count,
sizeof(syscall_patch_hook::patch_region_bytes)));

return &hook;
remote_code_ptr start_range, end_range;
if (hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) {
start_range = ip - hook.patch_region_length;
// if a thread has its RIP at the end of our range,
// it could be immediately after a syscall instruction that
// will need to be restarted. Patching out that instruction will
// prevent the kernel from restarting it. So, extend our range by
// one byte to detect such threads.
end_range = ip + instruction_length + 1;
} else {
start_range = ip;
end_range = ip + instruction_length + hook.patch_region_length;
}
if (!safe_for_syscall_patching(start_range, end_range, t)) {
LOG(debug)
<< "Temporarily declining to patch syscall at " << ip
<< " because a different task has its ip in the patched range";
return nullptr;
}
LOG(debug) << "Trying to patch bytes "
<< bytes_to_string(
following_bytes,
min<size_t>(following_bytes_count,
sizeof(syscall_patch_hook::patch_region_bytes)));

return &hook;
}

LOG(debug) << "Failed to find a syscall hook for bytes "
Expand Down
5 changes: 5 additions & 0 deletions src/preload/preload_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ static inline const char* extract_file_name(const char* s) {
* (rather than the first), which requires special handling.
*/
#define PATCH_SYSCALL_INSTRUCTION_IS_LAST (1 << 1)
/* All instructions in the patch are nops and their execution is thus not
 * observable. This may allow more aggressive handling of interfering branches.
 */
#define PATCH_IS_NOP_INSTRUCTIONS (1 << 2)


/**
* To support syscall buffering, we replace syscall instructions with a "call"
Expand Down
4 changes: 4 additions & 0 deletions src/preload/syscall_hook.S
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,10 @@ SYSCALLHOOK_START(_syscall_hook_trampoline_48_89_fb)
callq __morestack
SYSCALLHOOK_END(_syscall_hook_trampoline_48_89_fb)

SYSCALLHOOK_START(_syscall_hook_trampoline_nops)
callq __morestack
SYSCALLHOOK_END(_syscall_hook_trampoline_nops)

#elif defined(__aarch64__)
.text

Expand Down
7 changes: 7 additions & 0 deletions src/preload/syscallbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,7 @@ static void __attribute__((constructor)) init_process(void) {
extern RR_HIDDEN void _syscall_hook_trampoline_48_89_e5(void);
extern RR_HIDDEN void _syscall_hook_trampoline_48_89_fb(void);
extern RR_HIDDEN void _syscall_hook_trampoline_48_8d_b3_f0_08_00_00(void);
extern RR_HIDDEN void _syscall_hook_trampoline_nops(void);

#define MOV_RDX_VARIANTS \
MOV_RDX_TO_REG(48, d0) \
Expand Down Expand Up @@ -1002,6 +1003,12 @@ static void __attribute__((constructor)) init_process(void) {
3,
{ 0x48, 0x89, 0xfb },
(uintptr_t)_syscall_hook_trampoline_48_89_fb },
/* Support explicit 5 byte nop (`nopl 0(%ax, %ax, 1)`) before 'rdtsc' or syscall (may ignore interfering branches) */
{ PATCH_SYSCALL_INSTRUCTION_IS_LAST |
PATCH_IS_NOP_INSTRUCTIONS,
5,
{ 0x0f, 0x1f, 0x44, 0x00, 0x00 },
(uintptr_t)_syscall_hook_trampoline_nops }
};
#elif defined(__aarch64__)
extern RR_HIDDEN void _syscall_hook_trampoline_raw(void);
Expand Down
29 changes: 29 additions & 0 deletions src/test/x86/rdtsc_interfering.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,33 @@

#include "util.h"

void do_rdtsc_loop(uint32_t with_jump) {
#ifdef __x86_64__
int i;

uint64_t prev_tsc = 0;
for (i = 0; i < 2500000; ++i) {
uint32_t out;
uint32_t out_hi;
asm volatile ("test %%rcx, %%rcx\n\t"
// This branch is useless, but let's make sure it works anyway.
// In practice, we mostly want to make sure that spurious interfering
// branches don't prevent patching so that the `nopl 0(%ax, %ax, 1); rdtsc`
// sequence is guaranteed to patch properly.
"jnz 1f\n\t"
".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"
"1: rdtsc\n\t"
: "=a"(out), "=d"(out_hi)
: "c"(with_jump)
: "cc");
uint64_t tsc = ((uint64_t)out_hi << 32) + out;
test_assert(prev_tsc < tsc);
prev_tsc = tsc;
}
#endif
(void)with_jump;
}

uint64_t rdtsc_interfering_sideeffect(uint32_t do_jump) {
uint64_t check = 0xdeadbeef;
uint64_t challenge = 0xfeedbeef;
Expand Down Expand Up @@ -33,6 +60,8 @@ int main(void) {
uint64_t tsc1 = rdtsc_interfering_sideeffect(0);
uint64_t tsc2 = rdtsc_interfering_sideeffect(1);
test_assert(tsc1 < tsc2);
do_rdtsc_loop(0);
do_rdtsc_loop(1);
atomic_puts("EXIT-SUCCESS");
return 0;
}

0 comments on commit 2a49b30

Please sign in to comment.