diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 074b41fafbe3ff..85d9af13f329f9 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -274,6 +274,8 @@ struct jit_context {
 #define X86_PATCH_SIZE		5
 /* Number of bytes that will be skipped on tailcall */
 #define X86_TAIL_CALL_OFFSET	(12 + ENDBR_INSN_SIZE)
+/* Number of extra bytes that will be skipped on poke */
+#define X86_POKE_EXTRA		3
 
 static void push_r12(u8 **pprog)
 {
@@ -440,18 +442,14 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
  * while jumping to another program
  */
 static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
-			  bool tail_call_reachable, bool is_subprog,
+			  bool is_extension, bool is_subprog,
 			  bool is_exception_cb)
 {
 	u8 *prog = *pprog;
 
 	emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash);
-	/* BPF trampoline can be made to work without these nops,
-	 * but let's waste 5 bytes for now and optimize later
-	 */
-	emit_nops(&prog, X86_PATCH_SIZE);
 	if (!ebpf_from_cbpf) {
-		if (tail_call_reachable && !is_subprog)
+		if (!is_extension && !is_subprog)
 			/* When it's the entry of the whole tailcall context,
 			 * zeroing rax means initialising tail_call_cnt.
 			 */
@@ -460,6 +458,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 			/* Keep the same instruction layout. */
 			emit_nops(&prog, 3); /* nop3 */
 	}
+	/* BPF trampoline can be made to work without these nops,
+	 * but let's waste 5 bytes for now and optimize later
+	 */
+	emit_nops(&prog, X86_PATCH_SIZE);
 	/* Exception callback receives FP as third parameter */
 	if (is_exception_cb) {
 		EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */
@@ -483,7 +485,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 	/* sub rsp, rounded_stack_depth */
 	if (stack_depth)
 		EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
-	if (tail_call_reachable)
+	if (!ebpf_from_cbpf)
 		emit_prologue_tail_call(&prog, is_subprog);
 	*pprog = prog;
 }
@@ -573,10 +575,13 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 
 	/*
 	 * See emit_prologue(), for IBT builds the trampoline hook is preceded
-	 * with an ENDBR instruction.
+	 * with an ENDBR instruction and a 3-byte tail_call_cnt initialization
+	 * instruction.
 	 */
 	if (is_endbr(*(u32 *)ip))
 		ip += ENDBR_INSN_SIZE;
+	if (is_bpf_text_address((long)ip))
+		ip += X86_POKE_EXTRA;
 
 	return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
 }
@@ -1365,7 +1370,7 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
 		  int oldproglen, struct jit_context *ctx, bool jmp_padding)
 {
-	bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
+	bool is_extension = bpf_prog->type == BPF_PROG_TYPE_EXT;
 	struct bpf_insn *insn = bpf_prog->insnsi;
 	bool callee_regs_used[4] = {};
 	int insn_cnt = bpf_prog->len;
@@ -1383,7 +1388,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	detect_reg_usage(insn, insn_cnt, callee_regs_used);
 
 	emit_prologue(&prog, bpf_prog->aux->stack_depth,
-		      bpf_prog_was_classic(bpf_prog), tail_call_reachable,
+		      bpf_prog_was_classic(bpf_prog), is_extension,
 		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
 	/* Exception callback will clobber callee regs for its own use, and
 	 * restore the original callee regs from main prog's stack frame.
@@ -2077,10 +2082,8 @@ st:			if (is_imm8(insn->off))
 			u8 *ip = image + addrs[i - 1];
 
 			func = (u8 *) __bpf_call_base + imm32;
-			if (tail_call_reachable) {
-				LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
-				ip += 7;
-			}
+			LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
+			ip += 7;
 			if (!imm32)
 				return -EINVAL;
 			ip += x86_call_depth_emit_accounting(&prog, func, ip);
@@ -2877,7 +2880,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	 *                     [ ...        ]
 	 *                     [ stack_arg2 ]
 	 * RBP - arg_stack_off [ stack_arg1 ]
-	 * RSP                 [ tail_call_cnt_ptr ] BPF_TRAMP_F_TAIL_CALL_CTX
+	 * RSP                 [ tail_call_cnt_ptr ] BPF_TRAMP_F_TAIL_CALL
 	 */
 
 	/* room for return value of orig_call or fentry prog */
@@ -2923,6 +2926,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		 */
 		if (is_endbr(*(u32 *)orig_call))
 			orig_call += ENDBR_INSN_SIZE;
+		if (is_bpf_text_address((long)orig_call))
+			orig_call += X86_POKE_EXTRA;
 		orig_call += X86_PATCH_SIZE;
 	}
 
@@ -2949,8 +2954,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		/* sub rsp, stack_size */
 		EMIT4(0x48, 0x83, 0xEC, stack_size);
 	}
-	if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
-		EMIT1(0x50); /* push rax */
+	if (flags & BPF_TRAMP_F_TAIL_CALL)
+		EMIT1(0x50); /* push rax */
 	/* mov QWORD PTR [rbp - rbx_off], rbx */
 	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
 
@@ -3005,7 +3010,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		restore_regs(m, &prog, regs_off);
 		save_args(m, &prog, arg_stack_off, true);
 
-		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
+		if (flags & BPF_TRAMP_F_TAIL_CALL) {
 			/* Before calling the original function, load the
 			 * tail_call_cnt_ptr from stack to rax.
 			 */
@@ -3025,6 +3030,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		/* remember return value in a stack for bpf prog to access */
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 		im->ip_after_call = image + (prog - (u8 *)rw_image);
+		emit_nops(&prog, X86_POKE_EXTRA);
 		emit_nops(&prog, X86_PATCH_SIZE);
 	}
 
@@ -3067,7 +3073,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 			ret = -EINVAL;
 			goto cleanup;
 		}
-	} else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
+	} else if (flags & BPF_TRAMP_F_TAIL_CALL) {
 		/* Before running the original function, load the
 		 * tail_call_cnt_ptr from stack to rax.
 		 */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dc63083f76b7c9..30a3deeb6ecd70 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1074,10 +1074,10 @@ struct btf_func_model {
  */
 #define BPF_TRAMP_F_SHARE_IPMODIFY	BIT(6)
 
-/* Indicate that current trampoline is in a tail call context. Then, it has to
- * cache and restore tail_call_cnt to avoid infinite tail call loop.
+/* Indicate that the current trampoline is required to cache and restore
+ * tail_call_cnt or tail_call_cnt_ptr to avoid an infinite tail call loop.
  */
-#define BPF_TRAMP_F_TAIL_CALL_CTX	BIT(7)
+#define BPF_TRAMP_F_TAIL_CALL		BIT(7)
 
 /*
  * Indicate the trampoline should be suitable to receive indirect calls;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index f8302a5ca400da..257adbc83bae3e 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -409,8 +409,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
 		goto out;
 	}
 
-	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
-	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
+	/* clear all bits except SHARE_IPMODIFY and TAIL_CALL */
+	tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL);
 
 	if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
 	    tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 33270b3636892e..6bdf3eccfbe4d0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -22112,8 +22112,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		if (!tr)
 			return -ENOMEM;
 
-		if (tgt_prog && tgt_prog->aux->tail_call_reachable)
-			tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
+		if (tgt_prog)
+			tr->flags = BPF_TRAMP_F_TAIL_CALL;
 
 		prog->aux->dst_trampoline = tr;
 		return 0;
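
Taken together, the x86 changes above establish a single layout invariant: every patchable BPF entry, whether a program prologue or the trampoline's ip_after_call slot, now reads [optional ENDBR] + [3-byte tail_call_cnt init or nop3] + [5-byte nop patch site], and bpf_arch_text_poke() skips both prefixes before patching. Below is a minimal user-space sketch of that skip arithmetic, not code from the patch: the endbr64 encoding, the ENDBR_INSN_SIZE value (4, as with CONFIG_X86_KERNEL_IBT), and the always-true is_bpf_text_address() stub are illustrative assumptions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ENDBR_INSN_SIZE	4	/* 0 when IBT is disabled */
#define X86_PATCH_SIZE	5	/* call/jmp rel32 patch site */
#define X86_POKE_EXTRA	3	/* tail_call_cnt init (or nop3) before it */

static bool is_endbr(uint32_t insn)
{
	return insn == 0xfa1e0ff3u;	/* endbr64, read little-endian */
}

static bool is_bpf_text_address(long ip)
{
	(void)ip;
	return true;	/* stub: pretend ip is always inside a BPF image */
}

/* Mirrors the adjusted entry of bpf_arch_text_poke(): skip ENDBR if
 * present, then skip the 3 extra bytes that now precede the 5-byte
 * nop patch site in every BPF image.
 */
static uint8_t *poke_site(uint8_t *ip)
{
	uint32_t insn;

	memcpy(&insn, ip, sizeof(insn));	/* unaligned-safe read */
	if (is_endbr(insn))
		ip += ENDBR_INSN_SIZE;
	if (is_bpf_text_address((long)ip))
		ip += X86_POKE_EXTRA;
	return ip;
}

int main(void)
{
	/* A fake prologue: endbr64, xor rax,rax, then a 5-byte nop. */
	uint8_t image[] = {
		0xf3, 0x0f, 0x1e, 0xfa,		/* endbr64 */
		0x48, 0x31, 0xc0,		/* xor rax, rax */
		0x0f, 0x1f, 0x44, 0x00, 0x00,	/* nopl 0x0(%rax,%rax,1) */
	};

	/* Prints 7: ENDBR_INSN_SIZE + X86_POKE_EXTRA. */
	printf("poke target at offset %td\n", poke_site(image) - image);
	return 0;
}

The trampoline side keeps this symmetric: __arch_prepare_bpf_trampoline() now emits X86_POKE_EXTRA nops ahead of the X86_PATCH_SIZE nops at im->ip_after_call, so the same two skips in bpf_arch_text_poke() land on the patch site for trampoline images as well as for program prologues.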