Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Checksum code does not work for LLVM-14, but it works for LLVM-13 #4612

Open
luokaink opened this issue May 18, 2023 · 7 comments
Open

Checksum code does not work for LLVM-14, but it works for LLVM-13 #4612

luokaink opened this issue May 18, 2023 · 7 comments

Comments

@luokaink
Copy link

attribute((always_inline))
static inline __u16 caludpcsum(struct iphdr *iph, struct udphdr *udph, void *data_end)
{
__u32 csum_buffer = 0;
__u16 *buf = (void *)udph;

// Compute pseudo-header checksum
csum_buffer += (__u16)iph->saddr;
csum_buffer += (__u16)(iph->saddr >> 16);
csum_buffer += (__u16)iph->daddr;
csum_buffer += (__u16)(iph->daddr >> 16);
csum_buffer += (__u16)iph->protocol << 8;
csum_buffer += udph->len;

// Compute checksum on udp header + payload
for (int i = 0; i < MAX_UDP_SIZE; i += 2) 
{
    if ((void *)(buf + 1) > data_end) 
    {
        break;
    }

    csum_buffer += *buf;
    buf++;
}

if ((void *)buf + 1 <= data_end) 
{
    // In case payload is not 2 bytes aligned
    csum_buffer += *(__u8 *)buf;
}

__u16 csum = (__u16)csum_buffer + (__u16)(csum_buffer >> 16);
csum = ~csum;

return csum;

}

The above code does not work for LLVM-14 in ubutun 22.04, but it works for LLVM-13/LLVM-12. I think there is a bug in LLVM-14 with bcc. The error code:

; for (u16 i = 0; i < MAX_UDP_SIZE; i += 2) {
43: (2d) if r7 > r6 goto pc+158 ; R6=pkt_end(off=0,imm=0) R7=pkt(off=36,r=42,imm=0)
44: (bf) r9 = r6 ; R6=pkt_end(off=0,imm=0) R9_w=pkt_end(off=0,imm=0)
45: (1f) r9 -= r8 ; R8=pkt(off=0,r=42,imm=0) R9_w=scalar()
46: (07) r9 += -36 ; R9_w=scalar()
47: (77) r9 >>= 1 ; R9_w=scalar(umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))
48: (b7) r1 = 739 ; R1_w=739
49: (2d) if r1 > r9 goto pc+1 ; R1_w=739 R9_w=scalar(umin=739,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))
50: (b7) r9 = 739 ; R9=739
51: (b7) r5 = 0 ; R5_w=0
52: (b7) r2 = 7 ; R2_w=7
53: (bf) r1 = r7 ; R1_w=pkt(off=36,r=42,imm=0) R7=pkt(off=36,r=42,imm=0)
54: (2d) if r2 > r9 goto pc+134 ; R2_w=P7 R9=P739
55: (7b) *(u64 *)(r10 -128) = r8 ; R8=pkt(off=0,r=42,imm=0) R10=fp0 fp-128_w=pkt
56: (7b) *(u64 *)(r10 -136) = r3 ; R3=pkt(off=34,r=42,imm=0) R10=fp0 fp-136_w=pkt
57: (7b) *(u64 *)(r10 -152) = r6 ; R6=pkt_end(off=0,imm=0) R10=fp0 fp-152_w=pkt_end
58: (79) r1 = *(u64 *)(r10 -96) ; R1_w=scalar(id=1,umax=4294967295,var_off=(0x0; 0xffffffff)) R10=fp0
59: (7b) *(u64 *)(r10 -112) = r7 ; R7=pkt(off=36,r=42,imm=0) R10=fp0 fp-112_w=pkt
60: (07) r9 += 1 ; R9_w=P740
61: (bf) r1 = r9 ; R1_w=P740 R9_w=P740
62: (bf) r8 = r4 ; R4=scalar(id=4,umin=4352,umax=332027,var_off=(0x0; 0x7ffff)) R8_w=scalar(id=4,umin=4352,umax=332027,var_off=(0x0; 0x7ffff))
63: (b7) r9 = 0 ; R9_w=0
64: (7b) *(u64 *)(r10 -160) = r1 ; R1_w=P740 R10=fp0 fp-160_w=P740
65: (57) r1 &= 2040 ; R1_w=P736
66: (7b) *(u64 *)(r10 -104) = r1 ; R1_w=P736 R10=fp0 fp-104_w=P736
67: (07) r1 += -8 ; R1_w=P728
68: (bf) r5 = r1 ; R1_w=P728 R5_w=P728
69: (77) r5 >>= 3 ; R5_w=P91
70: (07) r5 += 1 ; R5_w=P92
71: (bf) r2 = r5 ; R2_w=P92 R5_w=P92
72: (57) r2 &= 1 ; R2_w=P0
73: (7b) *(u64 *)(r10 -168) = r2 ; R2_w=P0 R10=fp0 fp-168_w=00000000
74: (b7) r2 = 0 ; R2_w=0
75: (7b) *(u64 *)(r10 -144) = r2 ; R2_w=P0 R10=fp0 fp-144_w=00000000
76: (b7) r7 = 0 ; R7_w=0
77: (b7) r6 = 0 ; R6_w=0
78: (b7) r4 = 0 ; R4_w=0
79: (b7) r2 = 0 ; R2_w=0
80: (b7) r3 = 0 ; R3_w=0
81: (b7) r0 = 0 ; R0_w=0
82: (15) if r1 == 0x0 goto pc+51 ; R1_w=P728
83: (18) r1 = 0x3ffffffffffffffe ; R1_w=4611686018427387902
85: (5f) r5 &= r1 ; R1_w=P4611686018427387902 R5_w=P92
86: (b7) r4 = 0 ; R4_w=0
87: (79) r1 = *(u64 *)(r10 -128) ; R1_w=pkt(off=0,r=42,imm=0) R10=fp0
88: (07) r1 += 58 ; R1_w=pkt(off=58,r=42,imm=0)
89: (bf) r2 = r5 ; R2_w=P92 R5_w=P92
90: (67) r2 <<= 3 ; R2_w=P736
91: (7b) *(u64 *)(r10 -144) = r2 ; R2_w=P736 R10=fp0 fp-144_w=P736
92: (b7) r7 = 0 ; R7_w=0
93: (b7) r6 = 0 ; R6_w=0
94: (b7) r2 = 0 ; R2_w=0
95: (b7) r3 = 0 ; R3_w=0
96: (b7) r0 = 0 ; R0=0
97: (7b) *(u64 *)(r10 -88) = r5 ; R5=P92 R10=fp0 fp-88_w=P92
; sum += *ip_payload++;
98: (69) r5 = *(u16 *)(r1 -24) ; R1=pkt(off=58,r=42,imm=0) R5_w=scalar(umax=65535,var_off=(0x0; 0xffff))
; sum += *ip_payload++;
99: (0f) r8 += r5 ; R5_w=scalar(umax=65535,var_off=(0x0; 0xffff)) R8_w=scalar(umin=4352,umax=397562,var_off=(0x0; 0x7ffff))
; sum += *ip_payload++;
100: (69) r5 = *(u16 *)(r1 -16)
invalid access to packet, off=42 size=2, R1(id=0,off=42,r=42)
R1 offset is outside of the packet
processed 100 insns (limit 1000000) max_states_per_insn 0 total_states 4 peak_states 4 mark_read 2

@FedeParola
Copy link
Contributor

Hi @luokaink, I guess LLVM-14 is applying some kind of optimization that makes the eBPF verifier loose track of the check on packet boundary in the for loop.
One quick solution can be declaring the buffer pointer we use to scan the payload as volatile, so the optimizer will be less aggressive on it:

__u16 * volatile buf = (void *)udph;

This unfortunately will make the code slower, since every operation involving the variable will fetch it from memory, instead of keeping it into a register.

Let me know if it works for you.

@luokaink
Copy link
Author

Hi @FedeParola , thanks a lot for your response. I think this solution is just a workaround for this issue. Is there a plan to fix this issue completely? I just use LLVM-12 in my code to avoid this issue by now

@chenhengqi
Copy link
Collaborator

@luokaink Can you post a minimal script to repro the issue ?

@luokaink
Copy link
Author

@luokaink Can you post a minimal script to repro the issue ?

#include <linux/ethtool.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/ipv6.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#include <bcc/BPF.h>

#define XDP_PROGRAM "xdp_prog"

// ebpf kernel program code
const char BPF_BATCH_PROGRAM[] = R"(
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/udp.h>
#include <uapi/linux/bpf.h>

#define MAX_UDP_SIZE 1480

static __always_inline void set_udp_csum(struct iphdr *iph, struct udphdr *udph, void *data_end)
{
__u32 csum_buffer = 0;
__u16 *buf = (void *)udph;

udph->check = 0;

// Compute pseudo-header checksum
csum_buffer += (__u16)iph->saddr;
csum_buffer += (__u16)(iph->saddr >> 16);
csum_buffer += (__u16)iph->daddr;
csum_buffer += (__u16)(iph->daddr >> 16);
csum_buffer += (__u16)iph->protocol << 8;
csum_buffer += udph->len;

// Compute checksum on udp header + payload
for (int i = 0; i < MAX_UDP_SIZE; i += 2) {
if ((void *)(buf + 1) > data_end) {
break;
}

csum_buffer += *buf;
buf++;

}

if ((void *)buf + 1 <= data_end) {
// In case payload is not 2 bytes aligned
csum_buffer += *(__u8 *)buf;
}

__u16 csum = (__u16)csum_buffer + (__u16)(csum_buffer >> 16);
csum = ~csum;

udph->check = csum;
}

int xdp_prog(struct xdp_md *ctx) {
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth;
struct iphdr *iph;

// Drop the malformed packet
if (data + sizeof(struct ethhdr) > data_end) {
return XDP_DROP;
}

eth = (struct ethhdr *)data;
if (eth->h_proto != bpf_htons(ETH_P_IP)) {
return XDP_PASS;
}

iph = data + sizeof(struct ethhdr);
if ((void *)iph + sizeof(struct iphdr) > data_end) {
return XDP_PASS;
}

struct udphdr *udp;
udp = (struct udphdr *) (iph + 1);
if ((void *) (udp + 1) > data_end) {
return XDP_DROP;
}

set_udp_csum(iph, udp, data_end);

return XDP_PASS;
}
)";

void init_port_map(const char *net_dev) {
ebpf::BPF bpf;

auto result = bpf.init(BPF_BATCH_PROGRAM);
if (result.code() != 0) {
exit(-1);
}

int fd = -1;
auto load_res = bpf.load_func(XDP_PROGRAM, BPF_PROG_TYPE_XDP, fd);
if (load_res.code() != 0) {
exit(-1);
}

auto attach_result = bpf_attach_xdp(net_dev, fd, 0);
if (attach_result != 0) {
exit(-1);
}
}

int main() {
init_port_map("lo");
}

@yonghong-song
Copy link
Collaborator

Ya, the following is problematic w.r.t. verifier,

 45: (1f) r9 -= r8 ; R8=pkt(off=0,r=42,imm=0) R9_w=scalar()

I tried to use pkt_end - pkt_start and then use the resulted scalar to deduce packet size.
Unfortunately the packet size determined this way is not reflected to packet pointer itself, so
this caused later verification failure.

For this case, we won't be able to fix verifier as the complexity to enhance verifier for such cases
is too high. We can only either change compiler or have source workaround. Let me take a look
to see how to resolve with minimum user impact.

@yonghong-song
Copy link
Collaborator

The following is another workaround with asm code. Hopefully it won't impact performance a lot.

@@ -47,13 +47,24 @@ csum_buffer += udph->len;
 
 // Compute checksum on udp header + payload
 for (int i = 0; i < MAX_UDP_SIZE; i += 2) {
-if ((void *)(buf + 1) > data_end) {
-break; 
-}
-
-csum_buffer += *buf;
-buf++; 
-
+  u64 cond;
+
+  // cond = 0;
+  // if ((void *)(buf + 1) > data_end)
+  //   cond = 1;
+  asm volatile("%[COND] = 0; \ 
+                if %[BUF] > %[DATA_END] goto 1f; \
+                %[COND] = 1;              \
+                1:                  \
+               "
+               : [COND]"+r"(cond)
+               : [BUF]"r"(buf + 1), [DATA_END]"r"(data_end));                
+
+  if (cond == 0)
+    break;
+
+  csum_buffer += *buf;
+  buf++;
 }
 
 if ((void *)buf + 1 <= data_end) {

I understand using asm code as a workaround is not pleasant. The transformation like data + num < data_end to
num < data_end - data is a perfect legal optimization by the compiler, so there is no compiler bug here.
Should we unconditionally disable all transformation like data + num < data_end to
num < data_end - data ? This will probably cause some heat llvm upstream discussion. Anyway, I continue to discuss with other bpf maintainers/reviewers, hopefully getting some result soon.

@yonghong-song
Copy link
Collaborator

I did further investigation. The transformation like data + num < data_end to num < data_end - data is done at native x86_64 transformation. So there is nothing bpf backend can do.

Currently in bcc infrastructure, after initial IR is generated, there is an optimization pass (see module.cc, function run_pass_manager). This is done with native arch (e.g., x86). I guess if we could change it to compile with
bpf backend, the problem might be solved, or if not we might make llvm bpf backend changes to fix it. If anybody is interested, feel free to take a look!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

4 participants