Which feature does linux kernel update that lead to heap not executable on linux 5.10 but executable on linux 5.0 while NX(DEP) disabled - Debian

Ex1t
June 27, 2021
281 views
0 votes
2 Answers

It’s a binary compiled from C. I wanted to run some code on the heap, but I got a segmentation fault. Then I used gdb to check the binary. It showed that NX was disabled (the binary was compiled with the -z execstack option) and that I had RWX access on the stack segment, while I only had RW access on the heap segment. This happened on Debian with a Linux 5.10 kernel. Then I ran this binary in an Alpine Docker container with a Linux 5.10 kernel on my MacBook, and it happened there, too.

I remembered that I could execute code on the heap a year earlier, so I tried running this binary again on an old Ubuntu 18 with a Linux 5.0 kernel. It returned successfully. The code in the stack was executed without error.

In short, my question is whether any change made between Linux 5.0 and Linux 5.10 causes the heap to no longer be executable.

Tags: c heap-memory kernel linux security

Answers

When ELF binaries are linked with the execstack linker option, the permissions for the ELF segment GNU_STACK are changed from RW (read and write) to RWE (read, write, and execute).

When the Linux kernel loads an ELF binary executable, it looks at the GNU_STACK segment to see if the binary wants an executable stack. See fs/binfmt_elf.c:load_elf_binary() for details, especially the executable_stack variable, and the EXSTACK_DEFAULT macro. Depending on the kernel configuration, it may or may not make the stack executable.

This is an often forgotten backwards compatibility support feature, which can also affect other memory mappings beyond just stacks. Here is an example program one can compile and run, to verify:

// SPDX-License-Identifier: CC0-1.0
// -*- coding: utf-8 -*-

#define  _POSIX_C_SOURCE  200809L
#define  _GNU_SOURCE
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <errno.h>

#ifndef  BUFFER_SIZE
#define  BUFFER_SIZE  128
#endif

/* Permission flags for struct mapping's perm field; combine with bitwise OR. */
#define  PERM_R  1  /* Readable */
#define  PERM_W  2  /* Writable */
#define  PERM_X  4  /* Executable */
#define  PERM_P  8  /* Private */
#define  PERM_S 16  /* Shared */
#define  PERM_RWP  (PERM_R|PERM_W|PERM_P)  /* Readable, writable, and private */

/* One parsed line of /proc/self/maps, kept as a node of a singly linked list. */
struct mapping {
    struct mapping *next;  /* Next node in the list, or NULL at the end */
    void           *addr;  /* Start address of the mapping (inclusive) */
    void           *ends;  /* End address of the mapping (exclusive) */
    unsigned char   perm;  /* Bitwise OR of PERM_* flags */
    char            name[];  /* NUL-terminated remainder of the maps line after the inode field; may be empty */
};

/*
 * Reverse a singly linked list of mappings in place.
 *
 * Returns the new head (the former last node), or NULL if list is NULL.
 */
static inline struct mapping *reverse_mappings(struct mapping *list)
{
    struct mapping  *head = NULL;

    for (struct mapping *node = list, *rest; node != NULL; node = rest) {
        rest = node->next;      /* remember the not-yet-visited remainder */
        node->next = head;      /* push this node onto the reversed list */
        head = node;
    }

    return head;
}

/*
 * Free every node of a mapping list.  Passing NULL is a no-op.
 *
 * Each node's fields are cleared right before the node is released
 * (presumably so that a stale pointer to a freed node is easier to spot).
 * Writing name[0] assumes the node was allocated with room for at least
 * one name byte, which is how get_mappings() allocates them.
 */
static inline void  free_mappings(struct mapping *list)
{
    while (list) {
        struct mapping  *curr = list;
        list = list->next;
        curr->next = NULL;
        curr->addr = NULL;
        curr->ends = NULL;
        curr->perm = 0;
        curr->name[0] = '\0';
        free(curr);
    }
}

/*
 * Read /proc/self/maps and return its entries as a linked list of
 * struct mapping, in the same order as the file lists them.
 *
 * On failure a diagnostic is printed to standard error, errno is set
 * (to the fopen() error, EIO, or ENOMEM), and NULL is returned.
 * The caller owns the returned list; release it with free_mappings().
 */
static struct mapping *get_mappings(void)
{
    struct mapping  *list = NULL;
    char            *line = NULL;
    size_t           size = 0;
    FILE            *maps;

    maps = fopen("/proc/self/maps", "r");
    if (!maps) {
        /* fprintf() may clobber errno, so keep a copy for the caller. */
        const int  saved_errno = errno;
        fprintf(stderr, "Cannot read /proc/self/maps: %s.\n", strerror(saved_errno));
        errno = saved_errno;
        return NULL;
    }

    while (1) {
        struct mapping  *curr;
        unsigned long    addr = 0;
        unsigned long    ends = 0;
        char             perms[8] = { 0 };
        int              nameoff;

        /* getline() returns -1 at end of input and on error;
           the two are told apart after the loop. */
        ssize_t  len = getline(&line, &size, maps);
        if (len == -1)
            break;

        if (len < 1)
            continue;

        /* Strip the trailing newline, if any. */
        if (line[len-1] == '\n')
            line[--len] = '\0';

        /* Fields: start-end perms offset dev inode [name].  The offset, dev,
           and inode fields are skipped; %n records where the name begins. */
        nameoff = 0;
        if (sscanf(line, "%lx-%lx %7s %*s %*s %*s %n", &addr, &ends, perms, &nameoff) < 3 || nameoff <= 0) {
            fprintf(stderr, "Cannot parse /proc/self/maps line: '%s'.\n", line);
            free_mappings(list);
            free(line);
            fclose(maps);
            errno = EIO;
            return NULL;
        }

        size_t  namelen = strlen(line + nameoff);

        /* Room for the fixed part, the name, and its terminating NUL. */
        curr = malloc(sizeof (struct mapping) + namelen + 1);
        if (!curr) {
            fprintf(stderr, "Out of memory parsing /proc/self/maps.\n");
            free_mappings(list);
            free(line);
            fclose(maps);
            errno = ENOMEM;
            return NULL;
        }

        curr->addr = (void *)addr;
        curr->ends = (void *)ends;
        curr->perm = ((strchr(perms, 'r')) ? PERM_R : 0)
                   | ((strchr(perms, 'w')) ? PERM_W : 0)
                   | ((strchr(perms, 'x')) ? PERM_X : 0)
                   | ((strchr(perms, 'p')) ? PERM_P : 0)
                   | ((strchr(perms, 's')) ? PERM_S : 0);

        if (namelen > 0)
            memcpy(curr->name, line + nameoff, namelen);

        curr->name[namelen] = '\0';

        /* Prepend; the list is put back into file order before returning. */
        curr->next = list;
        list       = curr;
    }

    free(line);
    line = NULL;
    size = 0;

    /* The loop must have ended at a clean end-of-file. */
    if (ferror(maps) || !feof(maps)) {
        fclose(maps);
        free_mappings(list);
        fprintf(stderr, "Error reading /proc/self/maps.\n");
        errno = EIO;
        return NULL;
    } else
    if (fclose(maps)) {
        free_mappings(list);
        fprintf(stderr, "Error closing /proc/self/maps.\n");
        errno = EIO;
        return NULL;
    } else
    if (!list) {
        fprintf(stderr, "No mappings listed in /proc/self/maps.\n");
        errno = EIO;
        return NULL;
    }

    return reverse_mappings(list);
}

/*
 * Test whether addr lies in the half-open address range [start, limit).
 *
 * Returns 1 if start <= addr < limit, and 0 otherwise.  The pointers are
 * compared by numeric value and are never dereferenced.
 */
static inline int  within(const void *const start, const void *const limit, const volatile void *addr)
{
    const unsigned long  lo = (unsigned long)start;
    const unsigned long  hi = (unsigned long)limit;
    const unsigned long  at = (unsigned long)addr;

    if (at < lo)
        return 0;

    return at < hi;
}

/*
 * Fill len bytes at dst with data that is only known at run time.
 *
 * The generator is seeded from the process ID and the current time, then
 * stepped once per byte with a 32-bit Xorshift (Marsaglia, shifts 13/17/5).
 * Each byte written is the low byte of the generator state.
 *
 * Returns the generator state after the last step; with len == 0 that is
 * the seed itself.
 */
static inline unsigned int  stuff(volatile unsigned char *dst, const size_t len)
{
    unsigned int  state = 11385419 * (unsigned int)getpid()
                        + 1792824239 * (unsigned int)time(NULL);

    for (size_t i = 0; i < len; i++) {
        dst[i] = state;         /* truncates to the low byte */
        state ^= state << 13;
        state ^= state >> 17;
        state ^= state << 5;
    }

    return state;
}

/* File-scope static buffer; main() reports whether the mapping containing it is executable. */
static volatile unsigned char  static_global[BUFFER_SIZE];

/*
 * Report which kinds of data memory in this process are executable.
 *
 * Creates one object of each kind (file-scope static, function-scope static,
 * automatic array, small and large malloc(), anonymous read-write mmap()),
 * looks up the /proc/self/maps entry containing each, and prints whether
 * that entry has execute permission.  Then lists the stack, the heap, and
 * every other read-write private mapping not backed by a path.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the mappings cannot be read.
 */
int main(void)
{
    static volatile unsigned char  static_local[BUFFER_SIZE];
    volatile unsigned char         nonstatic_local[BUFFER_SIZE];

    /* Just stuff runtime data to the three buffers. */
    if (!stuff(static_global, sizeof static_global) ||
        !stuff(static_local, sizeof static_local) ||
        !stuff(nonstatic_local, sizeof nonstatic_local)) {
        fprintf(stderr, "Note: We used a bad Xorshift seed.  No harm done.\n");
    }

    /* Do a small (1 KiB) malloc(). */
    void  *small = malloc(1024);
    if (!small) {
        fprintf(stderr, "Note: Failed to allocate 1 KiB.\n");
    }

    /* Do a large (2 MiB) malloc(). */
    void  *large = malloc(2048*1024);
    if (!large) {
        fprintf(stderr, "Note: Failed to allocate 2 MiB.\n");
    }

    /* Do a 64 page anonymous mmap(), without PROT_EXEC. */
    size_t  page = sysconf(_SC_PAGESIZE);
    void   *enough = mmap(NULL, 64 * page, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (enough == MAP_FAILED) {
        fprintf(stderr, "Note: Failed to acquire a 64-page anonymous read-write memory map.\n");
        enough = NULL;
    }

    /* The allocations above are deliberately kept alive, so that they
       appear in the mappings read here. */
    struct mapping *maps = get_mappings();
    if (!maps)
        return EXIT_FAILURE;

    /* First pass: report the mapping that contains each test object. */
    for (struct mapping *curr = maps; curr != NULL; curr = curr->next) {
        const char *executable = (curr->perm & PERM_X) ? "executable" : "not executable";
        if (within(curr->addr, curr->ends, &static_global))
            printf("static volatile unsigned char [] in file scope is %s.\n", executable);
        if (within(curr->addr, curr->ends, &static_local))
            printf("static volatile unsigned char [] in main() is %s.\n", executable);
        if (within(curr->addr, curr->ends, &nonstatic_local))
            printf("volatile unsigned char [] in main() is %s.\n", executable);
        if (within(curr->addr, curr->ends, small))
            printf("Small allocations using malloc() are %s.\n", executable);
        if (within(curr->addr, curr->ends, large))
            printf("Large allocations using malloc() are %s.\n", executable);
        if (within(curr->addr, curr->ends, enough))
            printf("Anonymous private PROT_READ|PROT_WRITE memory maps are %s.\n", executable);
    }

    /* Second pass: list the stack, the heap, and every other read-write
       private mapping whose name does not start with '/'. */
    for (struct mapping *curr = maps; curr != NULL; curr = curr->next) {
        const char *executable = (curr->perm & PERM_X) ? "executable" : "not executable";
        if (!strcmp(curr->name, "[stack]"))
            printf("Stack (%p..%p) is %s\n", curr->addr, curr->ends, executable);
        else
        if (!strcmp(curr->name, "[heap]"))
            printf("Heap (%p..%p) is %s\n", curr->addr, curr->ends, executable);
        else
        if (curr->name[0] != '/' && (curr->perm & PERM_RWP) == PERM_RWP) {
            printf("%s (%p..%p) is %s\n", (curr->perm & PERM_S) ? "Shared data" : "Data", curr->addr, curr->ends, executable);
        }
    }

    free_mappings(maps);

    return EXIT_SUCCESS;
}

If you save the above as say check.c, you can compile it using for example

gcc -Wall -O2 -std=c99 check.c -o check.gcc.default
gcc -Wall -O2 -std=c99 -z execstack check.c -o check.gcc.execstack
clang -Wall -O2 -std=c99 check.c -o check.clang.default
clang -Wall -O2 -std=c99 -z execstack check.c -o check.clang.execstack

Running an Ubuntu 5.4.0-74-generic kernel on x86-64, the .default versions (without execstack linker option) report:

static volatile unsigned char [] in file scope is not executable.
static volatile unsigned char [] in main() is not executable.
Small allocations using malloc() are not executable.
Large allocations using malloc() are not executable.
Anonymous private PROT_READ|PROT_WRITE memory maps are not executable.
volatile unsigned char [] in main() is not executable.
Heap (0x1f3a000..0x1f5b000) is not executable
Data (0x7f5e40378000..0x7f5e40579000) is not executable
Data (0x7f5e40794000..0x7f5e40798000) is not executable
Data (0x7f5e40d89000..0x7f5e40d8d000) is not executable
Data (0x7f5e41151000..0x7f5e41155000) is not executable
Data (0x7f5e4117d000..0x7f5e411bd000) is not executable
Data (0x7f5e411bf000..0x7f5e411c0000) is not executable
Stack (0x7ffd124ef000..0x7ffd12510000) is not executable

except that because this kernel has address space randomization enabled, the exact addresses above will vary from execution to execution (and this is what we normally want, too); and GCC and Clang tend to use slightly different address ranges, but that too is fine.

As you can see, no data (stack, heap, allocations, or anonymous memory maps without PROT_EXEC) is executable.

Running the .execstack binaries, however, reports:

static volatile unsigned char [] in file scope is executable.
static volatile unsigned char [] in main() is executable.
Small allocations using malloc() are executable.
Large allocations using malloc() are executable.
Anonymous private PROT_READ|PROT_WRITE memory maps are executable.
volatile unsigned char [] in main() is executable.
Heap (0xaec000..0xb0d000) is executable
Data (0x7f1f116bd000..0x7f1f118be000) is executable
Data (0x7f1f11ad9000..0x7f1f11add000) is executable
Data (0x7f1f120ce000..0x7f1f120d2000) is executable
Data (0x7f1f12496000..0x7f1f1249a000) is executable
Data (0x7f1f124c2000..0x7f1f12502000) is executable
Data (0x7f1f12504000..0x7f1f12505000) is executable
Stack (0x7ffd5ed68000..0x7ffd5ed89000) is executable

This means that the ELF GNU_STACK segment protections are not applied to the stack only, but basically all allocations the process can make. Even requesting read-write, non-executable anonymous memory, gives the process executable memory.

The question is, does the original asker really see different output running the .execstack/-z execstack binaries on different kernel versions, or have they fallen foul of Clang’s helpfulness?

You see, if you copy one of the many "here’s how you can prove you can run code on stack in Linux by default" code snippets which include things like volatile unsigned char injected[] = { 0xf3, 0xc3 }; (the two bytes being the minimal implementation for a C void nothing(void) { return; } function), Clang is helpful and instead of putting injected[] on the heap or stack, it will try and put it in the code section instead. In other words, when creating such code, one must always examine the generated machine code to see where it actually puts the code bytes to be executed, and whether it executes those or just a copy in a code segment.

I admit I was a bit peeved to see the initial claim, because it looked very much like someone trying to prove Linux does not default to proper non-executable stack and data regions, by using silly code that does not do what they think it does –– this is what well over half of such code "examples" you find on the internet are: garbage, mistakes, and sheer lies.

However, given the crucial tidbit that all this is related to the execstack linker option (implemented for backwards compatibility, so that users who want to run programs that only work if they have executable stack, can do so), that changes the question from being related to stack smashing/hacking, into a possible backwards compatibility issue. We do care about those, you see.

Hopefully, OP will report whether they do really see different output of the above test program on different kernel versions with execstack linker option enabled.

If they do, the next step is to pinpoint the change. It must be later than 5.4.0 (roughly November 2019), and is almost certainly in either fs/exec.c or fs/binfmt_elf.c –– these being links to their modification history. There are a lot of changes done to these in the last two years, so installing additional kernel versions to bracket the version which introduced the change in behaviour would probably be faster than poring over those changesets.

- Rance
- August 20, 2022 at 3:40 pm
- 0 votes
0
I’m working through the self-study handout for CS:APP’s 4th lab and may have hit the same problem. Challenge level 2 requires changing a global variable. Most previous writeups inject some shellcode which executes on the RWX stack, and the stack is actually a heap buffer malloc’ed by the programmer. Previous writeups use the buffer to write shellcode and let the return address point to the shellcode in the buffer. However, when I tried the same approach on my Ubuntu 22.04 (Linux 5.15.0-46-generic) today, a SIGSEGV appeared. The weird thing is that it worked successfully on my Arch Linux, but that was in March 2022. I have no idea whether it would work now.

Login or Signup to reply.

Please signup or login to give your own answer.

Click here to cancel reply.

Which feature does linux kernel update that lead to heap not executable on linux 5.10 but executable on linux 5.0 while NX(DEP) disabled – Debian

Answers