Search code examples
linux, debugging, coredump

How does Linux truncate core dumps?


Core dump size can be restricted using limits (ulimit, rlimit, etc...).

What I'm wondering is how this is actually implemented - for instance: Is the core dump generation smart enough to prioritize the stack? Memory referenced by local variable pointers? Or is it literally the entire address space of the process, truncated at N bytes?


Solution

  • I recently ran into a truncated core dump for one of my processes, so I can share what I found. The Linux kernel generates the core dump in the context of the crashing process. In the kernel source code, the contents of the core dump are written out by the kernel through repeated calls to dump_emit() in fs/coredump.c:

    /*
     * Core dumping helper functions.  These are the only things you should
     * do on a core-file: use only these functions to write out all the
     * necessary info.
     */

    /*
     * dump_emit - append @nr bytes starting at @addr to the core file.
     *
     * Returns 1 when all @nr bytes were written and 0 otherwise.  Nothing is
     * written at all if the request would push cprm->written past
     * cprm->limit; the write is also abandoned as soon as dump_interrupted()
     * reports true or __kernel_write() returns an error or zero.  Callers
     * treat a 0 return as "stop dumping", which is what leaves the core file
     * truncated.
     */
    int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
    {
        struct file *file = cprm->file;
        const char *buf = addr;    /* byte cursor into the caller's buffer */
        loff_t pos = file->f_pos;
        ssize_t n;

        /* Refuse the whole chunk rather than writing part of it. */
        if (cprm->written + nr > cprm->limit)
            return 0;
        while (nr) {
            if (dump_interrupted())
                return 0;
            n = __kernel_write(file, buf, nr, &pos);
            if (n <= 0)
                return 0;
            file->f_pos = pos;
            cprm->written += n;
            cprm->pos += n;
            /*
             * A short write consumed only the first n bytes: resume right
             * after them instead of re-sending the start of the buffer.
             */
            buf += n;
            nr -= n;
        }
        return 1;
    }
    EXPORT_SYMBOL(dump_emit);
    

    The above function checks two main things:

    • Would writing the next nr bytes push the core dump past the size limit configured for the process?
    if (cprm->written + nr > cprm->limit)
    
    • Is there a pending signal for the current process?
    if (dump_interrupted())
    

    If the limit would be exceeded or a signal is pending, dump_emit() returns 0, the write-out stops, and the resulting core file is truncated at whatever point had been reached. In an ELF-configured kernel, dump_emit() is called, for example, by elf_core_dump() in fs/binfmt_elf.c:

    /*
     * Actual dumper
     *
     * This is a two-pass process; first we find the offsets of the bits,
     * and then they are actually written out.  If we run out of core limit
     * we just truncate.
     *
     * Output order, as written below: ELF header, notes program header, one
     * PT_LOAD program header per VMA, extra program headers, the notes,
     * padding up to a page boundary, the VMA contents in VMA order, extra
     * data, and finally the extended-numbering section header if needed.
     *
     * Returns has_dumped: 0 if the ELF header allocation or fill_note_info()
     * failed, 1 once the note information has been collected -- even when a
     * later write stops early and the file ends up truncated.
     */
    static int elf_core_dump(struct coredump_params *cprm)
    {
        int has_dumped = 0;
        mm_segment_t fs;
        int segs, i;
        size_t vma_data_size = 0;
        struct vm_area_struct *vma, *gate_vma;
        struct elfhdr *elf = NULL;
        loff_t offset = 0, dataoff;
        struct elf_note_info info = { };
        struct elf_phdr *phdr4note = NULL;
        struct elf_shdr *shdr4extnum = NULL;
        Elf_Half e_phnum;
        elf_addr_t e_shoff;
        elf_addr_t *vma_filesz = NULL;
    
        /*
         * We no longer stop all VM operations.
         *
         * This is because those processes that could possibly change map_count
         * or the mmap / vma pages are now blocked in do_exit on current
         * finishing this core dump.
         *
         * Only ptrace can touch these memory addresses, but it doesn't change
         * the map_count or the pages allocated. So no possibility of crashing
         * exists while dumping the mm->vm_next areas to the core file.
         */
    
        /* alloc memory for large data structures: too large to be on stack */
        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
        if (!elf)
            goto out;
        /*
         * The number of segs are recorded into ELF header as 16bit value.
         * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
         */
        segs = current->mm->map_count;
        segs += elf_core_extra_phdrs();
    
        gate_vma = get_gate_vma(current->mm);
        if (gate_vma != NULL)
            segs++;
    
        /* for notes section */
        segs++;
    
        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
         * this, kernel supports extended numbering. Have a look at
         * include/linux/elf.h for further information. */
        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
    
        /*
         * Collect all the non-memory information about the process for the
         * notes.  This also sets up the file header.
         */
        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
            goto cleanup;
    
        /*
         * From here on the function reports that a dump took place, whatever
         * happens to the individual writes below.
         */
        has_dumped = 1;
    
        /* Saved here, restored at end_coredump. */
        fs = get_fs();
        set_fs(KERNEL_DS);
    
        /*
         * Pass 1: compute file offsets without writing anything.
         */
        offset += sizeof(*elf);             /* Elf header */
        offset += segs * sizeof(struct elf_phdr);   /* Program headers */
    
        /* Write notes phdr entry */
        {
            size_t sz = get_note_info_size(&info);
    
            sz += elf_coredump_extra_notes_size();
    
            phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
            if (!phdr4note)
                goto end_coredump;
    
            fill_elf_note_phdr(phdr4note, sz, offset);
            offset += sz;
        }
    
        /* Memory contents start at the next ELF_EXEC_PAGESIZE boundary. */
        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
    
        /*
         * vma_filesz gets one slot per segment other than the notes segment
         * counted above (hence segs - 1).  Guard the size multiplication
         * against overflow before allocating.
         */
        if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
            goto end_coredump;
        vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
        if (!vma_filesz)
            goto end_coredump;
    
        /*
         * Record how many bytes of each VMA will be dumped, as decided by
         * vma_dump_size(), and accumulate the total.
         */
        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
                vma = next_vma(vma, gate_vma)) {
            unsigned long dump_size;
    
            dump_size = vma_dump_size(vma, cprm->mm_flags);
            vma_filesz[i++] = dump_size;
            vma_data_size += dump_size;
        }
    
        /* The section header, if any, goes after all data. */
        offset += vma_data_size;
        offset += elf_core_extra_data_size();
        e_shoff = offset;
    
        if (e_phnum == PN_XNUM) {
            shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
            if (!shdr4extnum)
                goto end_coredump;
            fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
        }
    
        /* Rewind to the start of the data area to assign each p_offset. */
        offset = dataoff;
    
        /*
         * Pass 2: write everything out.  Every failed write below jumps to
         * end_coredump, leaving the file with whatever was written so far.
         */
        if (!dump_emit(cprm, elf, sizeof(*elf)))
            goto end_coredump;
    
        if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
            goto end_coredump;
    
        /* Write program headers for segments dump */
        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
                vma = next_vma(vma, gate_vma)) {
            struct elf_phdr phdr;
    
            phdr.p_type = PT_LOAD;
            phdr.p_offset = offset;
            phdr.p_vaddr = vma->vm_start;
            phdr.p_paddr = 0;
            /* p_filesz is the dumped size; p_memsz is the full VMA size. */
            phdr.p_filesz = vma_filesz[i++];
            phdr.p_memsz = vma->vm_end - vma->vm_start;
            offset += phdr.p_filesz;
            phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
            if (vma->vm_flags & VM_WRITE)
                phdr.p_flags |= PF_W;
            if (vma->vm_flags & VM_EXEC)
                phdr.p_flags |= PF_X;
            phdr.p_align = ELF_EXEC_PAGESIZE;
    
            if (!dump_emit(cprm, &phdr, sizeof(phdr)))
                goto end_coredump;
        }
    
        if (!elf_core_write_extra_phdrs(cprm, offset))
            goto end_coredump;
    
        /* write out the notes section */
        if (!write_note_info(&info, cprm))
            goto end_coredump;
        
        /* Unlike the calls around it, a non-zero return here stops the dump. */
        if (elf_coredump_extra_notes_write(cprm))
            goto end_coredump;
    
        /* Align to page */
        if (!dump_skip(cprm, dataoff - cprm->pos))
            goto end_coredump;
    
        /*
         * Dump the VMA contents one page at a time, in the same VMA order as
         * the program headers.  No VMA is given priority over another here.
         */
        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
                vma = next_vma(vma, gate_vma)) {
            unsigned long addr;
            unsigned long end;
    
            end = vma->vm_start + vma_filesz[i++];
    
            for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
                struct page *page;
                int stop;
    
                /*
                 * When get_dump_page() returns no page, PAGE_SIZE bytes are
                 * skipped with dump_skip() instead of being written.
                 */
                page = get_dump_page(addr);
                if (page) {
                    void *kaddr = kmap(page);
                    stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
                    kunmap(page);
                    put_page(page);
                } else
                    stop = !dump_skip(cprm, PAGE_SIZE);
                /* A failed write or skip ends the dump at this page. */
                if (stop)
                    goto end_coredump;
            }
        }
        dump_truncate(cprm);
    
        if (!elf_core_write_extra_data(cprm))
            goto end_coredump;
    
        if (e_phnum == PN_XNUM) {
            if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
                goto end_coredump;
        }
    
    /* Reached on success and on any failure after set_fs(KERNEL_DS). */
    end_coredump:
        set_fs(fs);
    
    /* Pointers that were never allocated are still NULL when freed here. */
    cleanup:
        free_note_info(&info);
        kfree(shdr4extnum);
        vfree(vma_filesz);
        kfree(phdr4note);
        kfree(elf);
    out:
        return has_dumped;
    }