- Nov 2024
-
elixir.bootlin.com elixir.bootlin.com
-
#ifdef CONFIG_ARCH_HAS_HUGEPD static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, unsigned long sz) { unsigned long __boundary = (addr + sz) & ~(sz-1); return (__boundary - 1 < end - 1) ? __boundary : end; } static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long pte_end; struct page *page; struct folio *folio; pte_t pte; int refs; pte_end = (addr + sz) & ~(sz-1); if (pte_end < end) end = pte_end; pte = huge_ptep_get(ptep); if (!pte_access_permitted(pte, flags & FOLL_WRITE)) return 0; /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, struct page **pages, int *nr) { pte_t *ptep; unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); do { next = hugepte_addr_end(addr, end, sz); if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr)) return 0; } while (ptep++, addr = next, addr != end); return 1; } #else static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, struct page **pages, int *nr) { return 0; } #endif /* CONFIG_ARCH_HAS_HUGEPD */ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct page *page; struct folio *folio; int refs; if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) return 0; if (pmd_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; return __gup_device_huge_pmd(orig, pmdp, addr, end, flags, pages, nr); } page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct page *page; struct folio *folio; int refs; if (!pud_access_permitted(orig, flags & FOLL_WRITE)) return 0; if (pud_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; return __gup_device_huge_pud(orig, pudp, addr, end, flags, pages, nr); } page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pud_val(orig) != pud_val(*pudp))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pud_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { int refs; struct page *page; struct folio *folio; if (!pgd_access_permitted(orig, flags & FOLL_WRITE)) return 0; BUILD_BUG_ON(pgd_devmap(orig)); page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) { gup_put_folio(folio, refs, flags); return 0; } if (!pgd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pmd_t *pmdp; pmdp = pmd_offset_lockless(pudp, pud, addr); do { pmd_t pmd = pmdp_get_lockless(pmdp); next = pmd_addr_end(addr, end); if (!pmd_present(pmd)) return 0; if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))) { /* See gup_pte_range() */ if (pmd_protnone(pmd)) return 0; if (!gup_huge_pmd(pmd, pmdp, addr, next, flags, pages, nr)) return 0; } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) { /* * architecture have different format for hugetlbfs * pmd format and THP pmd format */ if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr, PMD_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pte_range(pmd, pmdp, addr, next, flags, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); return 1; } static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; pudp = pud_offset_lockless(p4dp, p4d, addr); do { pud_t pud = READ_ONCE(*pudp); next = pud_addr_end(addr, end); if (unlikely(!pud_present(pud))) return 0; if (unlikely(pud_huge(pud) || pud_devmap(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, flags, pages, nr)) return 0; } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, PUD_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr)) return 0; } while (pudp++, addr = next, addr != end); return 1; } static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; p4d_t *p4dp; p4dp = p4d_offset_lockless(pgdp, pgd, addr); do { p4d_t p4d = READ_ONCE(*p4dp); next = p4d_addr_end(addr, end); if (p4d_none(p4d)) return 0; BUILD_BUG_ON(p4d_huge(p4d)); if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, P4D_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr)) return 0; } while (p4dp++, addr = next, addr != end); return 1; } static void gup_pgd_range(unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pgd_t *pgdp; pgdp = pgd_offset(current->mm, addr); do { pgd_t pgd = READ_ONCE(*pgdp); next = pgd_addr_end(addr, end); if (pgd_none(pgd)) return; if (unlikely(pgd_huge(pgd))) { if (!gup_huge_pgd(pgd, pgdp, addr, next, flags, pages, nr)) return; } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, flags, pages, nr)) return; } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr)) return; } while (pgdp++, addr = next, addr != end); } #else static inline void gup_pgd_range(unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { }
policy use functions for gup_huge pte policy code function above (not right above, gotta scroll probably to find it)
-
static int internal_get_user_pages_fast(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { unsigned long len, end; unsigned long nr_pinned; int locked = 0; int ret; if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | FOLL_FORCE | FOLL_PIN | FOLL_GET | FOLL_FAST_ONLY | FOLL_NOFAULT | FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT))) return -EINVAL; if (gup_flags & FOLL_PIN) mm_set_has_pinned_flag(¤t->mm->flags); if (!(gup_flags & FOLL_FAST_ONLY)) might_lock_read(¤t->mm->mmap_lock); start = untagged_addr(start) & PAGE_MASK; len = nr_pages << PAGE_SHIFT; if (check_add_overflow(start, len, &end)) return -EOVERFLOW; if (end > TASK_SIZE_MAX) return -EFAULT; if (unlikely(!access_ok((void __user *)start, len))) return -EFAULT; nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages); if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY) return nr_pinned; /* Slow path: try to get the remaining pages with get_user_pages */ start += nr_pinned << PAGE_SHIFT; pages += nr_pinned; ret = __gup_longterm_locked(current->mm, start, nr_pages - nr_pinned, pages, &locked, gup_flags | FOLL_TOUCH | FOLL_UNLOCKABLE); if (ret < 0) { /* * The caller has to unpin the pages we already pinned so * returning -errno is not an option */ if (nr_pinned) return nr_pinned; return ret; } return ret + nr_pinned; } /** * get_user_pages_fast_only() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to * the regular GUP. * * If the architecture does not support this function, simply return with no * pages pinned. * * Careful, careful! COW breaking can go either way, so a non-write * access can get ambiguous page results. If you call this function without * 'write' set, you'd better be sure that you're ok with that ambiguity. */ int get_user_pages_fast_only(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { /* * Internally (within mm/gup.c), gup fast variants must set FOLL_GET, * because gup fast is always a "pin with a +1 page refcount" request. * * FOLL_FAST_ONLY is required in order to match the API description of * this routine: no fall back to regular ("slow") GUP. */ if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET | FOLL_FAST_ONLY)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(get_user_pages_fast_only); /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Attempt to pin user pages in memory without taking mm->mmap_lock. * If not successful, it will fall back to taking the lock and * calling get_user_pages(). * * Returns number of pages pinned. This may be fewer than the number requested. * If nr_pages is 0 or negative, returns 0. If no pages were pinned, returns * -errno. */ int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { /* * The caller may or may not have explicitly set FOLL_GET; either way is * OK. However, internally (within mm/gup.c), gup fast variants must set * FOLL_GET, because gup fast is always a "pin with a +1 page refcount" * request. */ if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(get_user_pages_fast); /** * pin_user_pages_fast() - pin user pages in memory without taking locks * * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Nearly the same as get_user_pages_fast(), except that FOLL_PIN is set. See * get_user_pages_fast() for documentation on the function arguments, because * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for further details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page() will not remove pins from it. */ int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(pin_user_pages_fast); /** * pin_user_pages_remote() - pin pages of a remote process * * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * @locked: pointer to lock flag indicating whether lock is held and * subsequently whether VM_FAULT_RETRY functionality can be * utilised. Lock must initially be held. * * Nearly the same as get_user_pages_remote(), except that FOLL_PIN is set. See * get_user_pages_remote() for documentation on the function arguments, because * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked) { int local_locked = 1; if (!is_valid_gup_args(pages, locked, &gup_flags, FOLL_PIN | FOLL_TOUCH | FOLL_REMOTE)) return 0; return __gup_longterm_locked(mm, start, nr_pages, pages, locked ? locked : &local_locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages_remote); /** * pin_user_pages() - pin user pages in memory for use by other devices * * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Nearly the same as get_user_pages(), except that FOLL_TOUCH is not set, and * FOLL_PIN is set. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { int locked = 1; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN)) return 0; return __gup_longterm_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages); /* * pin_user_pages_unlocked() is the FOLL_PIN variant of * get_user_pages_unlocked(). Behavior is the same, except that this one sets * FOLL_PIN and rejects FOLL_GET. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags) { int locked = 0; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN | FOLL_TOUCH | FOLL_UNLOCKABLE)) return 0; return __gup_longterm_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); }
fast gup functions
-
/** * unpin_user_pages() - release an array of gup-pinned pages. * @pages: array of pages to be marked dirty and released. * @npages: number of pages in the @pages array. * * For each page in the @pages array, release the page using unpin_user_page(). * * Please see the unpin_user_page() documentation for details. */ void unpin_user_pages(struct page **pages, unsigned long npages) { unsigned long i; struct folio *folio; unsigned int nr; /* * If this WARN_ON() fires, then the system *might* be leaking pages (by * leaving them pinned), but probably not. More likely, gup/pup returned * a hard -ERRNO error to the caller, who erroneously passed it here. */ if (WARN_ON(IS_ERR_VALUE(npages))) return; sanity_check_pinned_pages(pages, npages); for (i = 0; i < npages; i += nr) { folio = gup_folio_next(pages, npages, i, &nr); gup_put_folio(folio, nr, FOLL_PIN); } }
gup unpin function, not actual logic
-
void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty) { unsigned long i; struct folio *folio; unsigned int nr; for (i = 0; i < npages; i += nr) { folio = gup_folio_range_next(page, npages, i, &nr); if (make_dirty && !folio_test_dirty(folio)) { folio_lock(folio); folio_mark_dirty(folio); folio_unlock(folio); } gup_put_folio(folio, nr, FOLL_PIN); } }
unpin logic but for dirty pages
-
if ((flags & FOLL_DUMP) && (vma_is_anonymous(vma) || !vma->vm_ops->fault)) return ERR_PTR(-EFAULT); return NULL;
explained in comments
-
static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start, unsigned int flags, struct page **pages) { while ((*nr) - nr_start) { struct page *page = pages[--(*nr)]; ClearPageReferenced(page); if (flags & FOLL_PIN) unpin_user_page(page); else put_page(page); } }
policy use function that undoes mapping
-
#ifdef CONFIG_MIGRATION /* * Returns the number of collected pages. Return value is always >= 0. */ static unsigned long collect_longterm_unpinnable_pages( struct list_head *movable_page_list, unsigned long nr_pages, struct page **pages) { unsigned long i, collected = 0; struct folio *prev_folio = NULL; bool drain_allow = true; for (i = 0; i < nr_pages; i++) { struct folio *folio = page_folio(pages[i]); if (folio == prev_folio) continue; prev_folio = folio; if (folio_is_longterm_pinnable(folio)) continue; collected++; if (folio_is_device_coherent(folio)) continue; if (folio_test_hugetlb(folio)) { isolate_hugetlb(folio, movable_page_list); continue; } if (!folio_test_lru(folio) && drain_allow) { lru_add_drain_all(); drain_allow = false; } if (!folio_isolate_lru(folio)) continue; list_add_tail(&folio->lru, movable_page_list); node_stat_mod_folio(folio, NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } return collected; }
-
#ifdef CONFIG_ELF_CORE struct page *get_dump_page(unsigned long addr) { struct page *page; int locked = 0; int ret; ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked, FOLL_FORCE | FOLL_DUMP | FOLL_GET); return (ret == 1) ? page : NULL; } #endif /* CONFIG_ELF_CORE */
part of policy use code likely
-
int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) { struct mm_struct *mm = current->mm; unsigned long end, nstart, nend; struct vm_area_struct *vma = NULL; int locked = 0; long ret = 0; end = start + len; for (nstart = start; nstart < end; nstart = nend) { /* * We want to fault in pages for [nstart; end) address range. * Find first corresponding VMA. */ if (!locked) { locked = 1; mmap_read_lock(mm); vma = find_vma_intersection(mm, nstart, end); } else if (nstart >= vma->vm_end) vma = find_vma_intersection(mm, vma->vm_end, end); if (!vma) break; /* * Set [nstart; nend) to intersection of desired address * range with the first VMA. Also, skip undesirable VMA types. */ nend = min(end, vma->vm_end); if (vma->vm_flags & (VM_IO | VM_PFNMAP)) continue; if (nstart < vma->vm_start) nstart = vma->vm_start; /* * Now fault in a range of pages. populate_vma_page_range() * double checks the vma flags, so that it won't mlock pages * if the vma was already munlocked. */ ret = populate_vma_page_range(vma, nstart, nend, &locked); if (ret < 0) { if (ignore_errors) { ret = 0; continue; /* continue at next VMA */ } break; } nend = nstart + ret * PAGE_SIZE; ret = 0; } if (locked) mmap_read_unlock(mm); return ret; /* 0 or negative error code */ }
policy use function that populates pages like the func before this.
-
long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked) { struct mm_struct *mm = vma->vm_mm; unsigned long nr_pages = (end - start) / PAGE_SIZE; int local_locked = 1; int gup_flags; long ret; VM_BUG_ON(!PAGE_ALIGNED(start)); VM_BUG_ON(!PAGE_ALIGNED(end)); VM_BUG_ON_VMA(start < vma->vm_start, vma); VM_BUG_ON_VMA(end > vma->vm_end, vma); mmap_assert_locked(mm); /* * Rightly or wrongly, the VM_LOCKONFAULT case has never used * faultin_page() to break COW, so it has no work to do here. */ if (vma->vm_flags & VM_LOCKONFAULT) return nr_pages; gup_flags = FOLL_TOUCH; /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW * and we would not want to dirty them for nothing. */ if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) gup_flags |= FOLL_WRITE; /* * We want mlock to succeed for regions that have any permissions * other than PROT_NONE. */ if (vma_is_accessible(vma)) gup_flags |= FOLL_FORCE; if (locked) gup_flags |= FOLL_UNLOCKABLE; /* * We made sure addr is within a VMA, so the following will * not result in a stack expansion that recurses back here. */ ret = __get_user_pages(mm, start, nr_pages, gup_flags, NULL, locked ? locked : &local_locked); lru_add_drain(); return ret; }
policy use code.
-
if (!(flags & FOLL_INTERRUPTIBLE)) return false;
fatal fault signal handler
-
#ifdef CONFIG_STACK_GROWSUP return vma_lookup(mm, addr); #else static volatile unsigned long next_warn; struct vm_area_struct *vma; unsigned long now, next; vma = find_vma(mm, addr); if (!vma || (addr >= vma->vm_start)) return vma; /* Only warn for half-way relevant accesses */ if (!(vma->vm_flags & VM_GROWSDOWN)) return NULL; if (vma->vm_start - addr > 65536) return NULL; /* Let's not warn more than once an hour.. */ now = jiffies; next = next_warn; if (next && time_before(now, next)) return NULL; next_warn = now + 60*60*HZ; /* Let people know things may have changed. */ pr_warn("GUP no longer grows the stack in %s (%d): %lx-%lx (%lx)\n", current->comm, task_pid_nr(current), vma->vm_start, vma->vm_end, addr); dump_stack(); return NULL;
helper func to lookup vma(virtual mem area) that warns per hour about half way relevant acc and changes in stack
-
if (*flags & FOLL_NOFAULT) return -EFAULT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (*flags & FOLL_REMOTE) fault_flags |= FAULT_FLAG_REMOTE; if (*flags & FOLL_UNLOCKABLE) { fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * FAULT_FLAG_INTERRUPTIBLE is opt-in. GUP callers must set * FOLL_INTERRUPTIBLE to enable FAULT_FLAG_INTERRUPTIBLE. * That's because some callers may not be prepared to * handle early exits caused by non-fatal signals. */ if (*flags & FOLL_INTERRUPTIBLE) fault_flags |= FAULT_FLAG_INTERRUPTIBLE; } if (*flags & FOLL_NOWAIT) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; if (*flags & FOLL_TRIED) { /* * Note: FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_TRIED * can co-exist */ fault_flags |= FAULT_FLAG_TRIED; } if (unshare) { fault_flags |= FAULT_FLAG_UNSHARE; /* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */ VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE); } ret = handle_mm_fault(vma, address, fault_flags, NULL); if (ret & VM_FAULT_COMPLETED) { /* * With FAULT_FLAG_RETRY_NOWAIT we'll never release the * mmap lock in the page fault handler. Sanity check this. */ WARN_ON_ONCE(fault_flags & FAULT_FLAG_RETRY_NOWAIT); *locked = 0; /* * We should do the same as VM_FAULT_RETRY, but let's not * return -EBUSY since that's not reflecting the reality of * what has happened - we've just fully completed a page * fault, with the mmap lock released. Use -EAGAIN to show * that we want to take the mmap lock _again_. */ return -EAGAIN; } if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, *flags); if (err) return err; BUG(); } if (ret & VM_FAULT_RETRY) { if (!(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *locked = 0; return -EBUSY; }
Seems it's just setting flags for page faults based on flags param
-
if (likely(!pmd_trans_huge(pmdval))) return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags)) return no_page_table(vma, flags); ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_present(*pmd))) { spin_unlock(ptl); return no_page_table(vma, flags); } if (unlikely(!pmd_trans_huge(*pmd))) { spin_unlock(ptl); return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); }
branch prediction to check if pmd is there and if it's big
-
if (pmd_none(pmdval)) return no_page_table(vma, flags); if (!pmd_present(pmdval)) return no_page_table(vma, flags); if (pmd_devmap(pmdval)) { ptl = pmd_lock(mm, pmd); page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap); spin_unlock(ptl); if (page) return page; }
checks if pmd is there. im assuming it's page middle dir.
-
void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty) { unsigned long i; struct folio *folio; unsigned int nr; if (!make_dirty) { unpin_user_pages(pages, npages); return; } sanity_check_pinned_pages(pages, npages); for (i = 0; i < npages; i += nr) { folio = gup_folio_next(pages, npages, i, &nr); /* * Checking PageDirty at this point may race with * clear_page_dirty_for_io(), but that's OK. Two key * cases: * * 1) This code sees the page as already dirty, so it * skips the call to set_page_dirty(). That could happen * because clear_page_dirty_for_io() called * page_mkclean(), followed by set_page_dirty(). * However, now the page is going to get written back, * which meets the original intention of setting it * dirty, so all is well: clear_page_dirty_for_io() goes * on to call TestClearPageDirty(), and write the page * back. * * 2) This code sees the page as clean, so it calls * set_page_dirty(). The page stays dirty, despite being * written back, so it gets written back again in the * next writeback cycle. This is harmless. */ if (!folio_test_dirty(folio)) { folio_lock(folio); folio_mark_dirty(folio); folio_unlock(folio); } gup_put_folio(folio, nr, FOLL_PIN); } }
unpins and dirties page
-
static inline struct folio *gup_folio_next(struct page **list, unsigned long npages, unsigned long i, unsigned int *ntails) { struct folio *folio = page_folio(list[i]); unsigned int nr; for (nr = i + 1; nr < npages; nr++) { if (page_folio(list[nr]) != folio) break; } *ntails = nr - i; return folio; }
gets folio of next page along with reference to end of folio
-
static inline struct folio *gup_folio_range_next(struct page *start, unsigned long npages, unsigned long i, unsigned int *ntails) { struct page *next = nth_page(start, i); struct folio *folio = page_folio(next); unsigned int nr = 1; if (folio_test_large(folio)) nr = min_t(unsigned int, npages - i, folio_nr_pages(folio) - folio_page_idx(folio, next)); *ntails = nr; return folio; }
gets the folio of the next page from start to 'i' range. also gets the tail folio/reference
-
folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
function for adding reference
-
void unpin_user_page(struct page *page) { sanity_check_pinned_pages(&page, 1); gup_put_folio(page_folio(page), 1, FOLL_PIN); } EXPORT_SYMBOL(unpin_user_page);
actual policy use logic
-
struct folio *folio = page_folio(page); if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) return -ENOMEM; if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) return -EREMOTEIO; if (flags & FOLL_GET) folio_ref_inc(folio);
checks for code that is involved in policy but is not the actual logic
-
else if (flags & FOLL_PIN) { /* * Don't take a pin on the zero page - it's not going anywhere * and it is used in a *lot* of places. */ if (is_zero_page(page)) return 0; /* * Similar to try_grab_folio(): be sure to *also* * increment the normal page refcount field at least once, * so that the page really is pinned. */ if (folio_test_large(folio)) { folio_ref_add(folio, 1); atomic_add(1, &folio->_pincount); } else { folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); } node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); }
Logic that actually tries to grab the folio. Also policy use code and not actual policy
-
if (!put_devmap_managed_page_refs(&folio->page, refs)) folio_put_refs(folio, refs);
Definitely a vital and straightforward policy use section of gup that simples places a reference on the folio
-
if (flags & FOLL_PIN) { if (is_zero_folio(folio)) return; node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); if (folio_test_large(folio)) atomic_sub(refs, &folio->_pincount); else refs *= GUP_PIN_COUNTING_BIAS; }
Checks if the folio is zero/large
-
if (folio_test_large(folio)) atomic_add(refs, &folio->_pincount); else folio_ref_add(folio, refs * (GUP_PIN_COUNTING_BIAS - 1))
maintaining reference counts. Part of policy logic most likely
-
if (unlikely((flags & FOLL_LONGTERM) && !folio_is_longterm_pinnable(folio))) { if (!put_devmap_managed_page_refs(&folio->page, refs)) folio_put_refs(folio, refs); return NULL;
checks for longterm folio pins.
-
if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) return NULL; if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) return NULL;
Time saving predictions(unlikely) and single time warning func(WARN_ON_ONCE) for flags. Not actual policy logic so low confidence.
-
if (is_zero_page(page) || !folio_test_anon(folio)) continue; if (!folio_test_large(folio) || folio_test_hugetlb(folio))
Sanity checks for pinned pages wouldn't classify as policy logic but common sense pre-checks for the actual policy. But I think it's worth tagging this to gain a sense of what is not policy code
-
if (is_zero_page(page)) return page_folio(page); folio = try_get_folio(page, refs); if (!folio) return NULL;
Just trying to check for zero pages and trying to retrieve folios. Unlikely policy logic
-