- Nov 2024
-
elixir.bootlin.com elixir.bootlin.com
-
#ifdef CONFIG_ARCH_HAS_HUGEPD static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, unsigned long sz) { unsigned long __boundary = (addr + sz) & ~(sz-1); return (__boundary - 1 < end - 1) ? __boundary : end; } static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long pte_end; struct page *page; struct folio *folio; pte_t pte; int refs; pte_end = (addr + sz) & ~(sz-1); if (pte_end < end) end = pte_end; pte = huge_ptep_get(ptep); if (!pte_access_permitted(pte, flags & FOLL_WRITE)) return 0; /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, struct page **pages, int *nr) { pte_t *ptep; unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); do { next = hugepte_addr_end(addr, end, sz); if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr)) return 0; } while (ptep++, addr = next, addr != end); return 1; } #else static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, struct page **pages, int *nr) { return 0; } #endif /* CONFIG_ARCH_HAS_HUGEPD */ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct page *page; struct folio *folio; int refs; if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) return 0; if (pmd_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; return __gup_device_huge_pmd(orig, pmdp, addr, end, flags, pages, nr); } page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct page *page; struct folio *folio; int refs; if (!pud_access_permitted(orig, flags & FOLL_WRITE)) return 0; if (pud_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; return __gup_device_huge_pud(orig, pudp, addr, end, flags, pages, nr); } page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pud_val(orig) != pud_val(*pudp))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pud_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { int refs; struct page *page; struct folio *folio; if (!pgd_access_permitted(orig, flags & FOLL_WRITE)) return 0; BUILD_BUG_ON(pgd_devmap(orig)); page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) { gup_put_folio(folio, refs, flags); return 0; } if (!pgd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pmd_t *pmdp; pmdp = pmd_offset_lockless(pudp, pud, addr); do { pmd_t pmd = pmdp_get_lockless(pmdp); next = pmd_addr_end(addr, end); if (!pmd_present(pmd)) return 0; if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))) { /* See gup_pte_range() */ if (pmd_protnone(pmd)) return 0; if (!gup_huge_pmd(pmd, pmdp, addr, next, flags, pages, nr)) return 0; } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) { /* * architecture have different format for hugetlbfs * pmd format and THP pmd format */ if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr, PMD_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pte_range(pmd, pmdp, addr, next, flags, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); return 1; } static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; pudp = pud_offset_lockless(p4dp, p4d, addr); do { pud_t pud = READ_ONCE(*pudp); next = pud_addr_end(addr, end); if (unlikely(!pud_present(pud))) return 0; if (unlikely(pud_huge(pud) || pud_devmap(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, flags, pages, nr)) return 0; } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, PUD_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr)) return 0; } while (pudp++, addr = next, addr != end); return 1; } static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; p4d_t *p4dp; p4dp = p4d_offset_lockless(pgdp, pgd, addr); do { p4d_t p4d = READ_ONCE(*p4dp); next = p4d_addr_end(addr, end); if (p4d_none(p4d)) return 0; BUILD_BUG_ON(p4d_huge(p4d)); if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, P4D_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr)) return 0; } while (p4dp++, addr = next, addr != end); return 1; } static void gup_pgd_range(unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pgd_t *pgdp; pgdp = pgd_offset(current->mm, addr); do { pgd_t pgd = READ_ONCE(*pgdp); next = pgd_addr_end(addr, end); if (pgd_none(pgd)) return; if (unlikely(pgd_huge(pgd))) { if (!gup_huge_pgd(pgd, pgdp, addr, next, flags, pages, nr)) return; } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, flags, pages, nr)) return; } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr)) return; } while (pgdp++, addr = next, addr != end); } #else static inline void gup_pgd_range(unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { }
policy use functions for gup_huge pte policy code function above (not right above, gotta scroll probably to find it)
-
static int internal_get_user_pages_fast(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { unsigned long len, end; unsigned long nr_pinned; int locked = 0; int ret; if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | FOLL_FORCE | FOLL_PIN | FOLL_GET | FOLL_FAST_ONLY | FOLL_NOFAULT | FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT))) return -EINVAL; if (gup_flags & FOLL_PIN) mm_set_has_pinned_flag(¤t->mm->flags); if (!(gup_flags & FOLL_FAST_ONLY)) might_lock_read(¤t->mm->mmap_lock); start = untagged_addr(start) & PAGE_MASK; len = nr_pages << PAGE_SHIFT; if (check_add_overflow(start, len, &end)) return -EOVERFLOW; if (end > TASK_SIZE_MAX) return -EFAULT; if (unlikely(!access_ok((void __user *)start, len))) return -EFAULT; nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages); if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY) return nr_pinned; /* Slow path: try to get the remaining pages with get_user_pages */ start += nr_pinned << PAGE_SHIFT; pages += nr_pinned; ret = __gup_longterm_locked(current->mm, start, nr_pages - nr_pinned, pages, &locked, gup_flags | FOLL_TOUCH | FOLL_UNLOCKABLE); if (ret < 0) { /* * The caller has to unpin the pages we already pinned so * returning -errno is not an option */ if (nr_pinned) return nr_pinned; return ret; } return ret + nr_pinned; } /** * get_user_pages_fast_only() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to * the regular GUP. * * If the architecture does not support this function, simply return with no * pages pinned. * * Careful, careful! COW breaking can go either way, so a non-write * access can get ambiguous page results. If you call this function without * 'write' set, you'd better be sure that you're ok with that ambiguity. */ int get_user_pages_fast_only(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { /* * Internally (within mm/gup.c), gup fast variants must set FOLL_GET, * because gup fast is always a "pin with a +1 page refcount" request. * * FOLL_FAST_ONLY is required in order to match the API description of * this routine: no fall back to regular ("slow") GUP. */ if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET | FOLL_FAST_ONLY)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(get_user_pages_fast_only); /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Attempt to pin user pages in memory without taking mm->mmap_lock. * If not successful, it will fall back to taking the lock and * calling get_user_pages(). * * Returns number of pages pinned. This may be fewer than the number requested. * If nr_pages is 0 or negative, returns 0. If no pages were pinned, returns * -errno. */ int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { /* * The caller may or may not have explicitly set FOLL_GET; either way is * OK. However, internally (within mm/gup.c), gup fast variants must set * FOLL_GET, because gup fast is always a "pin with a +1 page refcount" * request. */ if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(get_user_pages_fast); /** * pin_user_pages_fast() - pin user pages in memory without taking locks * * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Nearly the same as get_user_pages_fast(), except that FOLL_PIN is set. See * get_user_pages_fast() for documentation on the function arguments, because * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for further details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page() will not remove pins from it. */ int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(pin_user_pages_fast); /** * pin_user_pages_remote() - pin pages of a remote process * * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * @locked: pointer to lock flag indicating whether lock is held and * subsequently whether VM_FAULT_RETRY functionality can be * utilised. Lock must initially be held. * * Nearly the same as get_user_pages_remote(), except that FOLL_PIN is set. See * get_user_pages_remote() for documentation on the function arguments, because * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked) { int local_locked = 1; if (!is_valid_gup_args(pages, locked, &gup_flags, FOLL_PIN | FOLL_TOUCH | FOLL_REMOTE)) return 0; return __gup_longterm_locked(mm, start, nr_pages, pages, locked ? locked : &local_locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages_remote); /** * pin_user_pages() - pin user pages in memory for use by other devices * * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Nearly the same as get_user_pages(), except that FOLL_TOUCH is not set, and * FOLL_PIN is set. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { int locked = 1; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN)) return 0; return __gup_longterm_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages); /* * pin_user_pages_unlocked() is the FOLL_PIN variant of * get_user_pages_unlocked(). Behavior is the same, except that this one sets * FOLL_PIN and rejects FOLL_GET. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags) { int locked = 0; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN | FOLL_TOUCH | FOLL_UNLOCKABLE)) return 0; return __gup_longterm_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); }
fast gup functions
-
/** * unpin_user_pages() - release an array of gup-pinned pages. * @pages: array of pages to be marked dirty and released. * @npages: number of pages in the @pages array. * * For each page in the @pages array, release the page using unpin_user_page(). * * Please see the unpin_user_page() documentation for details. */ void unpin_user_pages(struct page **pages, unsigned long npages) { unsigned long i; struct folio *folio; unsigned int nr; /* * If this WARN_ON() fires, then the system *might* be leaking pages (by * leaving them pinned), but probably not. More likely, gup/pup returned * a hard -ERRNO error to the caller, who erroneously passed it here. */ if (WARN_ON(IS_ERR_VALUE(npages))) return; sanity_check_pinned_pages(pages, npages); for (i = 0; i < npages; i += nr) { folio = gup_folio_next(pages, npages, i, &nr); gup_put_folio(folio, nr, FOLL_PIN); } }
gup unpin function, not actual logic
-
if ((flags & FOLL_DUMP) && (vma_is_anonymous(vma) || !vma->vm_ops->fault)) return ERR_PTR(-EFAULT); return NULL;
explained in comments
-
#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL /* * Fast-gup relies on pte change detection to avoid concurrent pgtable * operations. * * To pin the page, fast-gup needs to do below in order: * (1) pin the page (by prefetching pte), then (2) check pte not changed. * * For the rest of pgtable operations where pgtable updates can be racy * with fast-gup, we need to do (1) clear pte, then (2) check whether page * is pinned. * * Above will work for all pte-level operations, including THP split. * * For THP collapse, it's a bit more complicated because fast-gup may be * walking a pgtable page that is being freed (pte is still valid but pmd * can be cleared already). To avoid race in such condition, we need to * also check pmd here to make sure pmd doesn't change (corresponds to * pmdp_collapse_flush() in the THP collapse code path). */ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct dev_pagemap *pgmap = NULL; int nr_start = *nr, ret = 0; pte_t *ptep, *ptem; ptem = ptep = pte_offset_map(&pmd, addr); if (!ptep) return 0; do { pte_t pte = ptep_get_lockless(ptep); struct page *page; struct folio *folio; /* * Always fallback to ordinary GUP on PROT_NONE-mapped pages: * pte_access_permitted() better should reject these pages * either way: otherwise, GUP-fast might succeed in * cases where ordinary GUP would fail due to VMA access * permissions. */ if (pte_protnone(pte)) goto pte_unmap; if (!pte_access_permitted(pte, flags & FOLL_WRITE)) goto pte_unmap; if (pte_devmap(pte)) { if (unlikely(flags & FOLL_LONGTERM)) goto pte_unmap; pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, flags, pages); goto pte_unmap; } } else if (pte_special(pte)) goto pte_unmap; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); folio = try_grab_folio(page, 1, flags); if (!folio) goto pte_unmap; if (unlikely(folio_is_secretmem(folio))) { gup_put_folio(folio, 1, flags); goto pte_unmap; } if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) || unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { gup_put_folio(folio, 1, flags); goto pte_unmap; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, 1, flags); goto pte_unmap; } if (!pte_write(pte) && gup_must_unshare(NULL, flags, page)) { gup_put_folio(folio, 1, flags); goto pte_unmap; } /* * We need to make the page accessible if and only if we are * going to access its content (the FOLL_PIN case). Please * see Documentation/core-api/pin_user_pages.rst for * details. */ if (flags & FOLL_PIN) { ret = arch_make_page_accessible(page); if (ret) { gup_put_folio(folio, 1, flags); goto pte_unmap; } } folio_set_referenced(folio); pages[*nr] = page; (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); ret = 1; pte_unmap: if (pgmap) put_dev_pagemap(pgmap); pte_unmap(ptem); return ret; } #else /* * If we can't determine whether or not a pte is special, then fail immediately * for ptes. Note, we can still pin HugeTLB and THP as these are guaranteed not * to be special. * * For a futex to be placed on a THP tail page, get_futex_key requires a * get_user_pages_fast_only implementation that can pin pages. Thus it's still * useful to have gup_huge_pmd even if we can't operate on ptes. */ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { return 0; } #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
non concurrent fast gup approach that checks for pinned page and unmaps pte or clears it
-
#ifdef CONFIG_HAVE_FAST_GUP /* * Used in the GUP-fast path to determine whether a pin is permitted for a * specific folio. * * This call assumes the caller has pinned the folio, that the lowest page table * level still points to this folio, and that interrupts have been disabled. * * Writing to pinned file-backed dirty tracked folios is inherently problematic * (see comment describing the writable_file_mapping_allowed() function). We * therefore try to avoid the most egregious case of a long-term mapping doing * so. * * This function cannot be as thorough as that one as the VMA is not available * in the fast path, so instead we whitelist known good cases and if in doubt, * fall back to the slow path. */ static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags) { struct address_space *mapping; unsigned long mapping_flags; /* * If we aren't pinning then no problematic write can occur. A long term * pin is the most egregious case so this is the one we disallow. */ if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) != (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) return true; /* The folio is pinned, so we can safely access folio fields. */ if (WARN_ON_ONCE(folio_test_slab(folio))) return false; /* hugetlb mappings do not require dirty-tracking. */ if (folio_test_hugetlb(folio)) return true; /* * GUP-fast disables IRQs. When IRQS are disabled, RCU grace periods * cannot proceed, which means no actions performed under RCU can * proceed either. * * inodes and thus their mappings are freed under RCU, which means the * mapping cannot be freed beneath us and thus we can safely dereference * it. */ lockdep_assert_irqs_disabled(); /* * However, there may be operations which _alter_ the mapping, so ensure * we read it once and only once. */ mapping = READ_ONCE(folio->mapping); /* * The mapping may have been truncated, in any case we cannot determine * if this mapping is safe - fall back to slow path to determine how to * proceed. */ if (!mapping) return false; /* Anonymous folios pose no problem. */ mapping_flags = (unsigned long)mapping & PAGE_MAPPING_FLAGS; if (mapping_flags) return mapping_flags & PAGE_MAPPING_ANON; /* * At this point, we know the mapping is non-null and points to an * address_space object. The only remaining whitelisted file system is * shmem. */ return shmem_mapping(mapping); }
policy logic. avoids locks unlike get user pages unlocked/locked which seems risky so its not supposed to be used on concurrent gup logic
-
long get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { int locked = 1; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_TOUCH)) return -EINVAL; return __get_user_pages_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); }
policy logic.
-
#ifdef CONFIG_MIGRATION /* * Returns the number of collected pages. Return value is always >= 0. */ static unsigned long collect_longterm_unpinnable_pages( struct list_head *movable_page_list, unsigned long nr_pages, struct page **pages) { unsigned long i, collected = 0; struct folio *prev_folio = NULL; bool drain_allow = true; for (i = 0; i < nr_pages; i++) { struct folio *folio = page_folio(pages[i]); if (folio == prev_folio) continue; prev_folio = folio; if (folio_is_longterm_pinnable(folio)) continue; collected++; if (folio_is_device_coherent(folio)) continue; if (folio_test_hugetlb(folio)) { isolate_hugetlb(folio, movable_page_list); continue; } if (!folio_test_lru(folio) && drain_allow) { lru_add_drain_all(); drain_allow = false; } if (!folio_isolate_lru(folio)) continue; list_add_tail(&folio->lru, movable_page_list); node_stat_mod_folio(folio, NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } return collected; }
-
#ifdef CONFIG_ELF_CORE struct page *get_dump_page(unsigned long addr) { struct page *page; int locked = 0; int ret; ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked, FOLL_FORCE | FOLL_DUMP | FOLL_GET); return (ret == 1) ? page : NULL; } #endif /* CONFIG_ELF_CORE */
part of policy use code likely
-
int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) { struct mm_struct *mm = current->mm; unsigned long end, nstart, nend; struct vm_area_struct *vma = NULL; int locked = 0; long ret = 0; end = start + len; for (nstart = start; nstart < end; nstart = nend) { /* * We want to fault in pages for [nstart; end) address range. * Find first corresponding VMA. */ if (!locked) { locked = 1; mmap_read_lock(mm); vma = find_vma_intersection(mm, nstart, end); } else if (nstart >= vma->vm_end) vma = find_vma_intersection(mm, vma->vm_end, end); if (!vma) break; /* * Set [nstart; nend) to intersection of desired address * range with the first VMA. Also, skip undesirable VMA types. */ nend = min(end, vma->vm_end); if (vma->vm_flags & (VM_IO | VM_PFNMAP)) continue; if (nstart < vma->vm_start) nstart = vma->vm_start; /* * Now fault in a range of pages. populate_vma_page_range() * double checks the vma flags, so that it won't mlock pages * if the vma was already munlocked. */ ret = populate_vma_page_range(vma, nstart, nend, &locked); if (ret < 0) { if (ignore_errors) { ret = 0; continue; /* continue at next VMA */ } break; } nend = nstart + ret * PAGE_SIZE; ret = 0; } if (locked) mmap_read_unlock(mm); return ret; /* 0 or negative error code */ }
policy use function that populates pages like the func before this.
-
long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked) { struct mm_struct *mm = vma->vm_mm; unsigned long nr_pages = (end - start) / PAGE_SIZE; int local_locked = 1; int gup_flags; long ret; VM_BUG_ON(!PAGE_ALIGNED(start)); VM_BUG_ON(!PAGE_ALIGNED(end)); VM_BUG_ON_VMA(start < vma->vm_start, vma); VM_BUG_ON_VMA(end > vma->vm_end, vma); mmap_assert_locked(mm); /* * Rightly or wrongly, the VM_LOCKONFAULT case has never used * faultin_page() to break COW, so it has no work to do here. */ if (vma->vm_flags & VM_LOCKONFAULT) return nr_pages; gup_flags = FOLL_TOUCH; /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW * and we would not want to dirty them for nothing. */ if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) gup_flags |= FOLL_WRITE; /* * We want mlock to succeed for regions that have any permissions * other than PROT_NONE. */ if (vma_is_accessible(vma)) gup_flags |= FOLL_FORCE; if (locked) gup_flags |= FOLL_UNLOCKABLE; /* * We made sure addr is within a VMA, so the following will * not result in a stack expansion that recurses back here. */ ret = __get_user_pages(mm, start, nr_pages, gup_flags, NULL, locked ? locked : &local_locked); lru_add_drain(); return ret; }
policy use code.
-
long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked) { int local_locked = 1; if (!is_valid_gup_args(pages, locked, &gup_flags, FOLL_TOUCH | FOLL_REMOTE)) return -EINVAL; return __get_user_pages_locked(mm, start, nr_pages, pages, locked ? locked : &local_locked, gup_flags); }
policy logic
-
static __always_inline long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, int *locked, unsigned int flags) { long ret, pages_done; bool must_unlock = false; /* * The internal caller expects GUP to manage the lock internally and the * lock must be released when this returns. */ if (!*locked) { if (mmap_read_lock_killable(mm)) return -EAGAIN; must_unlock = true; *locked = 1; } else mmap_assert_locked(mm); if (flags & FOLL_PIN) mm_set_has_pinned_flag(&mm->flags); /* * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior * is to set FOLL_GET if the caller wants pages[] filled in (but has * carelessly failed to specify FOLL_GET), so keep doing that, but only * for FOLL_GET, not for the newer FOLL_PIN. * * FOLL_PIN always expects pages to be non-null, but no need to assert * that here, as any failures will be obvious enough. */ if (pages && !(flags & FOLL_PIN)) flags |= FOLL_GET; pages_done = 0; for (;;) { ret = __get_user_pages(mm, start, nr_pages, flags, pages, locked); if (!(flags & FOLL_UNLOCKABLE)) { /* VM_FAULT_RETRY couldn't trigger, bypass */ pages_done = ret; break; } /* VM_FAULT_RETRY or VM_FAULT_COMPLETED cannot return errors */ if (!*locked) { BUG_ON(ret < 0); BUG_ON(ret >= nr_pages); } if (ret > 0) { nr_pages -= ret; pages_done += ret; if (!nr_pages) break; } if (*locked) { /* * VM_FAULT_RETRY didn't trigger or it was a * FOLL_NOWAIT. */ if (!pages_done) pages_done = ret; break; } /* * VM_FAULT_RETRY triggered, so seek to the faulting offset. * For the prefault case (!pages) we only update counts. */ if (likely(pages)) pages += ret; start += ret << PAGE_SHIFT; /* The lock was temporarily dropped, so we must unlock later */ must_unlock = true; retry: /* * Repeat on the address that fired VM_FAULT_RETRY * with both FAULT_FLAG_ALLOW_RETRY and * FAULT_FLAG_TRIED. Note that GUP can be interrupted * by fatal signals of even common signals, depending on * the caller's request. So we need to check it before we * start trying again otherwise it can loop forever. */ if (gup_signal_pending(flags)) { if (!pages_done) pages_done = -EINTR; break; } ret = mmap_read_lock_killable(mm); if (ret) { BUG_ON(ret > 0); if (!pages_done) pages_done = ret; break; } *locked = 1; ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED, pages, locked); if (!*locked) { /* Continue to retry until we succeeded */ BUG_ON(ret != 0); goto retry; } if (ret != 1) { BUG_ON(ret > 1); if (!pages_done) pages_done = ret; break; } nr_pages--; pages_done++; if (!nr_pages) break; if (likely(pages)) pages++; start += PAGE_SIZE; } if (must_unlock && *locked) { /* * We either temporarily dropped the lock, or the caller * requested that we both acquire and drop the lock. Either way, * we must now unlock, and notify the caller of that state. */ mmap_read_unlock(mm); *locked = 0; } return pages_done; }
same as gup but sets/unsets mmap_lock
-
if (!(flags & FOLL_INTERRUPTIBLE)) return false;
fatal fault signal handler
-
int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked) { struct vm_area_struct *vma; vm_fault_t ret; address = untagged_addr_remote(mm, address); if (unlocked) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; retry: vma = gup_vma_lookup(mm, address); if (!vma) return -EFAULT; if (!vma_permits_fault(vma, fault_flags)) return -EFAULT; if ((fault_flags & FAULT_FLAG_KILLABLE) && fatal_signal_pending(current)) return -EINTR; ret = handle_mm_fault(vma, address, fault_flags, NULL); if (ret & VM_FAULT_COMPLETED) { /* * NOTE: it's a pity that we need to retake the lock here * to pair with the unlock() in the callers. Ideally we * could tell the callers so they do not need to unlock. */ mmap_read_lock(mm); *unlocked = true; return 0; } if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, 0); if (err) return err; BUG(); } if (ret & VM_FAULT_RETRY) { mmap_read_lock(mm); *unlocked = true; fault_flags |= FAULT_FLAG_TRIED; goto retry; } return 0; }
resolves user page fault. policy logic
-
static bool writable_file_mapping_allowed(struct vm_area_struct *vma, unsigned long gup_flags) { /* * If we aren't pinning then no problematic write can occur. A long term * pin is the most egregious case so this is the case we disallow. */ if ((gup_flags & (FOLL_PIN | FOLL_LONGTERM)) != (FOLL_PIN | FOLL_LONGTERM)) return true; /* * If the VMA does not require dirty tracking then no problematic write * can occur either. */ return !vma_needs_dirty_tracking(vma); }
Def policy code. checks if we can write to a map
-
if (*flags & FOLL_NOFAULT) return -EFAULT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (*flags & FOLL_REMOTE) fault_flags |= FAULT_FLAG_REMOTE; if (*flags & FOLL_UNLOCKABLE) { fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * FAULT_FLAG_INTERRUPTIBLE is opt-in. GUP callers must set * FOLL_INTERRUPTIBLE to enable FAULT_FLAG_INTERRUPTIBLE. * That's because some callers may not be prepared to * handle early exits caused by non-fatal signals. */ if (*flags & FOLL_INTERRUPTIBLE) fault_flags |= FAULT_FLAG_INTERRUPTIBLE; } if (*flags & FOLL_NOWAIT) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; if (*flags & FOLL_TRIED) { /* * Note: FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_TRIED * can co-exist */ fault_flags |= FAULT_FLAG_TRIED; } if (unshare) { fault_flags |= FAULT_FLAG_UNSHARE; /* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */ VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE); } ret = handle_mm_fault(vma, address, fault_flags, NULL); if (ret & VM_FAULT_COMPLETED) { /* * With FAULT_FLAG_RETRY_NOWAIT we'll never release the * mmap lock in the page fault handler. Sanity check this. */ WARN_ON_ONCE(fault_flags & FAULT_FLAG_RETRY_NOWAIT); *locked = 0; /* * We should do the same as VM_FAULT_RETRY, but let's not * return -EBUSY since that's not reflecting the reality of * what has happened - we've just fully completed a page * fault, with the mmap lock released. Use -EAGAIN to show * that we want to take the mmap lock _again_. */ return -EAGAIN; } if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, *flags); if (err) return err; BUG(); } if (ret & VM_FAULT_RETRY) { if (!(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *locked = 0; return -EBUSY; }
Seems it's just setting flags for page faults based on flags param
-
static struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int flags, struct follow_page_context *ctx) { pgd_t *pgd; struct mm_struct *mm = vma->vm_mm; ctx->page_mask = 0; /* * Call hugetlb_follow_page_mask for hugetlb vmas as it will use * special hugetlb page table walking code. This eliminates the * need to check for hugetlb entries in the general walking code. */ if (is_vm_hugetlb_page(vma)) return hugetlb_follow_page_mask(vma, address, flags, &ctx->page_mask); pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); return follow_p4d_mask(vma, address, pgd, flags, ctx); }
places mask after following page into pte
-
struct page *follow_page(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags) { struct follow_page_context ctx = { NULL }; struct page *page; if (vma_is_secretmem(vma)) return NULL; if (WARN_ON_ONCE(foll_flags & FOLL_PIN)) return NULL; /* * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect * to fail on PROT_NONE-mapped pages. */ page = follow_page_mask(vma, address, foll_flags, &ctx); if (ctx.pgmap) put_dev_pagemap(ctx.pgmap); return page; }
finds page
-
if (flags & FOLL_SPLIT_PMD) { spin_unlock(ptl); split_huge_pmd(vma, pmd, address); /* If pmd was left empty, stuff a page table in there quickly */ return pte_alloc(mm, pmd) ? ERR_PTR(-ENOMEM) : follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); } page = follow_trans_huge_pmd(vma, address, pmd, flags); spin_unlock(ptl); ctx->page_mask = HPAGE_PMD_NR - 1; return page;
we're finding the page again but storing page mask in ctx
-
/* FOLL_GET and FOLL_PIN are mutually exclusive. */ if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) return ERR_PTR(-EINVAL); ptep = pte_offset_map_lock(mm, pmd, address, &ptl); if (!ptep) return no_page_table(vma, flags); pte = ptep_get(ptep); if (!pte_present(pte)) goto no_page; if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags)) goto no_page; page = vm_normal_page(vma, address, pte); /* * We only care about anon pages in can_follow_write_pte() and don't * have to worry about pte_devmap() because they are never anon. */ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, vma, flags)) { page = NULL; goto out; } if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) { /* * Only return device mapping pages in the FOLL_GET or FOLL_PIN * case since they are only valid while holding the pgmap * reference. */ *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap); if (*pgmap) page = pte_page(pte); else goto no_page; } else if (unlikely(!page)) { if (flags & FOLL_DUMP) { /* Avoid special (like zero) pages in core dumps */ page = ERR_PTR(-EFAULT); goto out; } if (is_zero_pfn(pte_pfn(pte))) { page = pte_page(pte); } else { ret = follow_pfn_pte(vma, address, ptep, flags); page = ERR_PTR(ret); goto out; } } if (!pte_write(pte) && gup_must_unshare(vma, flags, page)) { page = ERR_PTR(-EMLINK); goto out; } VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && !PageAnonExclusive(page), page); /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */ ret = try_grab_page(page, flags); if (unlikely(ret)) { page = ERR_PTR(ret); goto out; } /* * We need to make the page accessible if and only if we are going * to access its content (the FOLL_PIN case). Please see * Documentation/core-api/pin_user_pages.rst for details. */ if (flags & FOLL_PIN) { ret = arch_make_page_accessible(page); if (ret) { unpin_user_page(page); page = ERR_PTR(ret); goto out; } } if (flags & FOLL_TOUCH) { if ((flags & FOLL_WRITE) && !pte_dirty(pte) && !PageDirty(page)) set_page_dirty(page); /* * pte_mkyoung() would be more correct here, but atomic care * is needed to avoid losing the dirty bit: it is easier to use * mark_page_accessed(). */ mark_page_accessed(page); }
finds page in pte. Judging by the complexity of the logic this is most likely policy code because we're literally getting user page
-
if (pte_write(pte)) return true; /* Maybe FOLL_FORCE is set to override it? */ if (!(flags & FOLL_FORCE)) return false; /* But FOLL_FORCE has no effect on shared mappings */ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) return false; /* ... or read-only private ones */ if (!(vma->vm_flags & VM_MAYWRITE)) return false; /* ... or already writable ones that just need to take a write fault */ if (vma->vm_flags & VM_WRITE) return false; /* * See can_change_pte_writable(): we broke COW and could map the page * writable if we have an exclusive anonymous page ... */ if (!page || !PageAnon(page) || !PageAnonExclusive(page)) return false; /* ... and a write-fault isn't required for other reasons. */ if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte)) return false; return !userfaultfd_pte_wp(vma, pte);
flag checks to see if u can write to a pte
-
if (flags & FOLL_TOUCH) { pte_t orig_entry = ptep_get(pte); pte_t entry = orig_entry; if (flags & FOLL_WRITE) entry = pte_mkdirty(entry); entry = pte_mkyoung(entry); if (!pte_same(orig_entry, entry)) { set_pte_at(vma->vm_mm, address, pte, entry); update_mmu_cache(vma, address, pte); }
uses pte to mark dirty pages and finds pfn in pte
-
struct folio *folio = page_folio(page); if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) return -ENOMEM; if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) return -EREMOTEIO; if (flags & FOLL_GET) folio_ref_inc(folio);
checks for code that is involved in policy but is not the actual logic
-
else if (flags & FOLL_PIN) { /* * Don't take a pin on the zero page - it's not going anywhere * and it is used in a *lot* of places. */ if (is_zero_page(page)) return 0; /* * Similar to try_grab_folio(): be sure to *also* * increment the normal page refcount field at least once, * so that the page really is pinned. */ if (folio_test_large(folio)) { folio_ref_add(folio, 1); atomic_add(1, &folio->_pincount); } else { folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); } node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); }
Logic that actually tries to grab the folio. Also policy use code and not actual policy
-
if (flags & FOLL_PIN) { if (is_zero_folio(folio)) return; node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); if (folio_test_large(folio)) atomic_sub(refs, &folio->_pincount); else refs *= GUP_PIN_COUNTING_BIAS; }
Checks if the folio is zero/large
-
if (unlikely((flags & FOLL_LONGTERM) && !folio_is_longterm_pinnable(folio))) { if (!put_devmap_managed_page_refs(&folio->page, refs)) folio_put_refs(folio, refs); return NULL;
checks for longterm folio pins.
-
if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) return NULL; if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) return NULL;
Time saving predictions(unlikely) and single time warning func(WARN_ON_ONCE) for flags. Not actual policy logic so low confidence.
-
if (flags & FOLL_GET) return try_get_folio(page, refs);
Policy logic that determines and tries to retrieve folios based on given flags.
-
if (gup_flags & FOLL_PIN) mm_set_has_pinned_flag(¤t->mm->flags); if (!(gup_flags & FOLL_FAST_ONLY)) might_lock_read(¤t->mm->mmap_lock); start = untagged_addr(start) & PAGE_MASK; len = nr_pages << PAGE_SHIFT; if (check_add_overflow(start, len, &end)) return -EOVERFLOW; if (end > TASK_SIZE_MAX) return -EFAULT; if (unlikely(!access_ok((void __user *)start, len))) return -EFAULT;
checking for overflow in page alloc likely
-
if (flags & FOLL_PIN) { ret = arch_make_page_accessible(page); if (ret) { gup_put_folio(folio, 1, flags); goto pte_unmap; } }
part of policy code
-
if (!(gup_flags & FOLL_LONGTERM)) return __get_user_pages_locked(mm, start, nr_pages, pages, locked, gup_flags);
policy decision to get locked page!
-
vm_flags = (foll_flags & FOLL_WRITE) ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); vm_flags &= (foll_flags & FOLL_FORCE) ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
vm_flags
-
if (!(flags & FOLL_UNLOCKABLE)) { /* VM_FAULT_RETRY couldn't trigger, bypass */ pages_done = ret; break; }
flag
-
if (ret & VM_FAULT_COMPLETED) { /* * NOTE: it's a pity that we need to retake the lock here * to pair with the unlock() in the callers. Ideally we * could tell the callers so they do not need to unlock. */ mmap_read_lock(mm); *unlocked = true; return 0; } if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, 0); if (err) return err; BUG(); } if (ret & VM_FAULT_RETRY) { mmap_read_lock(mm); *unlocked = true; fault_flags |= FAULT_FLAG_TRIED; goto retry; }
a lot of VM fault flags that are checked when calling a function that handles page faults
-
if (flags & FOLL_PIN) mm_set_has_pinned_flag(&mm->flags);
-
if (unlocked) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
flag pol to check if map is unlocked (for pos remap?)
-
if (page_increm > nr_pages) page_increm = nr_pages;
next page logic
-
*/ if (gup_flags & FOLL_MADV_POPULATE) { vma = vma_lookup(mm, start); if (!vma) { ret = -ENOMEM; goto out; } if (check_vma_flags(vma, gup_flags)) { ret = -EINVAL; goto out; } goto retry; }
page populate flag for sure
-
if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */ if (is_vm_hugetlb_page(vma)) return -EFAULT; /* * We used to let the write,force case do COW in a * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could * set a breakpoint in a read-only mapping of an * executable, without corrupting the file (yet only * when that file had been opened for writing!). * Anon pages in shared mappings are surprising: now * just reject it. */ if (!is_cow_mapping(vm_flags)) return -EFAULT; } } else if (!(vm_flags & VM_READ)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; /* * Is there actually any vma we can reach here which does not * have VM_MAYREAD set? */ if (!(vm_flags & VM_MAYREAD)) return -EFAULT; }
cow mapping, sdw stack seem like imp policies which dont seem like regular flags
-
- Oct 2024
-
elixir.bootlin.com elixir.bootlin.com
-
if ((gup_flags & (FOLL_PIN | FOLL_LONGTERM)) != (FOLL_PIN | FOLL_LONGTERM)) return true;
most likely part of a policy code that includes flag based decision making
Tags
Annotators
URL
-
-
elixir.bootlin.com elixir.bootlin.com
-
if (s->flags & __CMPXCHG_DOUBLE) { ret = __update_freelist_fast(slab, freelist_old, counters_old, freelist_new, counters_new); } else { ret = __update_freelist_slow(slab, freelist_old, counters_old, freelist_new, counters_new); }
This policy is very similar to annotated code below. The description is reproduced here:
This policy determines if the system has support for compare and exchange. If so, it will use the "__update_freelist_fast()" function, which uses a compare and exchange internally. Otherwise, it will use "__update_freelist_slow()", which uses a lock (specifically a bit-based spinlock) internally.
-
#ifdef CONFIG_SLAB_FREELIST_HARDENED encoded = (unsigned long)ptr ^ s->random ^ swab(ptr_addr); #else encoded = (unsigned long)ptr; #endif
This policy uses the configuration setting "CONFIG_SLAB_FREELIST_HARDENED" to determine whether to obfuscate a SLUB free list pointer or not for increased security at the cost of some performance.
-
#ifdef CONFIG_SLAB_FREELIST_HARDENED decoded = (void *)(ptr.v ^ s->random ^ swab(ptr_addr)); #else decoded = (void *)ptr.v; #endif
This policy is based on the configuration setting indicated by "CONFIG_SLAB_FREELIST_HARDENED", which hardens the slab free list. If the free list is hardened, then free list pointers will be obfuscated; this policy just undoes the obfuscation in that case. In the case where free list pointers are not obfuscated, this function just returns the unmodified pointer value.
-
#ifdef CONFIG_SLAB_FREELIST_RANDOM /* Pre-initialize the random sequence cache */ static int init_cache_random_seq(struct kmem_cache *s) { unsigned int count = oo_objects(s->oo); int err; /* Bailout if already initialised */ if (s->random_seq) return 0; err = cache_random_seq_create(s, count, GFP_KERNEL); if (err) { pr_err("SLUB: Unable to initialize free list for %s\n", s->name); return err; } /* Transform to an offset on the set of pages */ if (s->random_seq) { unsigned int i; for (i = 0; i < count; i++) s->random_seq[i] *= s->size; } return 0; } /* Initialize each random sequence freelist per cache */ static void __init init_freelist_randomization(void) { struct kmem_cache *s; mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) init_cache_random_seq(s); mutex_unlock(&slab_mutex); } /* Get the next entry on the pre-computed freelist randomized */ static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab, unsigned long *pos, void *start, unsigned long page_limit, unsigned long freelist_count) { unsigned int idx; /* * If the target page allocation failed, the number of objects on the * page might be smaller than the usual size defined by the cache. */ do { idx = s->random_seq[*pos]; *pos += 1; if (*pos >= freelist_count) *pos = 0; } while (unlikely(idx >= page_limit)); return (char *)start + idx; } /* Shuffle the single linked freelist based on a random pre-computed sequence */ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) { void *start; void *cur; void *next; unsigned long idx, pos, page_limit, freelist_count; if (slab->objects < 2 || !s->random_seq) return false; freelist_count = oo_objects(s->oo); pos = get_random_u32_below(freelist_count); page_limit = slab->objects * s->size; start = fixup_red_left(s, slab_address(slab)); /* First entry is used as the base of the freelist */ cur = next_freelist_entry(s, slab, &pos, start, page_limit, freelist_count); cur = setup_object(s, cur); slab->freelist = cur; for (idx = 1; idx < slab->objects; idx++) { next = next_freelist_entry(s, slab, &pos, start, page_limit, freelist_count); next = setup_object(s, next); set_freepointer(s, cur, next); cur = next; } set_freepointer(s, cur, NULL); return true; } #else static inline int init_cache_random_seq(struct kmem_cache *s) { return 0; } static inline void init_freelist_randomization(void) { } static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) { return false; } #endif /* CONFIG_SLAB_FREELIST_RANDOM */
This policy looks at the "CONFIG_SLAB_FREELIST_RANDOM" config setting. If it is set, the policy defines functions to randomize the free list order when creating new pages (for security purposes). If it is not set, the functions involved in randomizing the free list are empty, effectively turning off free list randomization.
-
if (s->flags & __CMPXCHG_DOUBLE) { ret = __update_freelist_fast(slab, freelist_old, counters_old, freelist_new, counters_new); } else { unsigned long flags; local_irq_save(flags); ret = __update_freelist_slow(slab, freelist_old, counters_old, freelist_new, counters_new); local_irq_restore(flags); }
This policy determines if the system has support for compare and exchange. If so, it will use the "__update_freelist_fast()" function, which uses a compare and exchange internally. Otherwise, it will use "__update_freelist_slow()", which uses a lock (specifically a bit-based spinlock) internally.
-
-
elixir.bootlin.com elixir.bootlin.com
-
if (dtc->wb_thresh < 2 * wb_stat_error()) { wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE); dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK); } else { wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE); dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK); }
This is a configuration policy that does a more accurate calculation on the number of reclaimable pages and dirty pages when the threshold for the dirty pages in the writeback context is lower than 2 times the maximal error of a stat counter.
-
static long wb_min_pause(struct bdi_writeback *wb, long max_pause, unsigned long task_ratelimit, unsigned long dirty_ratelimit, int *nr_dirtied_pause
This function is an algorithmic policy that determines the minimum throttle time for a process between consecutive writeback operations for dirty pages based on heuristics. It is used for balancing the load of the I/O subsystems so that there will not be excessive I/O operations that impact the performance of the system.
-
if (!laptop_mode && nr_reclaimable > gdtc->bg_thresh && !writeback_in_progress(wb)) wb_start_background_writeback(wb);
This is a configuration policy that determines whether to start background writeout. The code here indicates that if laptop_mode, which will reduce disk activity for power saving, is not set, then when the number of dirty pages reaches the bg_thresh threshold, the system starts writing back pages.
-
if (thresh > dirty) return 1UL << (ilog2(thresh - dirty) >> 1);
This implements a configuration policy that determines the interval for the kernel to wake up and check for dirty pages that need to be written back to disk.
-
limit -= (limit - thresh) >> 5;
This is a configuration policy that determines how much should the limit be updated. The limit controls the amount of dirty memory allowed in the system.
-
if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) && (!mdtc || m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) { unsigned long intv; unsigned long m_intv; free_running: intv = dirty_poll_interval(dirty, thresh); m_intv = ULONG_MAX; current->dirty_paused_when = now; current->nr_dirtied = 0; if (mdtc) m_intv = dirty_poll_interval(m_dirty, m_thresh); current->nr_dirtied_pause = min(intv, m_intv); break; } /* Start writeback even when in laptop mode */ if (unlikely(!writeback_in_progress(wb))) wb_start_background_writeback(wb); mem_cgroup_flush_foreign(wb); /* * Calculate global domain's pos_ratio and select the * global dtc by default. */ if (!strictlimit) { wb_dirty_limits(gdtc); if ((current->flags & PF_LOCAL_THROTTLE) && gdtc->wb_dirty < dirty_freerun_ceiling(gdtc->wb_thresh, gdtc->wb_bg_thresh)) /* * LOCAL_THROTTLE tasks must not be throttled * when below the per-wb freerun ceiling. */ goto free_running; } dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) && ((gdtc->dirty > gdtc->thresh) || strictlimit); wb_position_ratio(gdtc); sdtc = gdtc; if (mdtc) { /* * If memcg domain is in effect, calculate its * pos_ratio. @wb should satisfy constraints from * both global and memcg domains. Choose the one * w/ lower pos_ratio. */ if (!strictlimit) { wb_dirty_limits(mdtc); if ((current->flags & PF_LOCAL_THROTTLE) && mdtc->wb_dirty < dirty_freerun_ceiling(mdtc->wb_thresh, mdtc->wb_bg_thresh)) /* * LOCAL_THROTTLE tasks must not be * throttled when below the per-wb * freerun ceiling. */ goto free_running; } dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) && ((mdtc->dirty > mdtc->thresh) || strictlimit); wb_position_ratio(mdtc); if (mdtc->pos_ratio < gdtc->pos_ratio) sdtc = mdtc; }
This is an algorithmic policy that determines whether the process can run freely or a throttle is needed to control the rate of the writeback by checking if the number of dirty pages exceed the average of the global threshold and background threshold.
-
shift = dirty_ratelimit / (2 * step + 1); if (shift < BITS_PER_LONG) step = DIV_ROUND_UP(step >> shift, 8); else step = 0; if (dirty_ratelimit < balanced_dirty_ratelimit) dirty_ratelimit += step; else dirty_ratelimit -= step;
This is a configuration policy that determines how much we should increase/decrease the dirty_ratelimit, which controls the rate that processors write dirty pages back to storage.
-
ratelimit_pages = dirty_thresh / (num_online_cpus() * 32); if (ratelimit_pages < 16) ratelimit_pages = 16;
This is a configuration policy that dynamically determines the rate that kernel can write dirty pages back to storage in a single writeback cycle.
-
t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
This implements a configuration policy that determines the maximum time that the kernel should wait between writeback operations for dirty pages. This ensures that dirty pages are flushed to disk within a reasonable time frame and control the risk of data loss in case of a system crash.
-
if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
This is a configuration policy that controls whether to update the limit in the control group. The config enables support for controlling the writeback of dirty pages on a per-cgroup basis in the Linux kernel. This allows for better resource management and improved performance.
-
-
elixir.bootlin.com elixir.bootlin.com
-
enum get_ksm_page_flags { GET_KSM_PAGE_NOLOCK, GET_KSM_PAGE_LOCK, GET_KSM_PAGE_TRYLOCK };
This defines the locking behavior when accessing a KSM page. The caller can choose between no locking, locking, or trying to lock without blocking, affecting how the page is accessed and modified.
-
static int ksmd_should_run(void) { return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.slot.mm_node); }
This function checks whether the ksmd should run based on the KSM_RUN_MERGE flag and whether there are any memory regions to process. The decision to enable or disable memory merging is controlled by the ksm_run configuration.
-
-
elixir.bootlin.com elixir.bootlin.com
-
randomize_stack_top
This function uses a configuration policy to enable Address Space Layout Randomization (ASLR) for a specific process if PF_RANDOMIZE flag is set. It randomly arranges the positions of stack of a process to help defend certain attacks by making memory addresses unpredictable.
-
-
elixir.bootlin.com elixir.bootlin.com
-
if (movable_node_is_enabled()) {
This policy, as the comment states, will ignore the kernelcore and movablecore options if movable nodes are enabled (skipping the logic below this if statement and jumping to "out2" instead). The logic for the "movable_node_is_enabled()" function is in "memory_hotplug.h".
-
if (page_poisoning_enabled() || (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) && debug_pagealloc_enabled())) {
This check looks at flags and configuration settings to determine if page poisoning should be enabled.
-
if (descending)
This decides whether to iterate forward or backward through the zones passed into the function. The variable "descending" is determined by the "arch_has_descending_max_zone_pfns()" function on line 1811, which is determined from configuration options.
-
if (overcommit_policy == OVERCOMMIT_NEVER)
This policy controls the memory batch size based on the overcommit policy, choosing a smaller batch size when the policy is OVERCOMMIT_NEVER.
-
-
elixir.bootlin.com elixir.bootlin.com
-
if (prev_class) { if (can_merge(prev_class, pages_per_zspage, objs_per_zspage)) { pool->size_class[i] = prev_class; continue; } }
This is an algorithmic policy. A
zs_pool
maintainszs_page
s of differentsize_class
. However, somesize_class
es share exactly same characteristics, namelypages_per_zspage
andobjs_per_zspage
. Recall the other annotation of mine, it searched freezspage
s bysize_class
:zspage = find_get_zspage(class);
. Thus, grouping different classes improves memory utilization. -
zspage = find_get_zspage(class); if (likely(zspage)) { obj = obj_malloc(pool, zspage, handle); /* Now move the zspage to another fullness group, if required */ fix_fullness_group(class, zspage); record_obj(handle, obj); class_stat_inc(class, ZS_OBJS_INUSE, 1); goto out; }
This is an algorithmic policy. Instead of immediately allocating new zspages for each memory request, the algorithm first attempts to find and reuse existing partially filled zspages from a given size class by invoking the find_get_zspage(class) function. It also updates the corresponding fullness groups.
-
-
elixir.bootlin.com elixir.bootlin.com
-
1
This is a configuration policy that sets the timeout between retries if vmap_pages_range() fails. This could be tunable variable.
-
if (!(flags & VM_ALLOC)) area->addr = kasan_unpoison_vmalloc(area->addr, requested_size, KASAN_VMALLOC_PROT_NORMAL);
This is an algorithmic policy. This is an optimization that prevents duplicate marks of accessibility. Only pages allocated without
VM_ALLOC
(e.g. ioremap) was not set accessible (unpoison), thus requiring explicit setting here. -
100U
This is an configuration policy that determines 100 pages are the upper limit for the bulk-allocator. However, the implementation of
alloc_pages_bulk_array_mempolicy
does not explicitly limit in the implementation. So I believe it is an algorithmic policy related to some sort of optimization. -
if (!order) {
This is an algorithmic policy determines that only use the bulk allocator for order-0 pages (non-super pages). Maybe the bulk allocator could be applied to super pages to speed up allocation. Currently I haven't seen the reason why it cannot be applied.
-
if (likely(count <= VMAP_MAX_ALLOC)) { mem = vb_alloc(size, GFP_KERNEL); if (IS_ERR(mem)) return NULL; addr = (unsigned long)mem; } else { struct vmap_area *va; va = alloc_vmap_area(size, PAGE_SIZE, VMALLOC_START, VMALLOC_END, node, GFP_KERNEL, VMAP_RAM); if (IS_ERR(va)) return NULL; addr = va->va_start; mem = (void *)addr; }
This is an algorithmic policy that determines whether to use a more efficient
vl_alloc
which does not involve complex virtual-to-physical mappings. Unlike the latteralloc_vmap_area
,vb_alloc
does not need to traverse the rb-tree of free vmap areas. It simply find a larger enough block fromvmap_block_queue
. -
VMAP_PURGE_THRESHOLD
The threshold VMAP_PURGE_THRESHOLD is a configuration policy that could be tuned by machine learning. Setting this threshold lower reduces purging activities while setting it higher reduces framentation.
-
if (!(force_purge
This is an algorithmic policy that prevents purging blocks with considerable amount of "usable memory" unless requested with force_purge.
-
resched_threshold = lazy_max_pages() << 1;
The assignment of resched_threshold and lines 1776-1777 are configuration policies to determine fewer than which number of lazily-freed pages it should yield CPU temporarily to higher-priority tasks.
-
log = fls(num_online_cpus());
This heuristic scales lazy_max_pages logarithmically, which is a configuration policy. Alternatively, machine learning could determine the optimal scaling function—whether linear, logarithmic, square-root, or another approach.
-
32UL * 1024 * 1024
This is a configuration policy that decides to always returns multiples of 32 MB worth of pages. This could be a configurable variable rather than a fixed magic number.
-
-
elixir.bootlin.com elixir.bootlin.com
-
static bool should_skip_region(struct memblock_type *type, struct memblock_region *m, int nid, int flags) { int m_nid = memblock_get_region_node(m); /* we never skip regions when iterating memblock.reserved or physmem */ if (type != memblock_memory) return false; /* only memory regions are associated with nodes, check it */ if (nid != NUMA_NO_NODE && nid != m_nid) return true; /* skip hotpluggable memory regions if needed */ if (movable_node_is_enabled() && memblock_is_hotpluggable(m) && !(flags & MEMBLOCK_HOTPLUG)) return true; /* if we want mirror memory skip non-mirror memory regions */ if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m)) return true; /* skip nomap memory unless we were asked for it explicitly */ if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m)) return true; /* skip driver-managed memory unless we were asked for it explicitly */ if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m)) return true; return false; }
This policy determines whether a memblock region should be skipped, based on several checks that incorporate various flags. You can see this policy being used in other functions on lines 1080 and 1184 in this file; these other functions appear to be sub-functions for iterators on the memblock regions.
-
if (memblock_bottom_up())
This policy controls whether the memblock allocator should allocate memory from the bottom up or from the top down.
-
- Sep 2024
-
elixir.bootlin.com elixir.bootlin.com
-
if (lruvec->file_cost + lruvec->anon_cost > lrusize / 4) { lruvec->file_cost /= 2; lruvec->anon_cost /= 2; }
It is a configuration policy. The policy here is to adjust the cost of current page. The cost means the overhead for kernel to operate the page if the page is swapped out. Kernel adopts a decay policy. Specifically, if current cost is greater than lrusize/4, its cost will be divided by 2. If kernel has no this policy, after a long-term running, the cost of each page is very high. Kernel might mistakenly reserve pages which are frequently visited long time ago but are inactive currently. It might cause performance degradation. The value (lrusize/4) here has a trade-off between performance and sensitivity. For example, if the value is too small, kernel will frequently adjust the priority of each page, resulting in process's performance degradation. If the value is too large, kernel might be misleaded by historical data, causing wrongly swapping currently popular pages and further performance degradation.
-
if (megs < 16) page_cluster = 2; else page_cluster = 3;
It is a configuration policy. The policy here is to determine the size of page cluster. "2" and "3" is determined externally to the kernel. Page cluster is the actual execution unit when swapping. If the machine is small-memory, kernel executes swap operation on 4 (2^2) pages for each time. Otherwise, kernel operats 8 (2^3) for each time. The rational here is to avoid much memory pressure for small-memory system and to improve performance for large-memory system.
-
-
elixir.bootlin.com elixir.bootlin.com
-
mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
It is a configuration policy. It is used to count the physical page for memory control group. The policy here is one physical page corresponds to one count to the memory control group. If using huge page, the value here might not be 1,
-
schedule_timeout_uninterruptible(1);
It is a configuration policy. If kernel cannot allocate an enough virtual space with alignment, while nofail is specified to disallow failure, the kernel will let the process to sleep for 1 time slot. In this period, the process cannot be interrupted and quit the schedule queue of CPU. The "1" here is a configuration parameter, made externally to the kernel. It is not large enough for latency-sensitive process and is not small enough to retry frequently.
-
if (array_size > PAGE_SIZE) { area->pages = __vmalloc_node(array_size, 1, nested_gfp, node, area->caller); } else { area->pages = kmalloc_node(array_size, nested_gfp, node); }
It is an algorithmic policy and also a configuration policy. The algorithmic policy here is to maxmize the data locality of virtual memory address, by letting the space be in continuous virtual pages. If the demanded size of virtual memory is larger than one page, the kernel will allocate multiple continuous pages for it by using __vmalloc_node(). Otherwise, the kernel will call kmalloc_node() to place it in a single page. On the other hand, to allocate continuous pages, we should invoke __vmalloc_node() by declaring align = 1, which is a configuration policy.
-
if (!counters) return; if (v->flags & VM_UNINITIALIZED) return;
It is an algorithmic policy. The show_numa_info() is used for printing how the virtual memory of v (vm_struct) distributes its pages across numa nodes. The two if conditions are used for filtering the invalid v. The first if means the virtual memory has not been associated with physical page. The second if means the virtual memory has not been initialized yet.
-
if (page) copied = copy_page_to_iter_nofault(page, offset, length, iter); else copied = zero_iter(iter, length);
It is an algorithmic policy. The policy here is to avoid lock/unlock overhead by using copy_page_to_iter_nofault() function plus if-else branch. Because copy_page_to_iter_nofault() is based on no page fault, we need to check the physical page is still alive. So the kernel uses an if condition to guarantee copy_page_to_iter_nofault() without page fault. On the other hand, if the page is not valid, kernel chooses to fill the iter using zero value instead of returning an error. I think it is because it can make invocation more convenient. The caller does not need to do a verification of the return value and does not need further actions to handle it.
-
if (base + last_end < vmalloc_start + last_end) goto overflow; /* * Fitting base has not been found. */ if (va == NULL) goto overflow;
It is an algorithmic policy. The two if conditions are used for checking the feasibility of the allocation. The first if means the adress of the allocation is beyond the allowable address, causing overflow. In this case, since the base address is decided from higher one to smaller one, reducing base address cannot work. Note that the base address is dependent on the va. The second if means we don't find a valid va which has appropriate base address. So the policy here is going to overflow branch to re-allocate va list.
-
if (base + start < va->va_start) { va = node_to_va(rb_prev(&va->rb_node)); base = pvm_determine_end_from_reverse(&va, align) - end; term_area = area; continue; }
It is an algorithmic policy. It is used for checking whether the start address of current va is valid. The policy here is to change a new va, different from the solution of "end address check" (explained in my other annotation). The reason is that we find the base address from high address to low address. If we continue to reduce the base address, the if condition will be always wrong. Changing a new va with a lower start address is the only solution.
-
if (base + end > va->va_end) { base = pvm_determine_end_from_reverse(&va, align) - end; term_area = area; continue; }
It is an algorithmic policy. The outer loop is an endless loop until finding a vaild virtual memory space satisfying the requirements. The if condition here is used to guarantee the space in va is large enough. If not, we continue to scan the memory space from high address to low address while satisfying the alignment requirement. Specifically, tuning the base address to a lower address. And then, it will retry for verification.
-
va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT); if (WARN_ON_ONCE(!va)) continue;
It is an algorithmic policy. The for loop is used for transferring the virtual memory managment by using from vm_struct (link list) to vm_area (tree?). Inside the for loop, if we cannot allocate a new vmap_area from vmap_area_cachep (va = NULL), the program will simply print a warning and skip current tmp, instread of retry or anything to guarantee the integrity.
-
if (!spin_trylock(&vmap_area_lock)) return false;
It is an algorithmic policy. The vmalloc_dump_obj() function is used for printing the information of specified virtual memory. Due to concurrency, before manipulating the critical state, we must obtain the lock. The policy here is spin_trylock. It only tries once. If fails, the function will simply returns false. I think the policy here is to avoid long-term waiting to improve the performance.
-
- Aug 2024
-
elixir.bootlin.com elixir.bootlin.com
-
if (hugetlb_cgroup_disabled())
This probably not an interesting policy decision for ldos. It is a feature flag for the running OS. But if cgroups were decided by policy then this flag would be controlled by the cgroup decision.
-
- Sep 2023
-
hypothes.is hypothes.is
-
"Surrendering" by Ocean Vuong
-
He moved into United State when he was age of five. He first came to United State when he started kindergarten. Seven of them live in the apartment one bedroom and bathroom to share the whole. He learned ABC song and alphabet. He knows the ABC that he forgot the letter is M comes before N.
-
He went to the library since he was on the recess. He was in the library hiding from the bully. The bully just came in the library doing the slight frame and soft voice in front of the kid where he sit. He left the library, he walked to the middle of the schoolyard started calling him the pansy and fairy. He knows the American flag that he recognize on the microphone against the backdrop.
-
Tags
- Weeks earlier, I’d been in the library. It was where I would hide during recess. Otherwise, because of my slight frame and soft voice, the boys would call me “pansy” and “fairy” and pull my shorts around my ankles in the middle of the schoolyard. I sat on the floor beside a tape player. From a box of cassettes, I chose one labelled “Great American Speeches.” I picked it because of the illustration, a microphone against a backdrop of the American flag. I picked it because the American flag was one of the few symbols I recognized.
- My family immigrated to the U.S. from Vietnam in 1990, when I was two. We lived, all seven of us, in a one-bedroom apartment in Hartford, Connecticut, and I spent my first five years in America surrounded, inundated, by the Vietnamese language. When I entered kindergarten, I was, in a sense, immigrating all over again, except this time into English. Like any American child, I quickly learned my ABCs, thanks to the age-old melody (one I still sing rapidly to myself when I forget whether “M” comes before “N”). Within a few years, I had become fluent—but only in speech, not in the written word.
Annotators
URL
-
- Apr 2022
-
twitter.com twitter.com
-
Dr Ellie Murray. (2021, February 23). A thing I feel is weird about how we are all reacting to this pandemic: Mourning is still so individual & private. It surprises me there aren’t campaigns for armbands, ribbons, wreaths on doors, or some sort of flag in the window to say “a loved one was lost to COVID here”. [Tweet]. @EpiEllie. https://twitter.com/EpiEllie/status/1364033220904427524
-
- Mar 2022
-
-
For Aboriginal Australians,its importance is recognised by its position at the centre of thenational Aboriginal flag, developed in 1971 by Luritja artist HaroldThomas.
The Aboriginal flag was developed in 1971 by Luritja artist Harold Thomas. Centering its importance to Aboriginal Australians, the sun appears in the middle of the flag.
It's subtle here, as in other instances, but notice that Hamacher gives the citation to the Indigenous artist that developed the flag and simultaneously underlines the source of visual information that is associated with the flag and the sun. It's not just the knowledge of the two things which are associated to each other, but they're also both associated with a person who is that source of knowledge.
Is this three-way association common in all Indigenous cultures? While names may be tricky for some, the visual image of a particular person's face, body, and presence is usually very memorable and thereby easy to attach to various forms of knowledge.
Does the person/source of knowledge form or act like an 'oral folder' for Indigenous knowledge?
-
- Jul 2021
-
poisotlab.github.io poisotlab.github.io
-
contest
Are we really contesting that?
-
predictions of binary interactions can be more readily interpreted.
Repeated sentence
-
-
www.migrationencounters.org www.migrationencounters.org
-
Ben: Oh yeah. Yeah. Yes, I felt normal, I had a lot of friends and our high school, the high school I went to, there was very few Hispanics- period, very few blacks. If you looked at that high school, if you pulled it up—well actually they made them remove the confederate flag, because the confederate flag was part of, it was the school football team logo and it was on their helmets. They were called the Southland Prairie Warriors, and when they ran out on the football field, they carried the confederate flag—and the high school flew the confederate flag up with the Texas and US flag—which it would make you think the opposite. I can't say that the school was…Of course there were a few people, but I did okay. I didn't feel out of place and I felt pretty well accepted by others.
Time in the US, School, High School, Fitting in/belonging
Tags
Annotators
URL
-
- Jun 2021
-
poisotlab.github.io poisotlab.github.io
-
Given two species co-occur, a neutral approach to probabilistic interactions would assume that the effect of abundances and trait matching would have no effec
I think you mean: A neutral approach to probabilistic interactions would rely only on the effect of abundances and assume trait-matching would have no effect?
Tags
Annotators
URL
-
- Mar 2021
-
twitter.com twitter.com
-
ReconfigBehSci on Twitter: ‘RT @factmata: We are excited to launch of a side project we worked on this summer—Https://t.co/2yGSgkqzTG. We scan Twitter profiles with…’ / Twitter. (n.d.). Retrieved 6 March 2021, from https://twitter.com/SciBeh/status/1323664538777124867
-
- Feb 2021
-
poisotlab.github.io poisotlab.github.io
-
This could potentially be solved through our framework of predicting networks first, interactions next, and finally the realized species pool.
I think this might be confusing, as readers could think this is the actual framework of our paper. We could maybe precise that we could use our framework the other way around, by using our predictions of ecological networks and species interactions to make better predictions of species pools.
-
all interactions occur between species in each pool
I think this definition of bipartite network is a little confusing. We could say something like:
"Bipartite networks are divided into two disjoint sets of species and interactions occur between members of different sets (e.g. plant-pollinator and host-parasite networks)"
-
there have been calls for a probabilistic species pool
you say twice there have been calls. Maybe simplify the sentence with something like: through probabilistic species pool, and more importantly ...
-
.S
missing space
-
embedding projects
missing words
-
reached an accuracy of ≈ 0.8
Semi- by curiosity, semi- because it might be better to explicitly say it, but how was accuracy calculated? It is not clear if correctly predicted absences are used? As stated before, there are no true negative.
-
Here adopt a question-driven approach to serve as a guide through the path toward building models to predict and forecast the structure of ecological networks across space, and to identify the next steps in the research regime.
The sentence is very hard to read. too many long noun strings.
-
-
web.hypothes.is web.hypothes.is
-
It is not currently possible to “un-flag” an annotation — if an annotation is flagged by mistake, the group creator can choose not to hide it.
Wow, this is so emblematic of the age... any person can taint another irrevocably with a flag!
And an accident cannot be undone!
Now when I screen share on Zoom to discuss our document, everyone will see a big red flag.
Tags
Annotators
URL
-
- Dec 2020
- Sep 2020
-
dagrs.berkeley.edu dagrs.berkeley.edu
-
Grzegorz Grudzi ́nski
Hello world
Tags
Annotators
URL
-
- Jul 2020
-
gitlab.com gitlab.com
- Jun 2020
-
www.scientificamerican.com www.scientificamerican.com
-
Sathya, C. (2020, June 17). Pandemic-Related Gun Purchases Raise Suicide Risks. Scientific American. https://www.scientificamerican.com/article/pandemic-related-gun-purchases-raise-suicide-risks/
-
- May 2020
-
gitlab.com gitlab.com
-
The Inherited Environment Variables feature is under development and not ready for production use. It is deployed behind a feature flag that is disabled by default.
Tags
Annotators
URL
-
- Jun 2019
-
earlybritishlit.pressbooks.com earlybritishlit.pressbooks.com
-
Yanity and vice
Does anybody know what this means?
-
- Jan 2016
-
www.elysee.fr www.elysee.fr
-
le bleu et le rouge
J'aurais aimé savoir quel bleu et quel rouge sont utilisés sur le drapeau.
Tags
Annotators
URL
-