Linux Memory Management (7): VMA Operations


Series: Linux memory management

Keywords: VMA, vm_area_struct, finding/inserting/merging VMAs, red-black tree.

On a 32-bit system a user process can own as much as 3GB of virtual address space, far larger than physical memory. How, then, is a process's virtual address space managed?

malloc() and mmap() both request blocks of memory in the virtual address space, but the physical memory backing them is usually scattered.

In the kernel these regions of a process's address space are described by struct vm_area_struct, VMA for short, also called process address-space regions or linear regions.

1. Data structures

struct vm_area_struct is the descriptor of a VMA. Once created, a VMA is inserted into both the mm->mm_rb red-black tree and the mm->mmap linked list.

/*
 * This struct defines a memory VMM memory area. There is one of these
 * per VM-area/task.  A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
    /* The first cache line has the info for VMA tree walking. */

    unsigned long vm_start;        /* Our start address within vm_mm. */----Start and end addresses of the VMA within the process address space
    unsigned long vm_end;          /* The first byte after our end address
                                      within vm_mm. */

    /* linked list of VM areas per task, sorted by address */
    struct vm_area_struct *vm_next, *vm_prev;----Previous/next members on the per-process VMA list

    struct rb_node vm_rb;----Node that links this VMA into the red-black tree; each process's mm_struct has its own tree, mm->mm_rb.

    /*
     * Largest free memory gap in bytes to the left of this VMA.
     * Either between this VMA and vma->vm_prev, or between one of the
     * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
     * get_unmapped_area find a free area of the right size.
     */
    unsigned long rb_subtree_gap;

    /* Second cache line starts here. */

    struct mm_struct *vm_mm;       /* The address space we belong to. */----Points to the struct mm_struct of the process this VMA belongs to.
    pgprot_t vm_page_prot;         /* Access permissions of this VMA. */----VMA access permissions
    unsigned long vm_flags;        /* Flags, see mm.h. */----VMA flag bits

    /*
     * For areas with an address space and backing store,
     * linkage into the address_space->i_mmap interval tree.
     */
    struct {
        struct rb_node rb;
        unsigned long rb_subtree_last;
    } shared;

    /*
     * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
     * list, after a COW of one of the file pages.  A MAP_SHARED vma
     * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
     * or brk vma (with NULL file) can only be in an anon_vma list.
     */
    struct list_head anon_vma_chain; /* Serialized by mmap_sem &
                                      * page_table_lock */----Used for RMAP reverse mapping.
    struct anon_vma *anon_vma;     /* Serialized by page_table_lock */----Used for RMAP reverse mapping.

    /* Function pointers to deal with this struct. */
    const struct vm_operations_struct *vm_ops;----Collection of VMA operation callbacks, commonly used for file mappings.

    /* Information about our backing store: */
    unsigned long vm_pgoff;        /* Offset (within vm_file) in PAGE_SIZE
                                      units, *not* PAGE_CACHE_SIZE */----Offset of the file mapping, in units of pages.
    struct file * vm_file;         /* File we map to (can be NULL). */----Describes the mapped file.
    void * vm_private_data;        /* was vm_pte (shared mem) */

#ifndef CONFIG_MMU
    struct vm_region *vm_region;   /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
    struct mempolicy *vm_policy;   /* NUMA policy for the VMA */
#endif
};


struct mm_struct is the core data structure describing a process's memory management, and VMAs are the memory regions of a process. Within mm_struct, vm_area_struct objects are managed through the mmap list and the mm_rb red-black tree.

struct mm_struct {
    struct vm_area_struct *mmap;   /* list of VMAs */----Singly linked list; VMAs are inserted in order of increasing start address, and every VMA is linked here. The list head is mm_struct->mmap.
    struct rb_root mm_rb;----Every VMA is also inserted by address into the mm_struct->mm_rb red-black tree; mm_struct->mm_rb is the root, and each process has its own tree.
    ...
};
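As a quick illustration of the mmap list, here is a minimal sketch (not taken from the kernel source quoted here; dump_vmas() is a hypothetical helper, assumed to run in kernel context with mm->mmap_sem held for reading). The list gives an in-order walk of all VMAs, while mm_rb provides O(log n) lookup by address.

/* Minimal sketch: walk all VMAs of a process via the sorted mmap list. */
static void dump_vmas(struct mm_struct *mm)
{
    struct vm_area_struct *vma;

    for (vma = mm->mmap; vma; vma = vma->vm_next)
        printk("VMA [%#lx, %#lx) flags %#lx\n",
               vma->vm_start, vma->vm_end, vma->vm_flags);
}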

2. Finding a VMA


/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
    struct rb_node *rb_node;
    struct vm_area_struct *vma;

    /* Check the cache first. */
    vma = vmacache_find(mm, addr);
    if (likely(vma))
        return vma;

    rb_node = mm->mm_rb.rb_node;
    vma = NULL;

    while (rb_node) {
        struct vm_area_struct *tmp;

        tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

        if (tmp->vm_end > addr) {
            vma = tmp;
            if (tmp->vm_start <= addr)
                break;
            rb_node = rb_node->rb_left;
        } else
            rb_node = rb_node->rb_right;
    }

    if (vma)
        vmacache_update(addr, vma);
    return vma;
}

struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
{
    int i;

    if (!vmacache_valid(mm))
        return NULL;

    count_vm_vmacache_event(VMACACHE_FIND_CALLS);

    for (i = 0; i < VMACACHE_SIZE; i++) {
        struct vm_area_struct *vma = current->vmacache[i];

        if (!vma)
            continue;
        if (WARN_ON_ONCE(vma->vm_mm != mm))
            break;
        if (vma->vm_start <= addr && vma->vm_end > addr) {
            count_vm_vmacache_event(VMACACHE_FIND_HITS);
            return vma;
        }
    }

    return NULL;
}

void vmacache_update(unsigned long addr, struct vm_area_struct *newvma)
{
    if (vmacache_valid_mm(newvma->vm_mm))
        current->vmacache[VMACACHE_HASH(addr)] = newvma;
}
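Note that find_vma() returns the first VMA satisfying addr < vm_end, which may lie entirely above addr, so a caller must still compare addr with vm_start. Below is a minimal sketch of a typical caller (addr_is_mapped() is a hypothetical helper; it assumes the kernel version quoted above, where the VMA structures are protected by mm->mmap_sem):

/* Hypothetical helper: check whether addr lies inside some VMA of mm.
 * find_vma() may return a VMA that starts above addr, so the extra
 * vm_start check is required. */
static int addr_is_mapped(struct mm_struct *mm, unsigned long addr)
{
    struct vm_area_struct *vma;
    int mapped;

    down_read(&mm->mmap_sem);   /* protects mm->mmap and mm->mm_rb */
    vma = find_vma(mm, addr);
    mapped = vma && vma->vm_start <= addr;
    up_read(&mm->mmap_sem);

    return mapped;
}

This is essentially the check the page-fault handler makes before deciding whether the faulting address is valid, or whether the gap below a VM_GROWSDOWN VMA may be covered by expanding the stack.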


3. Inserting a VMA


int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
    struct vm_area_struct *prev;
    struct rb_node **rb_link, *rb_parent;

    /*
     * The vm_pgoff of a purely anonymous vma should be irrelevant
     * until its first write fault, when page's anon_vma and index
     * are set.  But now set the vm_pgoff it will almost certainly
     * end up with (unless mremap moves it elsewhere before that
     * first wfault), so /proc/pid/maps tells a consistent story.
     *
     * By setting it to reflect the virtual start address of the
     * vma, merges and splits can happen in a seamless way, just
     * using the existing file pgoff checks and manipulations.
     * Similarly in do_mmap_pgoff and in do_brk.
     */
    if (!vma->vm_file) {
        BUG_ON(vma->anon_vma);
        vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
    }
    if (find_vma_links(mm, vma->vm_start, vma->vm_end,
                       &prev, &rb_link, &rb_parent))
        return -ENOMEM;
    if ((vma->vm_flags & VM_ACCOUNT) &&
         security_vm_enough_memory_mm(mm, vma_pages(vma)))
        return -ENOMEM;

    vma_link(mm, vma, prev, rb_link, rb_parent);
    return 0;
}


static int find_vma_links(struct mm_struct *mm, unsigned long addr,
        unsigned long end, struct vm_area_struct **pprev,
        struct rb_node ***rb_link, struct rb_node **rb_parent)
{
    struct rb_node **__rb_link, *__rb_parent, *rb_prev;

    __rb_link = &mm->mm_rb.rb_node;
    rb_prev = __rb_parent = NULL;

    while (*__rb_link) {
        struct vm_area_struct *vma_tmp;

        __rb_parent = *__rb_link;
        vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

        if (vma_tmp->vm_end > addr) {
            /* Fail if an existing vma overlaps the area */
            if (vma_tmp->vm_start < end)
                return -ENOMEM;
            __rb_link = &__rb_parent->rb_left;
        } else {
            rb_prev = __rb_parent;
            __rb_link = &__rb_parent->rb_right;
        }
    }

    *pprev = NULL;
    if (rb_prev)
        *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
    *rb_link = __rb_link;
    *rb_parent = __rb_parent;
    return 0;
}


static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
            struct vm_area_struct *prev, struct rb_node **rb_link,
            struct rb_node *rb_parent)
{
    struct address_space *mapping = NULL;

    if (vma->vm_file) {
        mapping = vma->vm_file->f_mapping;
        i_mmap_lock_write(mapping);
    }

    __vma_link(mm, vma, prev, rb_link, rb_parent);
    __vma_link_file(vma);

    if (mapping)
        i_mmap_unlock_write(mapping);

    mm->map_count++;
    validate_mm(mm);
}


4. Merging VMAs


/*
 * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
 * whether that can be merged with its predecessor or its successor.
 * Or both (it neatly fills a hole).
 *
 * In most cases - when called for mmap, brk or mremap - [addr,end) is
 * certain not to be mapped by the time vma_merge is called; but when
 * called for mprotect, it is certain to be already mapped (either at
 * an offset within prev, or at the start of next), and the flags of
 * this area are about to be changed to vm_flags - and the no-change
 * case has already been eliminated.
 *
 * The following mprotect cases have to be considered, where AAAA is
 * the area passed down from mprotect_fixup, never extending beyond one
 * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
 *
 *     AAAA             AAAA                AAAA          AAAA
 *    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPNNNNXXXX
 *    cannot merge    might become    might become    might become
 *                    PPNNNNNNNNNN    PPPPPPPPPPNN    PPPPPPPPPPPP 6 or
 *    mmap, brk or    case 4 below    case 5 below    PPPPPPPPXXXX 7 or
 *    mremap move:                                    PPPPNNNNNNNN 8
 *        AAAA
 *    PPPP    NNNN    PPPPPPPPPPPP    PPPPPPPPNNNN    PPPPNNNNNNNN
 *    might become    case 1 below    case 2 below    case 3 below
 *
 * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
 * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
            struct vm_area_struct *prev, unsigned long addr,
            unsigned long end, unsigned long vm_flags,
            struct anon_vma *anon_vma, struct file *file,
            pgoff_t pgoff, struct mempolicy *policy)
{
    pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
    struct vm_area_struct *area, *next;
    int err;

    /*
     * We later require that vma->vm_flags == vm_flags,
     * so this tests vma->vm_flags & VM_SPECIAL, too.
     */
    if (vm_flags & VM_SPECIAL)
        return NULL;

    if (prev)
        next = prev->vm_next;
    else
        next = mm->mmap;
    area = next;
    if (next && next->vm_end == end)        /* cases 6, 7, 8 */
        next = next->vm_next;

    /*
     * Can it merge with the predecessor?
     */
    if (prev && prev->vm_end == addr &&
            mpol_equal(vma_policy(prev), policy) &&
            can_vma_merge_after(prev, vm_flags,
                        anon_vma, file, pgoff)) {
        /*
         * OK, it can.  Can we now merge in the successor as well?
         */
        if (next && end == next->vm_start &&
                mpol_equal(policy, vma_policy(next)) &&
                can_vma_merge_before(next, vm_flags,
                    anon_vma, file, pgoff+pglen) &&
                is_mergeable_anon_vma(prev->anon_vma,
                              next->anon_vma, NULL)) {
                            /* cases 1, 6 */
            err = vma_adjust(prev, prev->vm_start,
                next->vm_end, prev->vm_pgoff, NULL);
        } else                  /* cases 2, 5, 7 */
            err = vma_adjust(prev, prev->vm_start,
                end, prev->vm_pgoff, NULL);
        if (err)
            return NULL;
        khugepaged_enter_vma_merge(prev, vm_flags);
        return prev;
    }

    /*
     * Can this new request be merged in front of next?
     */
    if (next && end == next->vm_start &&
            mpol_equal(policy, vma_policy(next)) &&
            can_vma_merge_before(next, vm_flags,
                    anon_vma, file, pgoff+pglen)) {
        if (prev && addr < prev->vm_end)    /* case 4 */
            err = vma_adjust(prev, prev->vm_start,
                addr, prev->vm_pgoff, NULL);
        else                    /* cases 3, 8 */
            err = vma_adjust(area, addr, next->vm_end,
                next->vm_pgoff - pglen, NULL);
        if (err)
            return NULL;
        khugepaged_enter_vma_merge(area, vm_flags);
        return area;
    }

    return NULL;
}
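vma_merge() can be observed from user space. A minimal demo follows (assuming Linux and a hosted C environment; show_maps() is only a helper for the demo): map a two-page anonymous region, split it by changing the protection of the second page with mprotect(), then restore the protection so the two VMAs can merge back into one. /proc/self/maps shows one region, then two, then one again.

/* Hypothetical user-space demo: split and re-merge a VMA via mprotect().
 * Each step prints /proc/self/maps so the VMA count can be observed. */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

static void show_maps(const char *tag)
{
    printf("---- %s ----\n", tag);
    fflush(stdout);
    system("cat /proc/self/maps");
}

int main(void)
{
    long page = sysconf(_SC_PAGESIZE);
    char *p = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED)
        return 1;

    show_maps("one two-page anonymous VMA");

    /* Different protection on the second page splits the VMA in two. */
    mprotect(p + page, page, PROT_READ);
    show_maps("after mprotect(): two VMAs");

    /* Restoring the protection lets vma_merge() coalesce them again. */
    mprotect(p + page, page, PROT_READ | PROT_WRITE);
    show_maps("after restoring protection: one VMA again");

    munmap(p, 2 * page);
    return 0;
}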


5. Red-black tree example
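Below is a minimal sketch of the rbtree usage behind find_vma() and find_vma_links(), using the kernel rbtree API (rb_link_node(), rb_insert_color(), rb_entry()). The demo_vma type and the demo_insert()/demo_find() helpers are hypothetical names for illustration; a real kernel module would also need the usual module boilerplate.

/* Toy "VMA" kept in a red-black tree, mirroring mm->mm_rb. */
#include <linux/rbtree.h>

struct demo_vma {
    unsigned long start, end;
    struct rb_node node;
};

/* Insert, keeping the tree sorted by start address (no overlap check),
 * like the descent loop in find_vma_links(). */
static void demo_insert(struct rb_root *root, struct demo_vma *new)
{
    struct rb_node **link = &root->rb_node, *parent = NULL;

    while (*link) {
        struct demo_vma *tmp = rb_entry(*link, struct demo_vma, node);

        parent = *link;
        if (new->start < tmp->start)
            link = &(*link)->rb_left;
        else
            link = &(*link)->rb_right;
    }
    rb_link_node(&new->node, parent, link);
    rb_insert_color(&new->node, root);
}

/* Find the first area satisfying addr < end, like find_vma(). */
static struct demo_vma *demo_find(struct rb_root *root, unsigned long addr)
{
    struct rb_node *n = root->rb_node;
    struct demo_vma *found = NULL;

    while (n) {
        struct demo_vma *tmp = rb_entry(n, struct demo_vma, node);

        if (tmp->end > addr) {
            found = tmp;
            if (tmp->start <= addr)
                break;
            n = n->rb_left;
        } else
            n = n->rb_right;
    }
    return found;
}

Keeping the areas sorted by start address is what lets demo_find() mirror find_vma(): at each node it remembers the candidate whose end exceeds addr and descends left, otherwise it descends right.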

Contact: arnoldlu@qq
