// Physical memory layout
// qemu -machine virt is set up like this,
// based on qemu's hw/riscv/virt.c:
//
// 00001000 -- boot ROM, provided by qemu
// 02000000 -- CLINT
// 0C000000 -- PLIC
// 10000000 -- uart0
// 10001000 -- virtio disk
// 80000000 -- boot ROM jumps here in machine mode
//             -kernel loads the kernel here
// unused RAM after 80000000.
// the kernel uses physical memory thus:
// 80000000 -- entry.S, then kernel text and data
// end -- start of kernel page allocation area
// PHYSTOP -- end RAM used by the kernel
// the kernel expects there to be RAM
// for use by the kernel and user pages
// from physical address 0x80000000 to PHYSTOP.
#define KERNBASE 0x80000000L
#define PHYSTOP (KERNBASE + 128*1024*1024)
// User memory layout.
// Address zero first:
// text
// original data and bss
// fixed-size stack
// expandable heap
// ...
// USYSCALL (shared with kernel)
// TRAPFRAME (p->trapframe, used by the trampoline)
// TRAMPOLINE (the same page as in the kernel)
#define TRAPFRAME (TRAMPOLINE - PGSIZE)
#ifdef LAB_PGTBL
#define USYSCALL (TRAPFRAME - PGSIZE)
struct usyscall {
int pid; // Process ID
};
#endif
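The USYSCALL page exists so that certain kernel data can be read from user space without trapping into the kernel. A minimal sketch of how user code might use it, assuming the kernel has mapped a read-only usyscall page at USYSCALL (the helper name ugetpid and its location are assumptions, not taken from this file):

#include "kernel/memlayout.h"

// Hypothetical user-side helper: read the pid directly from the
// shared usyscall page instead of making a getpid() system call.
int
ugetpid(void)
{
  struct usyscall *u = (struct usyscall *)USYSCALL;
  return u->pid;
}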
// shift a physical address to the right place for a PTE.
#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10)
#define PTE2PA(pte) (((pte) >> 10) << 12)
#define PTE_FLAGS(pte) ((pte) & 0x3FF)
// one beyond the highest possible virtual address.
// MAXVA is actually one bit less than the max allowed by
// Sv39, to avoid having to sign-extend virtual addresses
// that have the high bit set.
#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))
MAXVA, then, is one beyond the highest usable virtual address.
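A Sv39 virtual address below MAXVA is decoded as three 9-bit page-table indexes (one per level) plus a 12-bit page offset. The macros below show how each level's index is extracted; they follow the definitions in xv6's riscv.h, but treat them as a sketch quoted from memory:

// 12-bit offset within a page.
#define PGSHIFT 12
// Extract the 9-bit page-table index for a given level (2 = root, 0 = leaf).
#define PXMASK          0x1FF
#define PXSHIFT(level)  (PGSHIFT + (9 * (level)))
#define PX(level, va)   ((((uint64)(va)) >> PXSHIFT(level)) & PXMASK)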
kalloc.c
This part of the code implements xv6's physical memory (page) allocator.
extern char end[]; // first address after kernel.
// defined by kernel.ld.
end is the first address after the kernel's code and data; the page allocator manages the RAM from there up to PHYSTOP.
struct run
struct run { // node in the free-page list
struct run *next;
};
struct { // allocator state shared by all CPUs
struct spinlock lock; // protects freelist
struct run *freelist; // linked list of free physical pages
} kmem;
// Free the page of physical memory pointed at by pa,
// which normally should have been returned by a
// call to kalloc(). (The exception is when
// initializing the allocator; see kinit above.)
void
kfree(void *pa)
{
struct run *r;
if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
panic("kfree");
// Fill with junk to catch dangling refs.
memset(pa, 1, PGSIZE);
r = (struct run*)pa;
acquire(&kmem.lock);
r->next = kmem.freelist;
kmem.freelist = r;
release(&kmem.lock);
}
kalloc
kalloc() allocates one physical page: it pops the page at the head of the free list, fills it with junk, and returns a pointer to it.
// Allocate one 4096-byte page of physical memory.
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
void *
kalloc(void)
{
struct run *r;
acquire(&kmem.lock);
r = kmem.freelist;
if(r)
kmem.freelist = r->next;
release(&kmem.lock);
if(r)
memset((char*)r, 5, PGSIZE); // fill with junk
return (void*)r;
}
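A typical usage pattern inside the kernel (a sketch, not taken from any particular xv6 file): allocate a page, check for failure, use it, then return it to the free list.

// Sketch: allocate a scratch page, use it, then free it.
char *buf = kalloc();
if(buf == 0)
  return -1;            // out of physical memory
memset(buf, 0, PGSIZE); // kalloc() filled it with junk; clear before use
// ... use buf ...
kfree(buf);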
vm.c
This part of the code implements xv6's virtual memory management.
kvminit
/*
* the kernel's page table.
*/
pagetable_t kernel_pagetable;
extern char etext[]; // kernel.ld sets this to end of kernel code.
extern char trampoline[]; // trampoline.S
// Initialize the one kernel_pagetable
void
kvminit(void)
{
kernel_pagetable = kvmmake();
}
kernel_pagetable is the pointer to the kernel's page table.
kvmmake
// Make a direct-map page table for the kernel.
pagetable_t
kvmmake(void)
{
pagetable_t kpgtbl; // the kernel page table being built
kpgtbl = (pagetable_t) kalloc(); // allocate the root page-table page
memset(kpgtbl, 0, PGSIZE); // clear it
// uart registers
kvmmap(kpgtbl, UART0, UART0, PGSIZE, PTE_R | PTE_W);
// virtio mmio disk interface
kvmmap(kpgtbl, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);
#ifdef LAB_NET
// PCI-E ECAM (configuration space), for pci.c
kvmmap(kpgtbl, 0x30000000L, 0x30000000L, 0x10000000, PTE_R | PTE_W);
// pci.c maps the e1000's registers here.
kvmmap(kpgtbl, 0x40000000L, 0x40000000L, 0x20000, PTE_R | PTE_W);
#endif
// PLIC
kvmmap(kpgtbl, PLIC, PLIC, 0x4000000, PTE_R | PTE_W);
// map kernel text executable and read-only.
kvmmap(kpgtbl, KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);
// map kernel data and the physical RAM we'll make use of.
kvmmap(kpgtbl, (uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);
// map the trampoline for trap entry/exit to
// the highest virtual address in the kernel.
kvmmap(kpgtbl, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
// allocate and map a kernel stack for each process.
proc_mapstacks(kpgtbl);
return kpgtbl;
}
kvmmake() builds the kernel page table. Nearly all of it is a direct mapping (virtual address equals physical address), so the kernel can use addresses returned by kalloc() directly; only the trampoline page is mapped at a different, high virtual address.
kvminithart
// Switch h/w page table register to the kernel's page table,
// and enable paging.
void
kvminithart()
{
// wait for any previous writes to the page table memory to finish.
sfence_vma();
w_satp(MAKE_SATP(kernel_pagetable));
// flush stale entries from the TLB.
sfence_vma();
}
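MAKE_SATP packs the Sv39 mode bits together with the physical page number of the root page-table page into the value written to satp. Its definition in xv6's riscv.h is roughly the following (quoted from memory; verify against the source):

// satp value: mode = Sv39, PPN = physical address of root page table >> 12.
#define SATP_SV39 (8L << 60)
#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12))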
// Look up a virtual address, return the physical address,
// or 0 if not mapped.
// Can only be used to look up user pages.
uint64
walkaddr(pagetable_t pagetable, uint64 va)
{
pte_t *pte;
uint64 pa;
if(va >= MAXVA)
return 0;
pte = walk(pagetable, va, 0);
if(pte == 0)
return 0;
if((*pte & PTE_V) == 0)
return 0;
if((*pte & PTE_U) == 0)
return 0;
pa = PTE2PA(*pte);
return pa;
}
walkaddr() translates a user virtual address into the physical address it maps to, returning 0 if the address is unmapped or not user-accessible.
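walkaddr() relies on walk(), which descends the three-level Sv39 page table and returns a pointer to the leaf PTE for va, optionally allocating missing interior page-table pages. xv6 has its own walk() in vm.c; the version below is a reconstruction of that logic, not a verbatim copy:

// Sketch of a three-level walk: return the address of the leaf PTE for va.
// If alloc != 0, allocate and install missing interior page-table pages.
pte_t *
walk(pagetable_t pagetable, uint64 va, int alloc)
{
  if(va >= MAXVA)
    panic("walk");

  for(int level = 2; level > 0; level--){
    pte_t *pte = &pagetable[PX(level, va)];
    if(*pte & PTE_V){
      // Interior PTE is valid: follow it down one level.
      pagetable = (pagetable_t)PTE2PA(*pte);
    } else {
      // Missing interior page: allocate one if permitted.
      if(!alloc || (pagetable = (pagetable_t)kalloc()) == 0)
        return 0;
      memset(pagetable, 0, PGSIZE);
      *pte = PA2PTE(pagetable) | PTE_V;
    }
  }
  return &pagetable[PX(0, va)];
}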
kvmmap
// add a mapping to the kernel page table.
// only used when booting.
// does not flush TLB or enable paging.
void
kvmmap(pagetable_t kpgtbl, uint64 va, uint64 pa, uint64 sz, int perm)
{
if(mappages(kpgtbl, va, sz, pa, perm) != 0)
panic("kvmmap");
}
kvmmap() installs a mapping in the kernel page table, panicking on failure; it is only used during boot.
mappages
// Create PTEs for virtual addresses starting at va that refer to
// physical addresses starting at pa.
// va and size MUST be page-aligned.
// Returns 0 on success, -1 if walk() couldn't
// allocate a needed page-table page.
int
mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
{
uint64 a, last;
pte_t *pte;
if((va % PGSIZE) != 0)
panic("mappages: va not aligned");
if((size % PGSIZE) != 0)
panic("mappages: size not aligned");
if(size == 0)
panic("mappages: size");
a = va;
last = va + size - PGSIZE;
for(;;){
if((pte = walk(pagetable, a, 1)) == 0)
return -1;
if(*pte & PTE_V)
panic("mappages: remap");
*pte = PA2PTE(pa) | perm | PTE_V;
if(a == last)
break;
a += PGSIZE;
pa += PGSIZE;
}
return 0;
}
// Remove npages of mappings starting from va. va must be
// page-aligned. The mappings must exist.
// Optionally free the physical memory.
void
uvmunmap(pagetable_t pagetable, uint64 va, uint64 npages, int do_free)
{
uint64 a;
pte_t *pte;
int sz;
if((va % PGSIZE) != 0)
panic("uvmunmap: not aligned");
for(a = va; a < va + npages*PGSIZE; a += sz){
sz = PGSIZE;
if((pte = walk(pagetable, a, 0)) == 0)
panic("uvmunmap: walk");
if((*pte & PTE_V) == 0) {
printf("va=%ld pte=%ld\n", a, *pte);
panic("uvmunmap: not mapped");
}
if(PTE_FLAGS(*pte) == PTE_V)
panic("uvmunmap: not a leaf");
if(do_free){
uint64 pa = PTE2PA(*pte);
kfree((void*)pa);
}
*pte = 0;
}
}
uvmunmap() removes npages of mappings starting at va; do_free controls whether the underlying physical pages are freed as well.
uvmcreate
// create an empty user page table.
// returns 0 if out of memory.
pagetable_t
uvmcreate()
{
pagetable_t pagetable;
pagetable = (pagetable_t) kalloc();
if(pagetable == 0)
return 0;
memset(pagetable, 0, PGSIZE);
return pagetable;
}
uvmcreate() allocates and zeroes an empty user page table.
uvmfirst
// Load the user initcode into address 0 of pagetable,
// for the very first process.
// sz must be less than a page.
void
uvmfirst(pagetable_t pagetable, uchar *src, uint sz)
{
char *mem;
if(sz >= PGSIZE)
panic("uvmfirst: more than a page");
mem = kalloc();
memset(mem, 0, PGSIZE);
mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U);
memmove(mem, src, sz);
}
uvmfirst() sets up the first user process's address space: it maps one page at virtual address 0 and copies the initcode into it.
uvmalloc
// Allocate PTEs and physical memory to grow process from oldsz to
// newsz, which need not be page aligned. Returns new size or 0 on error.
uint64
uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz, int xperm)
{
char *mem;
uint64 a;
int sz;
if(newsz < oldsz)
return oldsz;
oldsz = PGROUNDUP(oldsz);
for(a = oldsz; a < newsz; a += sz){
sz = PGSIZE;
mem = kalloc();
if(mem == 0){
uvmdealloc(pagetable, a, oldsz);
return 0;
}
#ifndef LAB_SYSCALL
memset(mem, 0, sz);
#endif
if(mappages(pagetable, a, sz, (uint64)mem, PTE_R|PTE_U|xperm) != 0){
kfree(mem);
uvmdealloc(pagetable, a, oldsz);
return 0;
}
}
return newsz;
}
uvmalloc() grows a process's address space from oldsz to newsz, allocating and mapping a physical page for each new page of virtual memory.
uvmdealloc
// Deallocate user pages to bring the process size from oldsz to
// newsz. oldsz and newsz need not be page-aligned, nor does newsz
// need to be less than oldsz. oldsz can be larger than the actual
// process size. Returns the new process size.
uint64
uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
if(newsz >= oldsz)
return oldsz;
if(PGROUNDUP(newsz) < PGROUNDUP(oldsz)){
int npages = (PGROUNDUP(oldsz) - PGROUNDUP(newsz)) / PGSIZE;
uvmunmap(pagetable, PGROUNDUP(newsz), npages, 1);
}
return newsz;
}
uvmdealloc() shrinks a process's address space from oldsz to newsz, unmapping and freeing the pages that are no longer needed.
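uvmalloc() and uvmdealloc() are the pair behind sbrk(): growproc() in proc.c grows or shrinks the process image with them, roughly as sketched below (simplified; see proc.c for the real version):

// Sketch of growproc(): grow or shrink the current process by n bytes.
int
growproc(int n)
{
  uint64 sz;
  struct proc *p = myproc();

  sz = p->sz;
  if(n > 0){
    if((sz = uvmalloc(p->pagetable, sz, sz + n, PTE_W)) == 0)
      return -1;
  } else if(n < 0){
    sz = uvmdealloc(p->pagetable, sz, sz + n);
  }
  p->sz = sz;
  return 0;
}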
freewalk
// Recursively free page-table pages.
// All leaf mappings must already have been removed.
void
freewalk(pagetable_t pagetable)
{
// there are 2^9 = 512 PTEs in a page table.
for(int i = 0; i < 512; i++){
pte_t pte = pagetable[i];
if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){
// this PTE points to a lower-level page table.
uint64 child = PTE2PA(pte);
freewalk((pagetable_t)child);
pagetable[i] = 0;
} else if(pte & PTE_V){
panic("freewalk: leaf");
}
}
kfree((void*)pagetable);
}
// mark a PTE invalid for user access.
// used by exec for the user stack guard page.
void
uvmclear(pagetable_t pagetable, uint64 va)
{
pte_t *pte;
pte = walk(pagetable, va, 0);
if(pte == 0)
panic("uvmclear");
*pte &= ~PTE_U;
}
uvmclear() clears PTE_U on a page so user code can no longer access it; exec uses it for the stack guard page.
copyout / copyin / copyinstr
// Copy from kernel to user.
// Copy len bytes from src to virtual address dstva in a given page table.
// Return 0 on success, -1 on error.
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
uint64 n, va0, pa0;
pte_t *pte;
while(len > 0){
va0 = PGROUNDDOWN(dstva);
if (va0 >= MAXVA)
return -1;
if((pte = walk(pagetable, va0, 0)) == 0) {
// printf("copyout: pte should exist 0x%x %d\n", dstva, len);
return -1;
}
// forbid copyout over read-only user text pages.
if((*pte & PTE_W) == 0)
return -1;
pa0 = walkaddr(pagetable, va0);
if(pa0 == 0)
return -1;
n = PGSIZE - (dstva - va0);
if(n > len)
n = len;
memmove((void *)(pa0 + (dstva - va0)), src, n);
len -= n;
src += n;
dstva = va0 + PGSIZE;
}
return 0;
}
// Copy from user to kernel.
// Copy len bytes to dst from virtual address srcva in a given page table.
// Return 0 on success, -1 on error.
int
copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len)
{
uint64 n, va0, pa0;
while(len > 0){
va0 = PGROUNDDOWN(srcva);
pa0 = walkaddr(pagetable, va0);
if(pa0 == 0)
return -1;
n = PGSIZE - (srcva - va0);
if(n > len)
n = len;
memmove(dst, (void *)(pa0 + (srcva - va0)), n);
len -= n;
dst += n;
srcva = va0 + PGSIZE;
}
return 0;
}
// Copy a null-terminated string from user to kernel.
// Copy bytes to dst from virtual address srcva in a given page table,
// until a '\0', or max.
// Return 0 on success, -1 on error.
int
copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
{
uint64 n, va0, pa0;
int got_null = 0;
while(got_null == 0 && max > 0){
va0 = PGROUNDDOWN(srcva);
pa0 = walkaddr(pagetable, va0);
if(pa0 == 0)
return -1;
n = PGSIZE - (srcva - va0);
if(n > max)
n = max;
char *p = (char *) (pa0 + (srcva - va0));
while(n > 0){
if(*p == '\0'){
*dst = '\0';
got_null = 1;
break;
} else {
*dst = *p;
}
--n;
--max;
p++;
dst++;
}
srcva = va0 + PGSIZE;
}
if(got_null){
return 0;
} else {
return -1;
}
}
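These three functions are how system calls move data across the user/kernel boundary, since the kernel cannot simply dereference user virtual addresses. A hedged sketch of a system call returning a struct to user space (the names addr and st are illustrative, not taken from a specific xv6 syscall):

// Sketch: copy a kernel struct out to a user-supplied address.
// addr is a user virtual address the process passed as an argument;
// p is the current process.
struct stat st;
// ... fill in st ...
if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0)
  return -1;
return 0;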