vm.c源码阅读

代码框架

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// vm.c
|___kvminit()
+___kvmmap()
|___mappages()
+___walk()
|___kvminithart()
+___kvmpa()
|___uvmcreate()
+___uvminit()
|___uvmalloc()
+___uvmdealloc()
|___uvmfree()
+___uvmunmap()
|___walkaddr()
+___uvmcopy()
|___uvmclear()
+___copyinstr()
|___copyout()
+___copyin()

kvminit() kvmmap() walk() mappages()

初始化内核的直接映射页表,设置内核虚拟内存到物理内存的映射

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/*
* create a direct-map page table for the kernel.
*/
void kvminit() {
kernel_pagetable = (pagetable_t) kalloc();
memset(kernel_pagetable, 0, PGSIZE);

// uart registers
kvmmap(UART0, UART0, PGSIZE, PTE_R | PTE_W);

// virtio mmio disk interface
kvmmap(VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);

// CLINT
kvmmap(CLINT, CLINT, 0x10000, PTE_R | PTE_W);

// PLIC
kvmmap(PLIC, PLIC, 0x400000, PTE_R | PTE_W);

// map kernel text executable and read-only.
kvmmap(KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);

// map kernel data and the physical RAM we'll make use of.
kvmmap((uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);

// map the trampoline for trap entry/exit to
// the highest virtual address in the kernel.
kvmmap(TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
}

kalloc分配物理内存页,memset将新分配的页表清零
接下来用kvmmap函数设置页表项,将虚拟地址映射到物理地址,并设置访问权限

1
2
3
4
5
6
7
8
// Install a mapping of sz bytes from va to pa, with permissions perm,
// in the global kernel page table. Panics if mappages() fails.
// Only used when booting; does not flush the TLB or enable paging.
void kvmmap(uint64 va, uint64 pa, uint64 sz, int perm) {
  int rc = mappages(kernel_pagetable, va, sz, pa, perm);

  if (rc != 0)
    panic("kvmmap");
}

kvmmap用来设置内核虚拟地址到物理地址的映射,主要在内核启动的过程中使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// Create PTEs for virtual addresses starting at va that refer to
// physical addresses starting at pa. va and size might not
// be page-aligned; every page touched by [va, va+size) is mapped.
// size must be non-zero. Returns 0 on success, -1 if walk() couldn't
// allocate a needed page-table page. Panics if a page in the range
// is already mapped.
int mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) {
  uint64 a, last;
  pte_t *pte;

  // With size == 0, va + size - 1 wraps below va, so `last` would sit
  // before `a` and the loop below would never meet its exit condition.
  if (size == 0)
    panic("mappages: size");

  a = PGROUNDDOWN(va);
  last = PGROUNDDOWN(va + size - 1);
  for(;;){
    // find (allocating intermediate page-table pages if needed) the leaf PTE.
    if ((pte = walk(pagetable, a, 1)) == 0)
      return -1;
    if (*pte & PTE_V)
      panic("remap");
    *pte = PA2PTE(pa) | perm | PTE_V;
    if (a == last)
      break;
    a += PGSIZE;
    pa += PGSIZE;
  }
  return 0;
}

walk在页表中查找虚拟地址va对应的页表项(PTE)并返回其地址;当alloc不为0时,会按需分配缺失的中间页表页

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// Find the level-0 PTE for virtual address va in page table pagetable
// and return a pointer to it. When alloc is non-zero, missing
// intermediate page-table pages are allocated and zeroed on the way
// down; otherwise (or if allocation fails) 0 is returned.
// Panics if va >= MAXVA.
//
// The risc-v Sv39 scheme has three levels of page-table
// pages. A page-table page contains 512 64-bit PTEs.
// A 64-bit virtual address is split into five fields:
// 39..63 -- must be zero.
// 30..38 -- 9 bits of level-2 index.
// 21..29 -- 9 bits of level-1 index.
// 12..20 -- 9 bits of level-0 index.
// 0..11 -- 12 bits of byte offset within the page.
pte_t *
walk(pagetable_t pagetable, uint64 va, int alloc)
{
  int level = 2;

  if(va >= MAXVA)
    panic("walk");

  // descend through levels 2 and 1; level 0 is indexed at the end.
  while(level > 0){
    pte_t *entry = &pagetable[PX(level, va)];

    if((*entry & PTE_V) == 0){
      // no next-level table yet: create one only if the caller asked.
      if(!alloc)
        return 0;
      pagetable = (pde_t*)kalloc();
      if(pagetable == 0)
        return 0;
      memset(pagetable, 0, PGSIZE);
      *entry = PA2PTE(pagetable) | PTE_V;
    } else {
      pagetable = (pagetable_t)PTE2PA(*entry);
    }
    level--;
  }
  return &pagetable[PX(0, va)];
}

mappages在给定页表中创建一系列页表项,并设置权限,调用walk在给定的页表页中找到对应的页表项,一些具体设置在riscv.h中

1
2
3
4
5
6
7
8
9
10
11
12
//riscv.h
// shift a physical address to the right place for a PTE:
// drop the 12-bit page offset, then leave room for the 10 low flag bits.
// The argument is parenthesized so an expression argument is cast as a whole.
#define PA2PTE(pa) ((((uint64)(pa)) >> 12) << 10)

// inverse of PA2PTE: strip the 10 flag bits and restore the 12-bit offset (as zero).
#define PTE2PA(pte) (((pte) >> 10) << 12)

// the low 10 bits of a PTE.
#define PTE_FLAGS(pte) ((pte) & 0x3FF)

// extract the three 9-bit page table indices from a virtual address.
#define PXMASK 0x1FF // 9 bits
#define PXSHIFT(level) (PGSHIFT+(9*(level)))
#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)

kvminithart()

1
2
3
4
5
6
7
8
// Switch h/w page table register to the kernel's page table,
// and enable paging: write the satp value built from
// kernel_pagetable, then call sfence_vma() afterwards.
void
kvminithart()
{
w_satp(MAKE_SATP(kernel_pagetable));
sfence_vma();
}

kvminithart将硬件的页表寄存器(SATP)切换到内核的页表并开启分页。w_satp是riscv.h中的一个内联函数,用csrw指令把MAKE_SATP(kernel_pagetable)的值写入SATP(Supervisor Address Translation and Protection)寄存器,该值由分页模式SATP_SV39和页表物理地址右移12位得到的页号组成;sfence_vma用于刷新TLB中缓存的地址转换信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Build a satp value: the SATP_SV39 mode bits OR'ed with the page
// table's address shifted right by 12 (its page number). The argument
// is parenthesized so an expression argument is cast as a whole.
#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)(pagetable)) >> 12))

// supervisor address translation and protection;
// holds the address of the page table.
// Writes x into the satp CSR with a single csrw instruction.
static inline void
w_satp(uint64 x)
{
asm volatile("csrw satp, %0" : : "r" (x));
}

// flush the TLB by executing sfence.vma.
static inline void
sfence_vma()
{
// the zero, zero means flush all TLB entries.
asm volatile("sfence.vma zero, zero");
}

kvmpa()

将内核虚拟地址转换成物理地址:先计算虚拟地址在页内的偏移量,再通过walk找到对应的页表项并取出物理页的首地址,二者相加即为物理地址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Translate a kernel virtual address to a physical address by
// looking it up in kernel_pagetable. Only needed for addresses
// on the stack. va need not be page aligned: its offset within
// the page is carried over to the result.
// Panics if va has no valid mapping.
uint64
kvmpa(uint64 va)
{
  pte_t *entry = walk(kernel_pagetable, va, 0);

  if(entry == 0)
    panic("kvmpa");
  if((*entry & PTE_V) == 0)
    panic("kvmpa");

  // physical page base plus the byte offset inside the page.
  return PTE2PA(*entry) + (va % PGSIZE);
}

uvmcreate() uvminit()

uvmcreate()
用于创建一个新的用户页表,给其分配物理内存

1
2
3
4
5
6
7
8
9
10
11
12
// create an empty user page table.
// returns 0 if out of memory.
pagetable_t
uvmcreate()
{
pagetable_t pagetable;
pagetable = (pagetable_t) kalloc();
if(pagetable == 0)
return 0;
memset(pagetable, 0, PGSIZE);
return pagetable;
}

uvminit()
用于将用户初始代码加载到一个新的用户页表中,通常用于创建第一个用户进程,接受三个参数:一个指向用户页表的指针pagetable,一个指向初始代码的指针src,以及初始代码的大小sz。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Load the user initcode into address 0 of pagetable,
// for the very first process: allocate one zeroed page, map it at
// virtual address 0 with R/W/X/U permissions, and copy sz bytes
// of src into it.
// sz must be less than a page (panics otherwise).
// The results of kalloc() and mappages() are not checked here.
void
uvminit(pagetable_t pagetable, uchar *src, uint sz)
{
  if(sz >= PGSIZE)
    panic("inituvm: more than a page");

  char *page = kalloc();
  memset(page, 0, PGSIZE);
  mappages(pagetable, 0, PGSIZE, (uint64)page, PTE_W|PTE_R|PTE_X|PTE_U);
  memmove(page, src, sz);
}

uvmalloc() uvmdealloc()

uvmalloc()
用于扩展一个进程的虚拟内存空间。函数接受三个参数:一个指向用户页表的指针pagetable,旧的内存大小oldsz,以及新的内存大小newsz。函数的目标是将进程的内存从oldsz扩展到newsz,即使newsz不是页对齐的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// Grow a process from oldsz to newsz bytes by allocating zeroed
// physical pages and mapping them with R/W/X/U permissions.
// newsz need not be page aligned. Returns newsz on success, oldsz
// unchanged if newsz < oldsz, or 0 on error (after undoing the
// pages added so far).
uint64
uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
  uint64 base, cur;

  if(newsz < oldsz)
    return oldsz;

  // first page boundary at or above the old size.
  base = PGROUNDUP(oldsz);
  cur = base;
  while(cur < newsz){
    char *page = kalloc();

    if(page != 0){
      memset(page, 0, PGSIZE);
      if(mappages(pagetable, cur, PGSIZE, (uint64)page, PTE_W|PTE_X|PTE_R|PTE_U) == 0){
        cur += PGSIZE;
        continue;
      }
      // the page was allocated but could not be mapped.
      kfree(page);
    }

    // out of memory: roll back everything mapped by this call.
    uvmdealloc(pagetable, cur, base);
    return 0;
  }
  return newsz;
}

uvmdealloc()则是减少一个进程的虚拟内存空间

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Shrink a process from oldsz to newsz bytes, unmapping and freeing
// the whole pages that lie above the new size. Neither size has to
// be page-aligned, and oldsz may be larger than the actual process
// size. If newsz >= oldsz nothing is done and oldsz is returned;
// otherwise returns newsz.
uint64
uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
  uint64 old_top, new_top;

  if(newsz >= oldsz)
    return oldsz;

  old_top = PGROUNDUP(oldsz);
  new_top = PGROUNDUP(newsz);
  if(new_top < old_top){
    int npages = (old_top - new_top) / PGSIZE;
    uvmunmap(pagetable, new_top, npages, 1);
  }

  return newsz;
}

uvmfree() uvmunmap() freewalk()

uvmfree()
调用uvmunmap()和freewalk()来用于释放用户内存页和页表页

1
2
3
4
5
6
7
8
9
// Release a user address space of sz bytes: first unmap and free
// the user memory pages starting at virtual address 0, then free
// the page-table pages themselves.
void
uvmfree(pagetable_t pagetable, uint64 sz)
{
  if(sz > 0){
    uint64 npages = PGROUNDUP(sz)/PGSIZE;
    uvmunmap(pagetable, 0, npages, 1);
  }
  freewalk(pagetable);
}

uvmunmap()
解除虚拟地址的映射,根据do_free决定是否释放对应物理内存

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
// Remove npages of mappings starting from va. va must be
// page-aligned and every page in the range must currently be
// mapped by a leaf PTE; otherwise this panics.
// If do_free is non-zero the physical pages are freed as well.
void
uvmunmap(pagetable_t pagetable, uint64 va, uint64 npages, int do_free)
{
  uint64 addr, end;

  if((va % PGSIZE) != 0)
    panic("uvmunmap: not aligned");

  end = va + npages*PGSIZE;
  for(addr = va; addr < end; addr += PGSIZE){
    pte_t *entry = walk(pagetable, addr, 0);

    if(entry == 0)
      panic("uvmunmap: walk");
    if((*entry & PTE_V) == 0)
      panic("uvmunmap: not mapped");
    // a valid PTE with no other flag bits set points to a page-table page.
    if(PTE_FLAGS(*entry) == PTE_V)
      panic("uvmunmap: not a leaf");

    if(do_free)
      kfree((void*)PTE2PA(*entry));
    *entry = 0;
  }
}

freewalk()
通过递归释放页表中的所有页表页

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Free a page-table page and, recursively, every lower-level
// page-table page it points to.
// All leaf mappings must already have been removed; a remaining
// leaf causes a panic.
void
freewalk(pagetable_t pagetable)
{
  // there are 2^9 = 512 PTEs in a page table.
  for(int i = 0; i < 512; i++){
    pte_t pte = pagetable[i];

    if((pte & PTE_V) == 0)
      continue;
    // any of R/W/X set means this is a leaf, which must not exist here.
    if((pte & (PTE_R|PTE_W|PTE_X)) != 0)
      panic("freewalk: leaf");

    // this PTE points to a lower-level page table.
    freewalk((pagetable_t)PTE2PA(pte));
    pagetable[i] = 0;
  }
  kfree((void*)pagetable);
}

Print a page table (easy)

YOUR JOB
定义一个名为vmprint()的函数。它应当接收一个pagetable_t作为参数,并以下面描述的格式打印该页表。在exec.c中的return argc之前插入if(p->pid==1) vmprint(p->pagetable),以打印第一个进程的页表

1
2
3
4
5
6
7
8
9
10
11
page table 0x0000000087f6e000
..0: pte 0x0000000021fda801 pa 0x0000000087f6a000
.. ..0: pte 0x0000000021fda401 pa 0x0000000087f69000
.. .. ..0: pte 0x0000000021fdac1f pa 0x0000000087f6b000
.. .. ..1: pte 0x0000000021fda00f pa 0x0000000087f68000
.. .. ..2: pte 0x0000000021fd9c1f pa 0x0000000087f67000
..255: pte 0x0000000021fdb401 pa 0x0000000087f6d000
.. ..511: pte 0x0000000021fdb001 pa 0x0000000087f6c000
.. .. ..510: pte 0x0000000021fdd807 pa 0x0000000087f76000
.. .. ..511: pte 0x0000000020001c0b pa 0x0000000080007000

hint提示先看freewalk函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// vm.c
// Free a page-table page and, recursively, every lower-level
// page-table page it points to.
// All leaf mappings must already have been removed; a remaining
// leaf causes a panic.

void
freewalk(pagetable_t pagetable)
{
  // there are 2^9 = 512 PTEs in a page table.
  for(int i = 0; i < 512; i++){
    pte_t pte = pagetable[i];

    if((pte & PTE_V) == 0)
      continue;
    // any of R/W/X set means this is a leaf, which must not exist here.
    if((pte & (PTE_R|PTE_W|PTE_X)) != 0)
      panic("freewalk: leaf");

    // this PTE points to a lower-level page table.
    freewalk((pagetable_t)PTE2PA(pte));
    pagetable[i] = 0;
  }
  kfree((void*)pagetable);
}

函数以递归的方式去访问页表
我们同样在vm.c文件中定义vmprint函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// Print every valid PTE of one page-table page, then recurse into the
// page-table pages it points to. level is the nesting depth and sets
// how many ".." groups are printed before each entry.
void _vmprint(pagetable_t pagetable, int level) {
  for (int i = 0; i < 512; i++) {
    if ((pagetable[i] & PTE_V) == 0)
      continue;

    // indentation: ".." for the first group, " .." for each further one.
    for (int depth = 0; depth < level; depth++) {
      if (depth > 0)
        printf(" ..");
      else
        printf("..");
    }
    printf("%d: pte %p pa %p\n", i, pagetable[i], PTE2PA(pagetable[i]));

    // no R/W/X bit set: the entry refers to a lower-level page table.
    if ((pagetable[i] & (PTE_R|PTE_W|PTE_X)) == 0)
      _vmprint((pagetable_t)PTE2PA(pagetable[i]), level + 1);
  }
}

// Print a page table: a header line with the address of the
// top-level page, followed by its valid entries via _vmprint()
// starting at depth 1.
void vmprint(pagetable_t pagetable) {
printf("page table %p\n", pagetable);
_vmprint(pagetable, 1);
}

pagetable[i] & PTE_V表示该页表项有效。打印该项之后检查其权限位:若PTE_R、PTE_W、PTE_X都为0,说明它不是叶子页表项,而是指向下一级页表页;此时用PTE2PA从pte中取出下一级页表页的物理地址,继续递归打印
在defs.h和exec.c中定义并使用该函数

1
2
3
4
5
6
7
8
9
10
11
12
// defs.h
// page-table printing helpers defined in vm.c
void vmprint(pagetable_t);
void _vmprint(pagetable_t, int);

// exec.c
proc_freepagetable(oldpagetable, oldsz);

if (p->pid == 1) { // print the page table of the process whose pid is 1
vmprint(p->pagetable);
}

return argc; // this ends up in a0, the first argument to main(argc, argv)

make qemu后可以在终端上看到页表成功打印

A kernel page table per process (hard)

Xv6有一个单独的用于在内核中执行程序时的内核页表。内核页表直接映射(恒等映射)到物理地址,也就是说内核虚拟地址x映射到物理地址仍然是x。Xv6还为每个进程的用户地址空间提供了一个单独的页表,只包含该进程用户内存的映射,从虚拟地址0开始。因为内核页表不包含这些映射,所以用户地址在内核中无效。因此,当内核需要使用在系统调用中传递的用户指针(例如,传递给write()的缓冲区指针)时,内核必须首先将指针转换为物理地址。本节和下一节的目标是允许内核直接解引用用户指针。
YOUR JOB
你的第一项工作是修改内核来让每一个进程在内核中执行时使用它自己的内核页表的副本。修改struct proc来为每一个进程维护一个内核页表,修改调度程序使得切换进程时也切换内核页表。对于这个步骤,每个进程的内核页表都应当与现有的全局内核页表完全一致。如果你的usertests程序正确运行了,那么你就通过了这个实验。
参照hints进行解题

在struct proc中为进程的内核页表增加一个字段

即为每一个进程创建一个内核页表

1
2
3
4
5
6
// proc.h
// (the "..." below stands for the fields struct proc already has)
struct proc {
...

pagetable_t k_pgtbl; // Process's kernel pagetable
}

生成内核页表

为一个新进程生成一个内核页表的合理方案是实现一个修改版的kvminit,这个版本中应当创造一个新的页表而不是修改kernel_pagetable。你将会考虑在allocproc中调用这个函数
在vm.c中定义k_pgtbl_init_in_process(),对象为新创建的一个内核页表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/*
 * Create a kernel page table for a process, holding the same
 * device, kernel text/data and trampoline mappings that kvminit()
 * installs in the global kernel page table.
 *
 * Returns the new page table, or 0 if a page-table page could not be
 * allocated. On failure every page-table page allocated so far is
 * released. The caller must check for 0 before using the result.
 */
pagetable_t k_pgtbl_init_in_process() {
  pagetable_t k_pgtbl = uvmcreate();

  if (k_pgtbl == 0)
    return 0;

  // mappages() is called directly (argument order: va, size, pa) so
  // that running out of memory is reported to the caller instead of
  // panicking as uvmmap() would.

  // uart registers
  if (mappages(k_pgtbl, UART0, PGSIZE, UART0, PTE_R | PTE_W) != 0)
    goto fail;

  // virtio mmio disk interface
  if (mappages(k_pgtbl, VIRTIO0, PGSIZE, VIRTIO0, PTE_R | PTE_W) != 0)
    goto fail;

  // CLINT
  if (mappages(k_pgtbl, CLINT, 0x10000, CLINT, PTE_R | PTE_W) != 0)
    goto fail;

  // PLIC
  if (mappages(k_pgtbl, PLIC, 0x400000, PLIC, PTE_R | PTE_W) != 0)
    goto fail;

  // map kernel text executable and read-only.
  if (mappages(k_pgtbl, KERNBASE, (uint64)etext-KERNBASE, KERNBASE, PTE_R | PTE_X) != 0)
    goto fail;

  // map kernel data and the physical RAM we'll make use of.
  if (mappages(k_pgtbl, (uint64)etext, PHYSTOP-(uint64)etext, (uint64)etext, PTE_R | PTE_W) != 0)
    goto fail;

  // map the trampoline for trap entry/exit to
  // the highest virtual address in the kernel.
  if (mappages(k_pgtbl, TRAMPOLINE, PGSIZE, (uint64)trampoline, PTE_R | PTE_X) != 0)
    goto fail;

  return k_pgtbl;

fail:
  // proc_free_kpgtbl() clears leaf PTEs without freeing the pages they
  // point to and frees only the page-table pages, which is what is
  // needed here: the mapped memory is not owned by this table.
  proc_free_kpgtbl(k_pgtbl);
  return 0;
}

在allocproc中调用

1
2
3
4
5
6
7
8
9
10
// proc.c
// An empty user page table.
p->pagetable = proc_pagetable(p);
if(p->pagetable == 0){
freeproc(p);
release(&p->lock);
return 0;
}

// Build this process's own kernel page table.
// NOTE(review): the result is stored without being checked for 0 here.
p->k_pgtbl = k_pgtbl_init_in_process();

为新创建的页表的内核栈建立映射,在allocproc中调用

同样仿照kvmmap()去写,通过mappages()去做映射

1
2
3
4
5
6
// vm.c
// Like kvmmap(), but installs the mapping in the page table passed
// in instead of the global kernel page table.
// Panics if mappages() fails.
void uvmmap(pagetable_t pagetable, uint64 va, uint64 pa, uint64 sz, int perm) {
  int rc = mappages(pagetable, va, sz, pa, perm);

  if (rc != 0)
    panic("uvmmap");
}

在上面的allocproc中进行调用

1
2
3
4
5
6
7
8
// Build this process's own kernel page table.
// NOTE(review): the result is used below without being checked for 0.
p->k_pgtbl = k_pgtbl_init_in_process();

// Allocate one physical page for the kernel stack and map it,
// readable and writable, at KSTACK(index of p within proc[]) in the
// process's kernel page table.
char *pa = kalloc();
if (pa == 0)
panic("kalloc");
uint64 va = KSTACK((int) (p - proc));
uvmmap(p->k_pgtbl, va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
p->kstack = va;

修改scheduler()

scheduler中加载进程的内核页表到核心的satp寄存器,SATP寄存器包含了需要使用的地址转换表的内存地址

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// vm.c
// Switch h/w page table register to the kernel's page table,
// and enable paging: write the satp value built from
// kernel_pagetable, then call sfence_vma() afterwards.
void
kvminithart()
{
w_satp(MAKE_SATP(kernel_pagetable));
sfence_vma();
}

// Modeled on kvminithart(): write the satp value built from the
// given page table k_pgtbl, then call sfence_vma() afterwards.
void proc_inithart(pagetable_t k_pgtbl) {
w_satp(MAKE_SATP(k_pgtbl));
sfence_vma();
}

在scheduler中调用

1
2
3
4
5
6
7
p->state = RUNNING;
c->proc = p;

// load the chosen process's kernel page table before switching to it.
proc_inithart(p->k_pgtbl);
swtch(&c->context, &p->context);

// swtch() has returned to the scheduler: go back to the global
// kernel page table.
kvminithart();

释放页表

由于给进程分配了内核页表,所以在freeproc中应当去释放
仿照freewalk定义一个函数,思想依旧是递归遍历

1
2
3
4
5
6
7
8
9
10
11
12
13
14
// vm.c
// Recursively free the page-table pages of a per-process kernel page
// table. Every valid entry is cleared; leaf entries (any of R/W/X
// set) are only cleared, and the pages they map are not freed.
void proc_free_kpgtbl(pagetable_t kpgtbl) {
  for(int idx = 0; idx < 512; idx++){
    pte_t entry = kpgtbl[idx];

    if((entry & PTE_V) == 0)
      continue;
    kpgtbl[idx] = 0;

    // leaf mapping: nothing below it to walk.
    if((entry & (PTE_R|PTE_W|PTE_X)) != 0)
      continue;

    // entry refers to a lower-level page-table page.
    proc_free_kpgtbl((pagetable_t)PTE2PA(entry));
  }
  kfree((void*)kpgtbl);
}

freeproc中调用

1
2
3
4
5
6
7
8
9
10
11
if(p->pagetable)
  proc_freepagetable(p->pagetable, p->sz);
p->pagetable = 0;

// freeproc() can run on a proc whose kernel page table was never
// created (allocproc() calls it when proc_pagetable() fails, before
// k_pgtbl is assigned), so only tear the table down when it exists.
if (p->k_pgtbl) {
  // unmap and free the kernel stack page first; it is a leaf mapping
  // whose physical page proc_free_kpgtbl() would not release.
  if (p->kstack) {
    uvmunmap(p->k_pgtbl, p->kstack, 1, 1);
  }
  proc_free_kpgtbl(p->k_pgtbl);
}
p->kstack = 0;
p->k_pgtbl = 0;

修改kvmpa

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Translate a kernel virtual address to a physical address by
// looking it up in the current process's kernel page table
// (myproc()->k_pgtbl). Only needed for addresses on the stack.
// va need not be page aligned: its offset within the page is
// carried over to the result.
// Panics if va has no valid mapping.
uint64
kvmpa(uint64 va)
{
  // use the per-process kernel page table instead of kernel_pagetable.
  pte_t *entry = walk(myproc()->k_pgtbl, va, 0);

  if(entry == 0)
    panic("kvmpa");
  if((*entry & PTE_V) == 0)
    panic("kvmpa");

  // physical page base plus the byte offset inside the page.
  return PTE2PA(*entry) + (va % PGSIZE);
}

在vm.c的开头引入相关头文件(kvmpa中用到了myproc()和struct proc)

1
2
3
4
#include "riscv.h"
#include "spinlock.h"
#include "proc.h"
#include "defs.h"

defs.h中定义一下

1
2
3
4
// prototypes for the per-process kernel page table helpers
pagetable_t     k_pgtbl_init_in_process();
void proc_inithart(pagetable_t);
void uvmmap(pagetable_t, uint64, uint64, uint64, int);
void proc_free_kpgtbl(pagetable_t);

Simplify copyin/copyinstr(hard)

/*
2024.6.7
唉。写不动了,之后再写,复习期末先,感觉要凉凉了
*/