[operating system] MIT 6 s081 LAB6

Lab6: Copy-on-Write Fork for xv6

Original address: YSBLOG
reference material: MIT 6. S081 xv6 LaB6 cow - Zhihu (zhihu.com)

Experimental background:

In the original xv6, when Shell processes instructions, it will create a child process through fork, which contains a complete Shell copy, calling exec to execute the corresponding instruction program in the sub process, and the first thing in exec is to drop the Shell address space of fork copy instead of the address space of the instruction. In this process, the shell is discarded after being copied, which reduces the efficiency of program execution.

Purpose of the experiment:

Combined with the above background, we need to implement cow (copy on write) in this experiment. When creating a child process, we do not actually copy the parent process, but change the page table entry to read-only, and copy the memory only when the parent / child process writes the page for the first time, so as to save the actual memory space.

According to the prompt, first we set the eighth bit of the PTE flag bit to PTE_COW is used to identify whether the page corresponding to the page table item is a cow page.

// riscv.h
#define PTE_COW (1L << 8) // cow page

After we introduce cow, when the parent process exits, we cannot directly release the memory of the parent process, because at this time, the child process may also point to the memory page (the page has not triggered page fault and has not actually been copied). If released, the child process will not be able to access the memory of the page. Therefore, we need to add a reference count to each page. When the process exits, the memory will be released only if the reference count of the page is 0.

// kalloc.c

// Array used to access the physical page reference count
#define PA2PGREF_ID(p) (((p)-KERNBASE)/PGSIZE)
#define PGREF_MAX_ENTRIES PA2PGREF_ID(PHYSTOP)

// Define a single page structure
struct page_ref{
  struct spinlock lock; // One lock per page
  int cnt; // Reference count
};
struct page_ref page_ref_list[PGREF_MAX_ENTRIES]; // Reference count array


void
kinit()
{
  // Initializes the lock for each page reference count
  for (int i = 0; i < PGREF_MAX_ENTRIES; ++i) {
    initlock(&(page_ref_list[i].lock), "kpage_ref");
    page_ref_list[i].cnt = 1; // The initial value of page reference count is 1
  }
  initlock(&kmem.lock, "kmem");
  freerange(end, (void*)PHYSTOP);
}

// Memory is released when the page reference count is 0
void
kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  // Get page lock
  acquire(&((page_ref_list[PA2PGREF_ID((uint64)pa)].lock)));
  // The page reference count is minus one
  page_ref_list[PA2PGREF_ID((uint64)pa)].cnt--;
  if (page_ref_list[PA2PGREF_ID((uint64)pa)].cnt > 0) {
    // Another process is referring to the page and returns directly without releasing memory
    // Release lock
    release(&((page_ref_list[PA2PGREF_ID((uint64)pa)].lock)));
    return;
  }
  release(&((page_ref_list[PA2PGREF_ID((uint64)pa)].lock)));
    
  // The following is the actual memory release of the page
  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;

  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);
}

// Initialization reference count is 1 when allocating physical memory
void *
kalloc(void)
{
  struct run *r;

  acquire(&kmem.lock);
  r = kmem.freelist; // Get free pages
  if(r)
    kmem.freelist = r->next; // Point an idle page to the next page
  release(&kmem.lock);

  if(r){
    memset((char*)r, 5, PGSIZE); // fill with junk
    // Get page lock
    acquire(&((page_ref_list[PA2PGREF_ID((uint64)r)].lock)));
    page_ref_list[PA2PGREF_ID((uint64)r)].cnt = 1; // The reference count for the new page is 1
    // Release lock
    release(&((page_ref_list[PA2PGREF_ID((uint64)r)].lock)));
  }
  return (void*)r;
}

// Reference page, increase the reference count of pa by 1
int krefpage(uint64 pa) {
  if(pa % PGSIZE != 0 // pa is on the guard page 
  || (char*)pa < end // The kernel code is located in the pa area
  || pa >= PHYSTOP) // pa exceeds the maximum memory area
  { 
    return -1;
  }
  acquire(&((page_ref_list[PA2PGREF_ID(pa)].lock)));
  page_ref_list[PA2PGREF_ID(pa)].cnt++; // Reference count plus one
  release(&((page_ref_list[PA2PGREF_ID(pa)].lock)));
  return 1;
}

// defs.h
int             krefpaga(uint64 pa);

Like lab5, after introducing the cow page, the system triggered page missing interrupt needs special treatment. When it is detected that the page missing interrupt is triggered by the cow mechanism, we need to allocate and map the actual physical memory of the cow page, so we add the cow_check is used to check whether it is a cow page_ Copy is used to perform the actual physical memory allocation and mapping of cow pages.

// kalloc.c
// When a page missing interrupt occurs, it is used to detect whether the page address is a cow page
int cow_check(pagetable_t pagetable, uint64 va) {
  if (va > MAXVA) {
    return 0;
  }
  pte_t *pte = walk(pagetable, va, 0);
  if (pte == 0) {
    return 0;
  }
  if (( (*pte) & PTE_V) == 0) {
    return 0;
  }
  return ((*pte) & (PTE_COW));
}


// When the missing cow page is triggered and the missing page interrupt is triggered, the actual physical memory is allocated and mapped
uint64
cow_copy(pagetable_t pagetable, uint64 va) {
  va = PGROUNDDOWN(va); // Get the starting position of the current page
  pte_t *pte = walk(pagetable, va, 0);
  uint64 pa = PTE2PA(*pte);
  // Get current page lock
  acquire(&((page_ref_list[PA2PGREF_ID(pa)].lock)));
  if (page_ref_list[PA2PGREF_ID(pa)].cnt == 1) {
    // When only one process is in use on the current page, it will directly return to the page
    // Restore permissions on this page
    *pte = (*pte) & (~PTE_COW);
    *pte = (*pte) | (PTE_W);
    // Release lock
    release(&((page_ref_list[PA2PGREF_ID(pa)].lock)));
    return pa;
  }
  // Be sure to release the lock. The lock will be applied in kalloc, which will lead to deadlock
  release(&((page_ref_list[PA2PGREF_ID(pa)].lock)));

  // Allocate new memory pages
  uint64 newpa = (uint64)kalloc();
  if (newpa == 0) {
    // memory allocation failed 
    return 0;
  }

  // Copy old page content to new page
  memmove((void*)newpa, (void*)pa, PGSIZE);
  *pte = (*pte) & (~PTE_V); // Clear page PTE_V flag to prevent remap 
  uint64 flag = PTE_FLAGS(*pte);
  flag = flag | PTE_W;
  flag = flag & (~PTE_COW);

  // Insinuate the applied physical address to the corresponding virtual address
  if(mappages(pagetable, va, PGSIZE, (uint64)newpa, flag) != 0) {
    kfree((void*)newpa);
    return 0;
  }

  // An attempt was made to clear the original cow copy, where the reference count might be 0
  kfree((void*)PGROUNDDOWN(pa));
  
  return (uint64)newpa;
}
// defs.h
uint64 cow_copy(pagetable_t pagetable, uint64 va);
int cow_check(pagetable_t pagetable, uint64 va);

In the same way as lab5, modify the processing of falling into interrupt.

// trap.c
void
usertrap(void)
{
  ... ...
  } else if((which_dev = devintr()) != 0){
    // ok
  } else if (r_scause() == 13 || r_scause() == 15) {
    // Page missing exception
    uint64 va = r_stval();
    if (cow_check(p->pagetable, va)) {
      // Exception is cow page
      if (cow_copy(p->pagetable, va) == 0) {
        p->killed = 1;
      }
    } else {
      p->killed = 1;
    }
  } else {
    printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
    p->killed = 1;
  }
  ... ...
}

Finally, you need to modify copyout. When the copied page is a cow page, call cow_copy.

// vm.c
int
copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  uint64 n, va0, pa0;

  while(len > 0){
    va0 = PGROUNDDOWN(dstva);
    pa0 = walkaddr(pagetable, va0);
    if (cow_check(pagetable, va0) != 0) {
      // If the copy page is a cow page, execute cow copy
      pa0 = cow_copy(pagetable, va0);
    } 
  
    if(pa0 == 0)
      return -1;
    n = PGSIZE - (dstva - va0);
    if(n > len)
      n = len;
    memmove((void *)(pa0 + (dstva - va0)), src, n);

    len -= n;
    src += n;
    dstva = va0 + PGSIZE;
  }
  return 0;
}

Finally, use cowtest and usertests to complete the test.

init: starting sh
$ usertests
usertests starting
test execout: OK
test copyin: OK
test copyout: OK
test copyinstr1: OK
test copyinstr2: OK
test copyinstr3: OK
test rwsbrk: OK
test truncate1: OK
test truncate2: OK
test truncate3: OK
test reparent2: OK
test pgbug: OK
test sbrkbugs: usertrap(): unexpected scause 0x000000000000000c pid=3235
            sepc=0x000000000000555e stval=0x000000000000555e
usertrap(): unexpected scause 0x000000000000000c pid=3236
            sepc=0x000000000000555e stval=0x000000000000555e
OK
test badarg: OK
test reparent: OK
test twochildren: OK
test forkfork: OK
test forkforkfork: OK
test argptest: OK
test createdelete: OK
test linkunlink: OK
test linktest: OK
test unlinkread: OK
test concreate: OK
test subdir: OK
test fourfiles: OK
test sharedfd: OK
test dirtest: OK
test exectest: OK
test bigargtest: OK
test bigwrite: OK
test bsstest: OK
test sbrkbasic: OK
test sbrkmuch: OK
test kernmem: OK
test sbrkfail: OK
test sbrkarg: OK
test validatetest: OK
test stacktest: OK
test opentest: OK
test writetest: OK
test writebig: OK
test createtest: OK
test openiput: OK
test exitiput: OK
test iput: OK
test mem: OK
test pipe1: OK
test preempt: kill... wait... OK
test exitwait: OK
test rmdot: OK
test fourteen: OK
test bigfile: OK
test dirfile: OK
test iref: OK
test forktest: OK
test bigdir: OK
ALL TESTS PASSED
$ cowtest 
simple: ok
simple: ok
three: ok
three: ok
three: ok
file: ok
ALL COW TESTS PASSED

Keywords: C++ Linux Operation & Maintenance Operating System server

Added by one on Fri, 04 Feb 2022 09:03:19 +0200