#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#if defined(__x86_64__)
#include
#endif

#define SCHED_REAP_FREQ 10

/*
 * Lock hierarchy:
 *  - proc_tree_lock
 *  - cpu->lock
 *  - proc->lock
 *  - suspension_q->lock
 */

static struct rb_node_link* proc_tree = NULL;
static rw_spin_lock_t proc_tree_lock = RW_SPIN_LOCK_INIT;
static atomic_int sched_cycles = 0;

/* kernel pseudo process */
static struct proc kpproc;

static bool proc_check_elf (uint8_t* elf)
{
    if (!((elf[0] == 0x7F) && (elf[1] == 'E') && (elf[2] == 'L') && (elf[3] == 'F')))
        return false;

    return true;
}

/* map a contiguous physical range into the process address space and record the mapping */
bool proc_map (struct proc* proc, uintptr_t start_paddr, uintptr_t start_vaddr, size_t pages, uint32_t flags)
{
    spin_lock_ctx_t ctxprpd;

    struct proc_mapping* mapping = malloc (sizeof (*mapping));
    if (mapping == NULL)
        return false;

    mapping->paddr = start_paddr;
    mapping->vaddr = start_vaddr;
    mapping->size = pages * PAGE_SIZE;

    /* clear the LOCK/RELOAD flags if present, because we lock manually */
    flags &= ~(MM_PD_LOCK | MM_PD_RELOAD);

    spin_lock (&proc->pd->lock, &ctxprpd);

    list_append (proc->mappings, &mapping->proc_mappings_link);

    for (uintptr_t vpage = start_vaddr, ppage = start_paddr;
         vpage < start_vaddr + pages * PAGE_SIZE;
         vpage += PAGE_SIZE, ppage += PAGE_SIZE) {
        mm_map_page (proc->pd, ppage, vpage, flags);
    }

    spin_unlock (&proc->pd->lock, &ctxprpd);

    return true;
}

/* unmap a virtual range, trimming or splitting any overlapping recorded mappings */
bool proc_unmap (struct proc* proc, uintptr_t start_vaddr, size_t pages)
{
    size_t unmap_size = pages * PAGE_SIZE;
    uintptr_t end_vaddr = start_vaddr + unmap_size;
    struct list_node_link *mapping_link, *mapping_link_tmp;
    bool used_tail_mapping = false;
    spin_lock_ctx_t ctxprpd;

    /* pre-allocate a mapping in case an existing one has to be split in two */
    struct proc_mapping* tail_mapping = malloc (sizeof (*tail_mapping));
    if (tail_mapping == NULL)
        return false;

    spin_lock (&proc->pd->lock, &ctxprpd);

    list_foreach (proc->mappings, mapping_link, mapping_link_tmp) {
        struct proc_mapping* mapping = list_entry (mapping_link, struct proc_mapping, proc_mappings_link);
        uintptr_t m_end = mapping->vaddr + mapping->size;

        /* check overlap */
        if ((start_vaddr < m_end) && (end_vaddr > mapping->vaddr)) {
            if ((start_vaddr > mapping->vaddr) && (end_vaddr < m_end)) {
                /* split in the middle */
                tail_mapping->vaddr = end_vaddr;
                tail_mapping->paddr = mapping->paddr + (end_vaddr - mapping->vaddr);
                tail_mapping->size = m_end - end_vaddr;

                mapping->size = start_vaddr - mapping->vaddr;

                list_insert_after (proc->mappings, &mapping->proc_mappings_link,
                                   &tail_mapping->proc_mappings_link);

                used_tail_mapping = true;
                break;
            } else if ((start_vaddr <= mapping->vaddr) && (end_vaddr < m_end)) {
                /* shrink left */
                size_t diff = end_vaddr - mapping->vaddr;
                mapping->vaddr += diff;
                mapping->paddr += diff;
                mapping->size -= diff;
            } else if ((start_vaddr > mapping->vaddr) && (end_vaddr >= m_end)) {
                /* shrink right */
                mapping->size = start_vaddr - mapping->vaddr;
            } else {
                /* full overlap */
                list_remove (proc->mappings, &mapping->proc_mappings_link);
                free (mapping);
            }
        }
    }

    if (!used_tail_mapping)
        free (tail_mapping);

    for (uintptr_t vpage = start_vaddr; vpage < end_vaddr; vpage += PAGE_SIZE) {
        mm_unmap_page (proc->pd, vpage, 0);
    }

    spin_unlock (&proc->pd->lock, &ctxprpd);

    return true;
}

struct elf_aux proc_load_segments (struct proc* proc, uint8_t* elf)
{
    struct elf_aux aux;
    Elf64_Ehdr* ehdr = (Elf64_Ehdr*)elf;

    aux.entry = ehdr->e_entry;
    aux.phnum = ehdr->e_phnum;
    aux.phent = ehdr->e_phentsize;

    struct limine_hhdm_response* hhdm = limine_hhdm_request.response;
    for (uint64_t segment = 0; segment < ehdr->e_phnum; segment++) {
        Elf64_Phdr* phdr = (Elf64_Phdr*)((uintptr_t)elf + ehdr->e_phoff + (ehdr->e_phentsize * segment));

        switch (phdr->p_type) {
        case PT_PHDR: {
            aux.phdr = (uint64_t)phdr->p_vaddr;
        } break;
        case PT_LOAD: {
            uintptr_t v_addr = align_down (phdr->p_vaddr, PAGE_SIZE);
            uintptr_t off = phdr->p_vaddr - v_addr;
            size_t blks = div_align_up (phdr->p_memsz + off, PAGE_SIZE);

            /* back the segment with a private memory resource */
            struct proc_resource_mem_init mem_init = {.pages = blks};
            int rid = atomic_fetch_add (&proc->resources->sys_rids, 1);
            struct proc_resource* r = proc_create_resource (proc, rid, PR_MEM, RV_PRIVATE, (void*)&mem_init);
            if (r == NULL) {
                DEBUG ("pmm oom error while loading ELF segments! (tried to alloc %zu blks)\n", blks);
            }

            uintptr_t p_addr = r->u.mem.paddr;

            memset ((void*)((uintptr_t)hhdm->offset + p_addr), 0, blks * PAGE_SIZE);
            memcpy ((void*)((uintptr_t)hhdm->offset + p_addr + off),
                    (void*)((uintptr_t)elf + phdr->p_offset), phdr->p_filesz);

            uint32_t pg_flags = MM_PG_USER | MM_PG_PRESENT;
            if (phdr->p_flags & PF_W)
                pg_flags |= MM_PG_RW;

            proc_map (proc, p_addr, v_addr, blks, pg_flags);
        } break;
        }
    }

    return aux;
}

static struct proc* proc_spawn_rd (char* name)
{
    struct rd_file* rd_file = rd_get_file (name);

    bool ok = proc_check_elf (rd_file->content);
    DEBUG ("ELF magic %s\n", (ok ? "OK" : "BAD"));
    if (!ok)
        return NULL;

    return proc_from_elf (rd_file->content);
}

struct proc* proc_find_pid (int pid)
{
    spin_lock_ctx_t ctxprtr;
    struct proc* proc = NULL;

    rw_spin_read_lock (&proc_tree_lock, &ctxprtr);
    rbtree_find (struct proc, &proc_tree, pid, proc, proc_tree_link, pid);
    rw_spin_read_unlock (&proc_tree_lock, &ctxprtr);

    return proc;
}

void proc_register (struct proc* proc, struct cpu* cpu)
{
    spin_lock_ctx_t ctxcpu, ctxprtr;

    proc->cpu = cpu;

    spin_lock (&cpu->lock, &ctxcpu);
    rbtree_insert (struct proc, &cpu->proc_run_q, &proc->cpu_run_q_link, cpu_run_q_link, pid);
    if (cpu->proc_current == NULL)
        cpu->proc_current = proc;
    spin_unlock (&cpu->lock, &ctxcpu);

    rw_spin_write_lock (&proc_tree_lock, &ctxprtr);
    rbtree_insert (struct proc, &proc_tree, &proc->proc_tree_link, proc_tree_link, pid);
    rw_spin_write_unlock (&proc_tree_lock, &ctxprtr);
}

/* caller holds cpu->lock */
static struct proc* proc_find_sched (struct cpu* cpu)
{
    struct rb_node_link* node = NULL;
    struct proc* current = cpu->proc_current;
    struct proc* proc = NULL;

    /* round-robin: start walking the run queue just after the current process */
    if (current)
        rbtree_next (&current->cpu_run_q_link, node);
    if (!node)
        rbtree_first (&cpu->proc_run_q, node);
    if (!node)
        return NULL;

    struct rb_node_link* first = node;

    do {
        proc = rbtree_entry (node, struct proc, cpu_run_q_link);
        if (atomic_load (&proc->state) == PROC_READY)
            return proc;

        rbtree_next (node, node);
        if (!node)
            rbtree_first (&cpu->proc_run_q, node);
    } while (node != first);

    /* fall back to the current process if there is one and it is still runnable */
    return ((current && atomic_load (&current->state) == PROC_READY) ? current : NULL);
}
static void proc_reap (void)
{
    struct proc* proc = NULL;
    struct list_node_link* reap_list = NULL;
    spin_lock_ctx_t ctxprtr;
    spin_lock_ctx_t ctxpr;

    /* collect dead processes off the proc tree first, then clean them up unlocked */
    rw_spin_write_lock (&proc_tree_lock, &ctxprtr);

    struct rb_node_link* node;
    rbtree_first (&proc_tree, node);
    while (node) {
        struct rb_node_link* next;
        rbtree_next (node, next);

        proc = rbtree_entry (node, struct proc, proc_tree_link);
        if (atomic_load (&proc->state) == PROC_DEAD) {
            spin_lock (&proc->lock, &ctxpr);
            rbtree_delete (&proc_tree, &proc->proc_tree_link);
            spin_unlock (&proc->lock, &ctxpr);

            list_append (reap_list, &proc->reap_link);
        }

        node = next;
    }

    rw_spin_write_unlock (&proc_tree_lock, &ctxprtr);

    struct list_node_link *reap_link, *reap_link_tmp;
    list_foreach (reap_list, reap_link, reap_link_tmp) {
        proc = list_entry (reap_link, struct proc, reap_link);
        list_remove (reap_list, &proc->reap_link);

        DEBUG ("cleanup PID %d\n", proc->pid);
        proc_cleanup (proc);
    }
}

void proc_sched (void* regs)
{
    spin_lock_ctx_t ctxcpu, ctxpr;

    /* reap dead processes every SCHED_REAP_FREQ scheduling cycles */
    int s_cycles = atomic_fetch_add (&sched_cycles, 1);
    if (s_cycles % SCHED_REAP_FREQ == 0)
        proc_reap ();

    struct proc* next = NULL;
    struct cpu* cpu = thiscpu;

    spin_lock (&cpu->lock, &ctxcpu);

    /* save the interrupted context of the outgoing process */
    struct proc* prev = cpu->proc_current;
    if (prev != NULL) {
        spin_lock (&prev->lock, &ctxpr);
        prev->pdata.regs = *(struct saved_regs*)regs;
        spin_unlock (&prev->lock, &ctxpr);
    }

    next = proc_find_sched (cpu);
    if (next) {
        cpu->proc_current = next;
        spin_unlock (&cpu->lock, &ctxcpu);
        do_sched (next);
    } else {
        cpu->proc_current = NULL;
        spin_unlock (&cpu->lock, &ctxcpu);
        spin ();
    }
}

void proc_kill (struct proc* proc)
{
    spin_lock_ctx_t ctxpr, ctxcpu;
    struct cpu* cpu = proc->cpu;

    spin_lock (&proc->lock, &ctxpr);
    atomic_store (&proc->state, PROC_DEAD);
    spin_unlock (&proc->lock, &ctxpr);

    spin_lock (&cpu->lock, &ctxcpu);
    rbtree_delete (&cpu->proc_run_q, &proc->cpu_run_q_link);
    if (cpu->proc_current == proc)
        cpu->proc_current = NULL;
    spin_unlock (&cpu->lock, &ctxcpu);

    DEBUG ("killed PID %d\n", proc->pid);

    cpu_request_sched (cpu);
}

void proc_suspend (struct proc* proc, struct proc_suspension_q* sq)
{
    spin_lock_ctx_t ctxpr, ctxcpu, ctxsq;
    struct cpu* cpu = proc->cpu;

    spin_lock (&proc->lock, &ctxpr);
    atomic_store (&proc->state, PROC_SUSPENDED);
    proc->suspension_q = sq;
    spin_unlock (&proc->lock, &ctxpr);

    /* remove from run q */
    spin_lock (&cpu->lock, &ctxcpu);
    rbtree_delete (&cpu->proc_run_q, &proc->cpu_run_q_link);
    if (cpu->proc_current == proc)
        cpu->proc_current = NULL;
    spin_unlock (&cpu->lock, &ctxcpu);

    spin_lock (&sq->lock, &ctxsq);
    rbtree_insert (struct proc, &sq->proc_tree, &proc->suspension_link, suspension_link, pid);
    spin_unlock (&sq->lock, &ctxsq);

    cpu_request_sched (cpu);
}

void proc_resume (struct proc* proc)
{
    spin_lock_ctx_t ctxsq, ctxpr, ctxcpu;
    struct cpu* cpu = proc->cpu;
    struct proc_suspension_q* sq = proc->suspension_q;

    spin_lock (&sq->lock, &ctxsq);
    rbtree_delete (&sq->proc_tree, &proc->suspension_link);
    spin_unlock (&sq->lock, &ctxsq);

    spin_lock (&proc->lock, &ctxpr);
    proc->suspension_q = NULL;
    atomic_store (&proc->state, PROC_READY);
    spin_unlock (&proc->lock, &ctxpr);

    spin_lock (&cpu->lock, &ctxcpu);
    rbtree_insert (struct proc, &cpu->proc_run_q, &proc->cpu_run_q_link, cpu_run_q_link, pid);
    spin_unlock (&cpu->lock, &ctxcpu);

    cpu_request_sched (cpu);
}

static void proc_irq_sched (void* arg, void* regs)
{
    (void)arg;
    proc_sched (regs);
}
static void proc_kpproc_init (void)
{
    struct limine_hhdm_response* hhdm = limine_hhdm_request.response;

    memset (&kpproc, 0, sizeof (kpproc));

    kpproc.lock = SPIN_LOCK_INIT;
    kpproc.state = PROC_PSEUDO;
    kpproc.pid = 0;
    kpproc.resources = malloc (sizeof (*kpproc.resources));
    kpproc.resources->tree = NULL;
    kpproc.resources->lock = RW_SPIN_LOCK_INIT;
    kpproc.resources->refs = 1;
    kpproc.resources->sys_rids = 0;
    kpproc.pd = mm_get_kernel_pd ();
    kpproc.cpu = thiscpu;

    rbtree_insert (struct proc, &proc_tree, &kpproc.proc_tree_link, proc_tree_link, pid);

    /* prepare kernel resources */
    {
        /* frame buffer */
        struct limine_framebuffer_response* fb = limine_framebuffer_request.response;

        struct kpproc_fb fb_info = {
            .paddr = (uintptr_t)fb->framebuffers[0]->address - (uintptr_t)hhdm->offset,
            .w = fb->framebuffers[0]->width,
            .h = fb->framebuffers[0]->height,
            .pitch = fb->framebuffers[0]->pitch,
            .bpp = fb->framebuffers[0]->bpp,
            .red_mask_size = fb->framebuffers[0]->red_mask_size,
            .red_mask_shift = fb->framebuffers[0]->red_mask_shift,
            .green_mask_size = fb->framebuffers[0]->green_mask_size,
            .green_mask_shift = fb->framebuffers[0]->green_mask_shift,
            .blue_mask_size = fb->framebuffers[0]->blue_mask_size,
            .blue_mask_shift = fb->framebuffers[0]->blue_mask_shift,
        };

        DEBUG ("Framebuffer address %p\n", fb_info.paddr);

        size_t pages = align_up (sizeof (fb_info), PAGE_SIZE) / PAGE_SIZE;
        uintptr_t fb_info_memblk_paddr = pmm_alloc (pages);
        memcpy ((struct kpproc_fb*)((uintptr_t)hhdm->offset + fb_info_memblk_paddr),
                &fb_info, sizeof (fb_info));

        struct proc_resource_mem_init mem_init = {
            .pages = pages, .paddr = fb_info_memblk_paddr, .managed = true};
        proc_create_resource (&kpproc, 0, PR_MEM, RV_PUBLIC, &mem_init);
    }
}

void proc_init (void)
{
#if defined(__x86_64__)
    irq_attach (&proc_irq_sched, NULL, SCHED_PREEMPT_TIMER, IRQ_INTERRUPT_SAFE);
    irq_attach (&proc_irq_sched, NULL, CPU_REQUEST_SCHED, IRQ_INTERRUPT_SAFE);
#endif

    proc_kpproc_init ();

    struct proc* init = proc_spawn_rd ("init.exe");
    proc_register (init, thiscpu);

    do_sched (init);
}
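
/*
 * Usage sketch (illustrative only, compiled out): one way a wait primitive
 * could be built on proc_suspend()/proc_resume() with a dedicated suspension
 * queue. The names example_wait_q, example_block and example_wake_one are
 * hypothetical and exist nowhere else in the tree; the sketch assumes that
 * struct proc_suspension_q only needs its lock and proc_tree fields
 * initialized, matching how those fields are used above.
 */
#if 0
static struct proc_suspension_q example_wait_q = {
    .lock = SPIN_LOCK_INIT,
    .proc_tree = NULL,
};

/* park a process on the queue until someone wakes it */
static void example_block (struct proc* proc)
{
    proc_suspend (proc, &example_wait_q);
}

/* resume the lowest-pid waiter, if any */
static void example_wake_one (void)
{
    spin_lock_ctx_t ctxsq;
    struct rb_node_link* node = NULL;
    struct proc* proc = NULL;

    spin_lock (&example_wait_q.lock, &ctxsq);
    rbtree_first (&example_wait_q.proc_tree, node);
    if (node)
        proc = rbtree_entry (node, struct proc, suspension_link);
    spin_unlock (&example_wait_q.lock, &ctxsq);

    /* proc_resume() re-takes sq->lock itself, so the queue lock is dropped first */
    if (proc)
        proc_resume (proc);
}
#endif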