XHCI works on real hardware!

This commit is contained in:
2026-03-24 22:12:36 +01:00
parent b2e4a3802d
commit 0478570b2b
11 changed files with 215 additions and 102 deletions

View File

@@ -58,7 +58,7 @@ static struct pg_index mm_page_index (uint64_t vaddr) {
/* Walk paging tables and allocate necessary structures along the way */
static uint64_t* mm_next_table (uint64_t* table, uint64_t entry_idx, bool alloc) {
uint64_t entry = table[entry_idx];
physaddr_t paddr;
uintptr_t paddr;
struct limine_hhdm_response* hhdm = limine_hhdm_request.response;
@@ -194,7 +194,7 @@ void mm_unmap_kernel_page (uintptr_t vaddr) {
uintptr_t mm_alloc_user_pd_phys (void) {
struct limine_hhdm_response* hhdm = limine_hhdm_request.response;
physaddr_t cr3 = pmm_alloc (1);
uintptr_t cr3 = pmm_alloc (1);
if (cr3 == PMM_ALLOC_ERR)
return 0;

View File

@@ -1,7 +1,7 @@
#include <sys/spin_lock.h>
/// Relax the spinlock using AMD64 pause instruction
void spin_lock_relax (void) { __asm__ volatile ("pause"); }
/*
 * Spin-wait hint for busy loops: executes the AMD64 `pause` instruction.
 * The asm statement also lists "memory" as clobbered.
 */
void spin_lock_relax (void) {
  __asm__ volatile ("pause"
                    : /* no outputs */
                    : /* no inputs */
                    : "memory");
}
void spin_lock_save_flags (uint64_t* flags) {
__asm__ volatile ("pushfq; cli; popq %0" : "=rm"(*flags)::"memory");

View File

@@ -12,7 +12,8 @@ c += amd64/bootmain.c \
amd64/sched1.c \
amd64/proc.c \
amd64/syscall.c \
amd64/gdt.c
amd64/gdt.c \
amd64/stall.c
S += amd64/intr_stub.S \
amd64/spin.S \
@@ -39,4 +40,5 @@ o += amd64/bootmain.o \
amd64/syscall.o \
amd64/syscallentry.o \
amd64/gdt.o \
amd64/sse.o
amd64/sse.o \
amd64/stall.o

31
kernel/amd64/stall.c Normal file
View File

@@ -0,0 +1,31 @@
#include <libk/std.h>
#include <sys/spin_lock.h>
#include <sys/stall.h>
/* Read the time-stamp counter with RDTSC and return it as a single 64-bit value. */
static uint64_t stall_read_tsc (void) {
  uint32_t tsc_low;
  uint32_t tsc_high;

  /* RDTSC delivers the low half in EAX and the high half in EDX. */
  __asm__ volatile ("rdtsc" : "=a"(tsc_low), "=d"(tsc_high));

  uint64_t tsc = tsc_high;
  tsc <<= 32;
  tsc |= tsc_low;
  return tsc;
}
/*
 * Estimate the TSC frequency in Hz from CPUID leaf 0x15.
 *
 * Leaf 0x15 reports EAX = ratio denominator, EBX = ratio numerator and
 * ECX = core crystal clock in Hz; the TSC frequency is ECX * EBX / EAX.
 *
 * Returns a 2.5 GHz guess when the leaf is not implemented or any of the
 * three fields is reported as zero.
 */
static uint64_t stall_get_tsc_freq_hz (void) {
  const uint64_t fallback_hz = 2500000000ULL;
  uint32_t eax, ebx, ecx, edx;

  /*
   * Leaf 0 returns the highest supported basic leaf in EAX. Querying a leaf
   * above that maximum does not yield TSC ratio data, so the result must not
   * be interpreted as such.
   */
  __asm__ volatile ("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(0), "c"(0));
  if (eax < 0x15)
    return fallback_hz;

  __asm__ volatile ("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(0x15), "c"(0));
  if (eax == 0 || ebx == 0 || ecx == 0)
    return fallback_hz;

  return (uint64_t)ecx * ebx / eax;
}
/*
 * Busy-wait for roughly `ms` milliseconds by spinning on the TSC.
 * The tick budget is derived from stall_get_tsc_freq_hz () on every call.
 */
void stall_ms (uint64_t ms) {
  const uint64_t ticks_per_ms = stall_get_tsc_freq_hz () / 1000;
  const uint64_t ticks_to_wait = ms * ticks_per_ms;
  const uint64_t start = stall_read_tsc ();

  for (;;) {
    /* Unsigned subtraction gives the elapsed tick count since `start`. */
    uint64_t elapsed = stall_read_tsc () - start;
    if (elapsed >= ticks_to_wait)
      break;
    spin_lock_relax ();
  }
}

View File

@@ -13,6 +13,7 @@
#include <proc/suspension_q.h>
#include <sys/debug.h>
#include <sys/spin_lock.h>
#include <sys/stall.h>
/* REF:
* https://www.intel.com/content/dam/www/public/us/en/documents/technical-specifications/extensible-host-controler-interface-usb-xhci.pdf
@@ -119,11 +120,10 @@ static uint32_t xhci_read32 (uintptr_t base, uint32_t reg) {
static void xhci_event_dispatch (struct xhci* xhci, struct xhci_trb* event, uint8_t type) {
switch (type) {
case XHCI_TRB_CMD_CMPL_EVENT: {
uintptr_t cmd_trb_phys = event->param;
uint8_t cmpl_code = (event->status >> 24) & 0xFF;
uint8_t slot_id = (event->ctrl >> 24) & 0xFF;
DEBUG ("cmd completion: phys=%p,code=%u,slot=%u\n", cmd_trb_phys, cmpl_code, slot_id);
DEBUG ("cmd completion: code=%u,slot=%u\n", cmpl_code, slot_id);
} break;
default:
DEBUG ("Unhandled event type %u at %u\n", type, xhci->event_ring_idx);
@@ -179,17 +179,7 @@ static void xhci_irq (void* arg, void* regs, bool user, struct reschedule_ctx* r
uintptr_t ir_base = xhci->xhci_runtime_base + 0x20;
/* ack */
uint32_t iman = xhci_read32 (ir_base, XHCI_IMAN);
xhci_write32 (ir_base, XHCI_IMAN, iman | 0x01);
uint32_t usbsts = xhci_read32 (xhci->xhci_oper_base, XHCI_USBSTS);
xhci_write32 (xhci->xhci_oper_base, XHCI_USBSTS, usbsts | (1 << 3));
/* clear event busy */
uint64_t erdp = (uint64_t)xhci_read32 (ir_base, XHCI_ERDP) |
((uint64_t)xhci_read32 (ir_base, XHCI_ERDP + 4) << 32);
xhci_write32 (ir_base, XHCI_ERDP, (uint32_t)erdp | (1 << 3));
xhci_write32 (ir_base, XHCI_IMAN, xhci_read32 (ir_base, XHCI_IMAN) | (1 << 0));
xhci_poll_events (xhci);
@@ -206,7 +196,7 @@ void xhci_send_cmd (struct xhci* xhci, uint64_t param, uint32_t status, uint32_t
struct xhci_trb* link = &xhci->cmd_ring[xhci->cmd_ring_idx];
link->param = xhci->cmd_ring_phys;
link->status = 0;
link->ctrl = (6 << 10) | (1 << 1) | xhci->cmd_cycle_bit;
link->ctrl = (XHCI_TRB_LINK << 10) | (1 << 1) | xhci->cmd_cycle_bit;
xhci->cmd_ring_idx = 0;
xhci->cmd_cycle_bit ^= 1;
@@ -237,6 +227,48 @@ void xhci_send_cmd (struct xhci* xhci, uint64_t param, uint32_t status, uint32_t
spin_unlock (&xhci->device->lock, fd);
}
/*
 * Take the controller over from the firmware, if the firmware still owns it.
 *
 * Walks the extended capability list whose start is taken from the upper 16
 * bits of HCCPARAMS1 (in 32-bit words from the MMIO base) until the
 * capability with ID 1 (USB Legacy Support) is found. If bit 16 of that
 * capability (firmware ownership) is set, the OS ownership bit (bit 24) is
 * requested and the register is polled once per millisecond, up to 1000
 * times, until bit 16 is clear and bit 24 is set.
 *
 * Only logs a warning on timeout; the caller is not informed.
 */
static void xhci_bios_handover (struct xhci* xhci) {
  uint32_t hccparams1 = xhci_read32 (xhci->xhci_mmio_base, XHCI_HCCPARAMS1);
  uint32_t ext_offset = (hccparams1 >> 16) << 2;

  /* An offset of zero means there is no extended capability list. */
  while (ext_offset) {
    uintptr_t cap_ptr = xhci->xhci_mmio_base + ext_offset;
    uint32_t cap = xhci_read32 (cap_ptr, 0);
    uint8_t cap_id = cap & 0xFF;

    if (cap_id == 1) {
      DEBUG ("Found USB Legacy Support at offset 0x%x\n", ext_offset);

      if (cap & (1 << 16)) {
        DEBUG ("BIOS owns XHCI, requesting handover!\n");

        /* Byte 3, bit 0 of the capability is bit 24 of the dword. */
        xhci_write8 (cap_ptr, 3, 1);

        /*
         * Track success explicitly: inferring it from the loop counter
         * misreports a handover that completes on the very last poll.
         */
        bool handed_over = false;
        for (int attempt = 0; attempt < 1000; attempt++) {
          uint32_t val = xhci_read32 (cap_ptr, 0);
          if (!(val & (1 << 16)) && (val & (1 << 24))) {
            handed_over = true;
            break;
          }
          stall_ms (1);
        }

        if (handed_over) {
          DEBUG ("XHCI Handover successful.\n");
        } else {
          DEBUG ("Warning: XHCI BIOS handover timed out!\n");
        }
      }
      break;
    }

    /* Bits 15:8 hold the distance to the next capability in 32-bit words. */
    uint8_t next = (cap >> 8) & 0xFF;
    if (!next)
      break;
    ext_offset += (next << 2);
  }
}
DEFINE_DEVICE_INIT (xhci_init) {
struct limine_hhdm_response* hhdm = limine_hhdm_request.response;
@@ -255,7 +287,9 @@ DEFINE_DEVICE_INIT (xhci_init) {
device->udata = xhci;
uint8_t cap_length = xhci_read8 (xhci->xhci_mmio_base, XHCI_CAPLENGTH);
uint32_t usbcmd, config, cap_length;
cap_length = xhci_read8 (xhci->xhci_mmio_base, XHCI_CAPLENGTH);
xhci->xhci_oper_base = xhci->xhci_mmio_base + cap_length;
@@ -265,90 +299,80 @@ DEFINE_DEVICE_INIT (xhci_init) {
uint32_t dboff = xhci_read32 (xhci->xhci_mmio_base, XHCI_DBOFF);
xhci->xhci_doorbell_base = xhci->xhci_mmio_base + dboff;
uint32_t hcsparams2 = xhci_read32 (xhci->xhci_mmio_base, XHCI_HCSPARAMS2);
xhci->max_scratchpad = (((hcsparams2 >> 21) & 0x1F) << 5) | ((hcsparams2 >> 27) & 0x1F);
DEBUG ("starting init sequence\n");
/* assert CNR is 0 */
while (xhci_read32 (xhci->xhci_oper_base, XHCI_USBSTS) & (1 << 11))
spin_lock_relax ();
/* stop running / clear Run/Stop bit */
usbcmd = xhci_read32 (xhci->xhci_oper_base, XHCI_USBCMD);
usbcmd &= ~(1 << 0);
xhci_write32 (xhci->xhci_oper_base, XHCI_USBCMD, usbcmd);
/* STOP */
xhci_write32 (xhci->xhci_oper_base, XHCI_USBCMD, 0);
stall_ms (1000);
/* wait for HCH bit */
int timeout = 100000;
while (!(xhci_read32 (xhci->xhci_oper_base, XHCI_USBSTS) & (1 << 12))) {
if (--timeout == 0)
break;
xhci_bios_handover (xhci);
spin_lock_relax ();
}
/* reset controller */
usbcmd = xhci_read32 (xhci->xhci_oper_base, XHCI_USBCMD);
usbcmd |= (1 << 1);
xhci_write32 (xhci->xhci_oper_base, XHCI_USBCMD, usbcmd);
/* RESET */
xhci_write32 (xhci->xhci_oper_base, XHCI_USBCMD, (1 << 1));
stall_ms (1000);
while (xhci_read32 (xhci->xhci_oper_base, XHCI_USBCMD) & (1 << 1))
spin_lock_relax ();
DEBUG ("controller reset\n");
/* Stall while controller not ready */
while (xhci_read32 (xhci->xhci_oper_base, XHCI_USBSTS) & (1 << 11))
spin_lock_relax ();
xhci->max_slots = xhci_read32 (xhci->xhci_mmio_base, XHCI_HCSPARAMS1) & 0xFF;
DEBUG ("max_slots=%u\n", xhci->max_slots);
DEBUG ("XHCI init done\n");
uint32_t hcsparams1 = xhci_read32 (xhci->xhci_mmio_base, XHCI_HCSPARAMS1);
uint8_t max_slots = (uint8_t)(hcsparams1 & 0xFF);
/* enable device notifications */
xhci_write32 (xhci->xhci_oper_base, XHCI_DNCTRL, 0xFFFF);
/* enable slots */
uint32_t config = xhci_read32 (xhci->xhci_oper_base, XHCI_CONFIG);
xhci_write32 (xhci->xhci_oper_base, XHCI_CONFIG, (config & ~0xFF) | max_slots);
config = xhci_read32 (xhci->xhci_oper_base, XHCI_CONFIG);
xhci_write32 (xhci->xhci_oper_base, XHCI_CONFIG, (config & ~0xFF) | (xhci->max_slots & 0xFF));
DEBUG ("enabled %u slots\n", max_slots);
uint32_t hcsparams2 = xhci_read32 (xhci->xhci_mmio_base, XHCI_HCSPARAMS2);
xhci->max_scratchpad = (hcsparams2 >> 27) & 0x1F;
uintptr_t dcbaa_phys = pmm_alloc (1);
xhci->xhci_dcbaa_phys = dcbaa_phys;
xhci->xhci_dcbaa = (uintptr_t*)(dcbaa_phys + (uintptr_t)hhdm->offset);
/* Prepare DCBAA */
xhci->xhci_dcbaa_phys = pmm_alloc (1);
xhci->xhci_dcbaa = (uintptr_t*)(xhci->xhci_dcbaa_phys + (uintptr_t)hhdm->offset);
memset (xhci->xhci_dcbaa, 0, PAGE_SIZE);
if (xhci->max_scratchpad > 0) {
uintptr_t dev_array_phys = pmm_alloc (1);
xhci->scratchpads_phys = pmm_alloc (1);
xhci->scratchpads = (uintptr_t*)(xhci->scratchpads_phys + (uintptr_t)hhdm->offset);
memset (xhci->scratchpads, 0, PAGE_SIZE);
uintptr_t* dev_array = (uintptr_t*)(dev_array_phys + (uintptr_t)hhdm->offset);
memset (dev_array, 0, PAGE_SIZE);
for (size_t i = 0; i < xhci->max_scratchpad; i++)
dev_array[i] = pmm_alloc (1);
xhci->xhci_dcbaa[0] = dev_array_phys;
for (size_t sp = 0; sp < xhci->max_scratchpad; sp++) {
xhci->scratchpads[sp] = pmm_alloc (1);
}
xhci_write32 (xhci->xhci_oper_base, XHCI_DCBAAP, (uint32_t)xhci->xhci_dcbaa_phys);
xhci_write32 (xhci->xhci_oper_base, XHCI_DCBAAP + 4, (uint32_t)(xhci->xhci_dcbaa_phys >> 32));
xhci->xhci_dcbaa[0] = xhci->scratchpads_phys;
}
xhci->cmd_ring_phys = pmm_alloc (1);
xhci_write32 (xhci->xhci_oper_base, XHCI_DCBAAP + 4, (uint32_t)(xhci->xhci_dcbaa_phys >> 32));
xhci_write32 (xhci->xhci_oper_base, XHCI_DCBAAP, (uint32_t)xhci->xhci_dcbaa_phys);
xhci->cmd_ring_phys = pmm_alloc_aligned (1, 64);
xhci->cmd_ring = (struct xhci_trb*)(xhci->cmd_ring_phys + (uintptr_t)hhdm->offset);
memset (xhci->cmd_ring, 0, PAGE_SIZE);
xhci->cmd_ring_size = PAGE_SIZE / sizeof (struct xhci_trb);
xhci->cmd_ring_idx = 0;
xhci->cmd_cycle_bit = 1;
memset (xhci->cmd_ring, 0, PAGE_SIZE);
uint64_t crcr = xhci->cmd_ring_phys | xhci->cmd_cycle_bit;
xhci_write32 (xhci->xhci_oper_base, XHCI_CRCR, (uint32_t)crcr);
xhci_write32 (xhci->xhci_oper_base, XHCI_CRCR + 4, (uint32_t)(crcr >> 32));
xhci_write32 (xhci->xhci_oper_base, XHCI_CRCR, (uint32_t)crcr);
xhci->event_ring_phys = pmm_alloc (1);
xhci->event_ring_phys = pmm_alloc_aligned (1, 64);
xhci->event_ring = (struct xhci_trb*)(xhci->event_ring_phys + (uintptr_t)hhdm->offset);
memset (xhci->event_ring, 0, PAGE_SIZE);
xhci->event_ring_size = PAGE_SIZE / sizeof (struct xhci_trb);
xhci->event_ring_idx = 0;
xhci->event_cycle_bit = 1;
memset (xhci->event_ring, 0, PAGE_SIZE);
xhci->erst_phys = pmm_alloc (1);
xhci->erst_phys = pmm_alloc_aligned (1, 64);
xhci->erst = (struct xhci_erst_entry*)(xhci->erst_phys + (uintptr_t)hhdm->offset);
memset (xhci->erst, 0, PAGE_SIZE);
xhci->erst[0].ptr = xhci->event_ring_phys;
xhci->erst[0].size = xhci->event_ring_size;
xhci->erst[0]._rsvd = 0;
@@ -358,19 +382,22 @@ DEFINE_DEVICE_INIT (xhci_init) {
xhci_write32 (ir_base, XHCI_ERSTBA, (uint32_t)xhci->erst_phys);
xhci_write32 (ir_base, XHCI_ERSTBA + 4, (uint32_t)(xhci->erst_phys >> 32));
xhci_write32 (ir_base, XHCI_ERDP, (uint32_t)xhci->event_ring_phys | (1 << 3));
xhci_write32 (ir_base, XHCI_ERDP + 4, (uint32_t)(xhci->event_ring_phys >> 32));
xhci_write32 (ir_base, XHCI_ERDP, (uint32_t)xhci->event_ring_phys | (1 << 3));
if (xhci->irqs_support) {
/* enable interrupter */
irq_attach (&xhci_irq, xhci, xhci->irq);
xhci_write32 (ir_base, XHCI_IMAN, xhci_read32 (ir_base, XHCI_IMAN) | 0x02);
xhci_write32 (ir_base, XHCI_IMAN, xhci_read32 (ir_base, XHCI_IMAN) | (1 << 1));
}
uint32_t usbcmd = xhci_read32 (xhci->xhci_oper_base, XHCI_USBCMD);
xhci_write32 (xhci->xhci_oper_base, XHCI_USBCMD, usbcmd | 0x01 | (1 << 2));
usbcmd = xhci_read32 (xhci->xhci_oper_base, XHCI_USBCMD);
xhci_write32 (xhci->xhci_oper_base, XHCI_USBCMD, usbcmd | (1 << 0) | (1 << 2));
xhci_send_cmd (xhci, 0, 0, (23 << 10));
while (xhci_read32 (xhci->xhci_oper_base, XHCI_USBSTS) & (1 << 11))
spin_lock_relax ();
xhci_send_cmd (xhci, 0, 0, XHCI_TRB_SLOT_ENAB_CMD << 10);
return true;
}
@@ -381,16 +408,16 @@ DEFINE_DEVICE_FINI (xhci_fini) {
struct xhci* xhci = device->udata;
if (xhci->max_scratchpad > 0) {
uintptr_t dev_array_phys = xhci->xhci_dcbaa[0];
uintptr_t scratchpads_phys = xhci->xhci_dcbaa[0];
uintptr_t* dev_array = (uintptr_t*)(dev_array_phys + (uintptr_t)hhdm->offset);
uintptr_t* scratchpads = (uintptr_t*)(scratchpads_phys + (uintptr_t)hhdm->offset);
for (size_t i = 0; i < xhci->max_scratchpad; i++) {
if (dev_array[i] != 0)
pmm_free (dev_array[i], 1);
if (scratchpads[i] != 0)
pmm_free (scratchpads[i], 1);
}
pmm_free (dev_array_phys, 1);
pmm_free (scratchpads_phys, 1);
}
pmm_free (xhci->xhci_dcbaa_phys, 1);

View File

@@ -42,7 +42,12 @@ struct xhci {
uintptr_t* xhci_dcbaa;
uintptr_t xhci_dcbaa_phys;
uint32_t max_scratchpad;
uintptr_t* scratchpads;
uintptr_t scratchpads_phys;
uint32_t max_slots;
struct xhci_trb* cmd_ring;
uintptr_t cmd_ring_phys;

View File

@@ -54,7 +54,7 @@ void* mmap (void* addr, size_t size, int prot, int flags, int fd, size_t off) {
size = div_align_up (size, PAGE_SIZE);
physaddr_t p_addr = pmm_alloc (size);
uintptr_t p_addr = pmm_alloc (size);
if (p_addr == PMM_ALLOC_ERR)
return (void*)-1;
@@ -75,7 +75,7 @@ int munmap (void* addr, size_t length) {
struct limine_hhdm_response* hhdm = limine_hhdm_request.response;
physaddr_t p_addr = (uintptr_t)addr - hhdm->offset;
uintptr_t p_addr = (uintptr_t)addr - hhdm->offset;
pmm_free (p_addr, length);

View File

@@ -5,7 +5,6 @@
#include <limine/limine.h>
#include <limine/requests.h>
#include <mm/pmm.h>
#include <mm/types.h>
#include <sync/spin_lock.h>
#include <sys/debug.h>
#include <sys/mm.h>
@@ -43,15 +42,15 @@ void pmm_init (void) {
* */
size_t size = align_down (entry->length, PAGE_SIZE);
physaddr_t start = align_up (entry->base, PAGE_SIZE);
uintptr_t start = align_up (entry->base, PAGE_SIZE);
size_t max_pages = (size * 8) / (PAGE_SIZE * 8 + 1);
size_t bm_nbits = max_pages;
size_t bm_size = align_up (bm_nbits, 8) / 8;
physaddr_t bm_base = start;
physaddr_t data_base = align_up (bm_base + bm_size, PAGE_SIZE);
uintptr_t bm_base = start;
uintptr_t data_base = align_up (bm_base + bm_size, PAGE_SIZE);
if (bm_base + bm_size >= start + size)
continue;
@@ -99,7 +98,23 @@ static size_t pmm_find_free_space (struct pmm_region* pmm_region, size_t nblks)
return (size_t)-1;
}
physaddr_t pmm_alloc (size_t nblks) {
static size_t pmm_find_free_space_aligned (struct pmm_region* pmm_region, size_t nblks,
size_t align_pages) {
if (align_pages == 0)
align_pages = 1;
for (size_t bit = 0; bit < pmm_region->bm.nbits; bit += align_pages) {
if (bm_test_region (&pmm_region->bm, bit, nblks)) {
continue;
}
return bit;
}
return (size_t)-1;
}
uintptr_t pmm_alloc (size_t nblks) {
uint64_t fpr;
for (size_t region = 0; region < PMM_REGIONS_MAX; region++) {
@@ -129,11 +144,41 @@ physaddr_t pmm_alloc (size_t nblks) {
return PMM_ALLOC_ERR;
}
void pmm_free (physaddr_t p_addr, size_t nblks) {
/*
 * Allocate `nblks` consecutive pages, placed as chosen by
 * pmm_find_free_space_aligned () for `align_pages`.
 *
 * Active regions are tried in index order; the first one with a suitable
 * free range wins. Returns the physical address of the range, or
 * PMM_ALLOC_ERR when no active region can satisfy the request.
 */
uintptr_t pmm_alloc_aligned (size_t nblks, size_t align_pages) {
  const size_t not_found = (size_t)-1;
  uint64_t fpr;

  for (size_t idx = 0; idx < PMM_REGIONS_MAX; idx++) {
    struct pmm_region* candidate = &pmm.regions[idx];

    /* Only active regions take part in allocation. */
    if ((candidate->flags & PMM_REGION_ACTIVE) == 0)
      continue;

    spin_lock (&candidate->lock, &fpr);

    /* Look for the range and claim it while the region lock is held. */
    size_t first_bit = pmm_find_free_space_aligned (candidate, nblks, align_pages);
    if (first_bit != not_found)
      bm_set_region (&candidate->bm, first_bit, nblks);

    spin_unlock (&candidate->lock, fpr);

    if (first_bit != not_found)
      return candidate->membase + first_bit * PAGE_SIZE;
  }

  return PMM_ALLOC_ERR;
}
void pmm_free (uintptr_t p_addr, size_t nblks) {
uint64_t fpr;
/* Round down to nearest page boundary */
physaddr_t aligned_p_addr = align_down (p_addr, PAGE_SIZE);
uintptr_t aligned_p_addr = align_down (p_addr, PAGE_SIZE);
for (size_t region = 0; region < PMM_REGIONS_MAX; region++) {
struct pmm_region* pmm_region = &pmm.regions[region];
@@ -145,7 +190,7 @@ void pmm_free (physaddr_t p_addr, size_t nblks) {
/* If aligned_p_addr is within the range of this region, it belongs to it. */
if (aligned_p_addr >= pmm_region->membase &&
aligned_p_addr < pmm_region->membase + pmm_region->size) {
physaddr_t addr = aligned_p_addr - pmm_region->membase;
uintptr_t addr = aligned_p_addr - pmm_region->membase;
size_t bit = div_align_up (addr, PAGE_SIZE);

View File

@@ -3,10 +3,9 @@
#include <libk/bm.h>
#include <libk/std.h>
#include <mm/types.h>
#include <sync/spin_lock.h>
#define PMM_ALLOC_ERR ((physaddr_t) - 1)
#define PMM_ALLOC_ERR ((uintptr_t)-1)
#define PMM_REGIONS_MAX 32
@@ -15,7 +14,7 @@
struct pmm_region {
spin_lock_t lock;
struct bm bm;
physaddr_t membase;
uintptr_t membase;
size_t size;
uint32_t flags;
};
@@ -25,7 +24,11 @@ struct pmm {
};
void pmm_init (void);
physaddr_t pmm_alloc (size_t nblks);
void pmm_free (physaddr_t p_addr, size_t nblks);
uintptr_t pmm_alloc (size_t nblks);
uintptr_t pmm_alloc_aligned (size_t nblks, size_t align_pages);
void pmm_free (uintptr_t p_addr, size_t nblks);
#endif // _KERNEL_MM_PMM_H

View File

@@ -1,8 +0,0 @@
#ifndef _KERNEL_MM_TYPES_H
#define _KERNEL_MM_TYPES_H
#include <libk/std.h>
/* Integer type used by the memory-management code to hold physical addresses. */
typedef uintptr_t physaddr_t;
#endif // _KERNEL_MM_TYPES_H

8
kernel/sys/stall.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef _KERNEL_SYS_STALL_H
#define _KERNEL_SYS_STALL_H
#include <libk/std.h>
/* Busy-wait for roughly `ms` milliseconds; the amd64 implementation spins on the TSC. */
void stall_ms (uint64_t ms);
#endif // _KERNEL_SYS_STALL_H