From 0da47c51de7ea212b419a887a60437891f493a91 Mon Sep 17 00:00:00 2001 From: tkchia Date: Sun, 18 Dec 2022 09:51:20 +0800 Subject: [PATCH] [metal] Allow programs larger than 440 KiB to run in bare metal mode (#685) * [metal] Copy program pages to extended memory at startup * [metal] Reclaim base memory pages for later app use * [metal] Load program pages beyond 1st 440 KiB to extended memory o//examples/hellolua.com now runs correctly under QEMU (in legacy BIOS mode). * [metal] Place GDT in read/write segment The CPU absolutely needs to alter the GDT when loading the task register (via ltr). To account for this, I move the GDT into a read/write data section. There is still a "rump" read-only GDT in the text section that is used by the real mode bootloader. We also delay the loading of the task register (ltr) until after the IDT and TSS are finally set up. * [metal] Get examples/vga2.c serial output working for UEFI boot * [metal] Get examples/vga2.c VGA output working for UEFI boot * [metal] Allow munmap() to reclaim dynamically allocated pages * Place TLS sections right after .text, not after embedded zip file Co-authored-by: tkchia --- ape/ape.S | 205 ++++++++++++++++++++++++++-------- ape/ape.lds | 65 ++++++----- examples/vga2.c | 6 + libc/calls/metalfile.c | 4 +- libc/calls/munmap-metal.c | 13 ++- libc/intrin/directmap-metal.c | 22 ++-- libc/intrin/interrupts.S | 5 +- libc/intrin/kprintf.greg.c | 2 +- libc/intrin/mman.greg.c | 139 ++++++++++++++++++++--- libc/nt/efi.h | 80 ++++++++++++- libc/runtime/efimain.greg.c | 189 +++++++++++++++++++++---------- libc/runtime/efipostboot.S | 126 +++++++++++++++++++++ libc/runtime/mman.internal.h | 43 ++++--- libc/runtime/pc.internal.h | 13 ++- libc/stubs/ld.S | 3 + libc/vga/vga-init.greg.c | 1 + 16 files changed, 725 insertions(+), 191 deletions(-) create mode 100644 libc/runtime/efipostboot.S diff --git a/ape/ape.S b/ape/ape.S index 234b5d07d..41d4c2b7e 100644 --- a/ape/ape.S +++ b/ape/ape.S @@ -282,6 +282,9 @@ pc: cld 
xor %dh,%dh # current head mov $v_ape_realsectors,%di # total sectors 3: call pcread + mov %es,%si # addr += 512 + add $512>>4,%si + mov %si,%es dec %di jnz 3b 6: ljmp $0,$REAL(realmodeloader) @@ -370,6 +373,7 @@ dsknfo: push %bx pop %ds xchg %bx,%ax stosw #→ pc_drive_type, pc_drive_last_sector + scasb xchg %cx,%ax stosw #→ pc_drive_last_cylinder xchg %dx,%ax @@ -408,9 +412,6 @@ pcread: push %ax pop %cx pop %ax jc 9f - mov %es,%si # addr += 512 - add $512>>4,%si - mov %si,%es inc %al # ++sector cmp mm+"struct mman::pc_drive_last_sector",%al jbe 2f @@ -1111,6 +1112,42 @@ ape_idata_idt: ape_idata_idtend: .previous + .section .piro.data.sort.metal_gdt,"aw",@progbits + .align 8 +_gdt: +// ┌G:granularity (1 → limit *= 0x1000) +// │┌D/B:default operation size (0 = 16|64bit, 1 = 32-bit) +// ││┌L:long mode +// │││┌AVL:this bit is thine (1<<52) +// ││││ ┌P:present +// ││││ │┌DPL:privilege +// ││││ ││ ┌─────────data/code(1) +// ││││ ││ │┌────data(0)──────code(1) +// ││││ ││ ││┌───conforming───expand-down +// ││││ ││ │││┌──writeable────readable +// ││││ ││ ││││┌─accessed─────accessed +// ││││ ││ │││││ +// ││││ ┌──││─│││││───────────────────────────────┐ +// ┌───││││─│──││─│││││───────────┐ │ +// ┌───┴──┐││││┌┴─┐│├┐│││││┌──────────┴───────────┐┌──────┴───────┐ +// │ ││││││ ││││││││││ base address││ segment limit│ +// │ ││││││ ││││││││││ 32 bits││ 20 bits│ +// │ ││││││ ││││││││││ ││ │ +// 6666555555555544444444443333333333222222222211111111110000000000 +// 3210987654321098765432109876543210987654321098765432109876543210 +// │ ││││││ ││││││││││ ││ │ +.quad 0b0000000000000000000000000000000000000000000000000000000000000000 # 0 +.quad 0b0000000000001111100110100000000000000000000000001111111111111111 # 8 +.quad 0b0000000000001111100100100000000000000000000000001111111111111111 #16 +.quad 0b0000000011001111100110100000000000000000000000001111111111111111 #24 +.quad 0b0000000011001111100100100000000000000000000000001111111111111111 #32 +.quad 
0b0000000010101111100110110000000000000000000000001111111111111111 #40 +.quad 0b0000000010101111100100110000000000000000000000001111111111111111 #48 +.tssdescstub _tss #56,64 +_gdt_end: + .endobj _gdt,global,hidden + .previous + .section .piro.data.sort.iat.1,"aw",@progbits .type ape_idata_iatend,@object .type ape_idata_iat,@object @@ -1168,48 +1205,21 @@ sconf: .short 1843200/*hz*/ / 16/*wut*/ / 9600/*baud*/ // Global Descriptor Table .align 8 -_gdtrphy: - .short 2f-1f-1 # table byte length - .long REAL(1f) # table address (physical space) - .endobj _gdtrphy,global,hidden +_gdtrlo: + .short 2f-_gdtlo-1 # table byte length + .long REAL(_gdtlo) # table address (base memory space) + .endobj _gdtrlo,global,hidden _gdtr: - .short 2f-1f-1 # table byte length - .quad 1f # table address (final virtual space) + .short _gdt_end-_gdt-1 # table byte length + .quad _gdt # table address (final virtual space) .endobj _gdtr,global,hidden .align 8 -_gdt: -1: -// ┌G:granularity (1 → limit *= 0x1000) -// │┌D/B:default operation size (0 = 16|64bit, 1 = 32-bit) -// ││┌L:long mode -// │││┌AVL:this bit is thine (1<<52) -// ││││ ┌P:present -// ││││ │┌DPL:privilege -// ││││ ││ ┌─────────data/code(1) -// ││││ ││ │┌────data(0)──────code(1) -// ││││ ││ ││┌───conforming───expand-down -// ││││ ││ │││┌──writeable────readable -// ││││ ││ ││││┌─accessed─────accessed -// ││││ ││ │││││ -// ││││ ┌──││─│││││───────────────────────────────┐ -// ┌───││││─│──││─│││││───────────┐ │ -// ┌───┴──┐││││┌┴─┐│├┐│││││┌──────────┴───────────┐┌──────┴───────┐ -// │ ││││││ ││││││││││ base address││ segment limit│ -// │ ││││││ ││││││││││ 32 bits││ 20 bits│ -// │ ││││││ ││││││││││ ││ │ -// 6666555555555544444444443333333333222222222211111111110000000000 -// 3210987654321098765432109876543210987654321098765432109876543210 -// │ ││││││ ││││││││││ ││ │ -.quad 0b0000000000000000000000000000000000000000000000000000000000000000 # 0 -.quad 0b0000000000001111100110100000000000000000000000001111111111111111 # 8 -.quad 
0b0000000000001111100100100000000000000000000000001111111111111111 #16 -.quad 0b0000000011001111100110100000000000000000000000001111111111111111 #24 +// Partial GDT with descriptors for switching to unreal mode or long mode. +_gdtlo = .-GDT_LEGACY_DATA .quad 0b0000000011001111100100100000000000000000000000001111111111111111 #32 .quad 0b0000000010101111100110110000000000000000000000001111111111111111 #40 .quad 0b0000000010101111100100110000000000000000000000001111111111111111 #48 -.tssdescstub _tss #56,64 2: - .endobj _gdt,global,hidden /*─────────────────────────────────────────────────────────────────────────────╗ │ αcτµαlly pδrταblε εxεcµταblε § real mode │ @@ -1217,6 +1227,7 @@ _gdt: the default mode of operation on modern cpus */ realmodeloader: + call lhinit call rlinit call sinit4 .optfn _start16 @@ -1224,6 +1235,25 @@ realmodeloader: call longmodeloader .endfn realmodeloader +// Prepares to later load parts of the program that are not loaded yet. +// +// @param al next sector number +// @param cx next cylinder number +// @param dh next head number +// @param dl drive number +// @clob ax, cx, es, di +lhinit: cmp $0x40,%dl + je 9f + pushpop 0,%es + mov $mm+"struct mman::pc_drive_next_sector",%di + cld + stosb #→ pc_drive_next_sector + xchg %ax,%cx + stosw #→ pc_drive_next_cylinder + mov %dh,%al + stosb #→ pc_drive_next_head +9: ret + .section .sort.text.real.init.1,"ax",@progbits .type rlinit,@function rlinit: .previous/* @@ -1284,6 +1314,8 @@ longmodeloader: call lcheck call a20 call e820 + call cpyhi + call loadhi call pinit call golong .endfn longmodeloader @@ -1426,10 +1458,88 @@ a20: cli 5: ret .endfn a20 +// Copies program pages loaded into base memory, to extended memory. 
+// +// @clob esi, edi +cpyhi: push %es + movpp %ds,%es + call unreal + mov $IMAGE_BASE_REAL,%esi + mov $IMAGE_BASE_PHYSICAL,%edi + mov $v_ape_realdwords,%ecx + cld + rep movsl %ds:(%esi),%es:(%edi) + sti + pop %es + ret + .endfn cpyhi + +// Disables interrupts and switches to "unreal mode". +// +// @return ds, es have same base addresses as before but can access +// 4 GiB of memory +// @clob eax +unreal: push %ds + push %es + cli + lgdt REAL(_gdtrlo) + mov %cr0,%eax + or $CR0_PE,%al + mov %eax,%cr0 + jmp 0f +0: pushpop GDT_LEGACY_DATA,%ds + movpp %ds,%es + and $~CR0_PE,%al + mov %eax,%cr0 + jmp 1f +1: pop %es + pop %ds + ret + +// Reads any remaining program pages into memory which have not yet +// been read by the boot sector. +// +// @clob eax, ecx, dx, esi, edi, bp +loadhi: mov $v_ape_highsectors,%bp + test %bp,%bp + jz 9f + mov $mm+"struct mman::pc_drive",%si + cld + lodsb #← pc_drive + xchg %ax,%dx + lodsw + lodsb #← pc_drive_next_sector + test %al,%al + jz 9f + xchg %ax,%cx + lodsw #← pc_drive_next_cylinder + xchg %ax,%cx + mov (%si),%dh #← pc_drive_next_head + push %es +#define SEG 0x79000 + mov $IMAGE_BASE_PHYSICAL+v_ape_realbytes-SEG,%edi + push $SEG>>4 + pop %es +0: call pcread + push %ax + call unreal + pop %ax + push %cx + xor %esi,%esi + xor %ecx,%ecx + mov $512/4,%cl + cld + rep movsl %es:(%esi),%es:(%edi) + sti + pop %cx + dec %bp + jnz 0b + pop %es +9: ret + // Initializes long mode paging. pinit: push %ds push %es -#define SEG 0x79000 mov $SEG>>4,%ax mov %ax,%ds mov %ax,%es @@ -1470,7 +1580,7 @@ golong: cli rdmsr or $EFER_LME|EFER_SCE|EFER_NXE,%eax wrmsr - lgdt REAL(_gdtrphy) + lgdt REAL(_gdtrlo) mov %cr0,%eax or $CR0_PE|CR0_PG|CR0_MP,%eax and $~CR0_EM,%eax @@ -1480,7 +1590,9 @@ golong: cli // Long mode is long. 
.code64 -long: xor %eax,%eax +long: movabs $BANE+PHYSICAL(0f),%rax + jmp *%rax +0: xor %eax,%eax mov $GDT_LONG_DATA,%al mov %eax,%ds mov %eax,%ss @@ -1488,18 +1600,16 @@ long: xor %eax,%eax mov %eax,%fs mov %eax,%gs mov $0x80000,%esp - mov $GDT_LONG_TSS,%al - ltr %ax xor %r12d,%r12d xor %r13d,%r13d xor %r14d,%r14d xor %r15d,%r15d xor %ebx,%ebx xor %ebp,%ebp - push %rbp mov $mm,%rdi mov %cr3,%rsi - mov $IMAGE_BASE_REAL,%edx + mov $IMAGE_BASE_PHYSICAL,%edx + lea v_ape_allbytes(%rdx),%ecx call __map_phdrs push $0x037f fldcw (%rsp) @@ -1585,6 +1695,10 @@ kernel: movabs $ape_stack_vaddr,%rsp .byte 0x0f,0x1f,0207 # nop rdi binbase .long (IMAGE_BASE_VIRTUAL-IMAGE_BASE_REAL)/512 #endif + movabs $BANE+mm,%rdi + mov $0x79000,%esi + mov $0x7f000,%edx + call __reclaim_boot_pages push $_HOSTMETAL # sets __hostos in crt.S pop %rcx pushq .Lenv0(%rip) # envp[0][0] @@ -1630,6 +1744,7 @@ kernel: movabs $ape_stack_vaddr,%rsp .ldsvar _end .ldsvar _etext .ldsvar v_ape_realsectors + .ldsvar v_ape_realbytes .ldsvar v_ape_highsectors .ldsvar ape_idata_ro .ldsvar ape_pad_rodata diff --git a/ape/ape.lds b/ape/ape.lds index 3225143b1..c40739b6d 100644 --- a/ape/ape.lds +++ b/ape/ape.lds @@ -349,9 +349,30 @@ SECTIONS { HIDDEN(_etext = .); PROVIDE_HIDDEN(etext = .); /*END: Read Only Data (only needed for initialization) */ -/*END: Read Only Data */ } :Rom +/* initialization image for thread-local storage, this is copied */ +/* out to actual TLS areas at runtime, so just make it read-only */ + .tdata . : { + _tdata_start = .; + *(SORT_BY_ALIGNMENT(.tdata)) + *(SORT_BY_ALIGNMENT(.tdata.*)) + _tdata_end = .; + . = ALIGN(PAGESIZE); + } :Tls :Rom +/*END: Read Only Data */ + +/* this only tells the linker about the layout of uninitialized */ +/* TLS data, and does not advance the linker's location counter */ + .tbss . : { + _tbss_start = .; + *(SORT_BY_ALIGNMENT(.tbss)) + *(SORT_BY_ALIGNMENT(.tbss.*)) + . 
= ALIGN(TLS_ALIGNMENT); + /* the %fs register is based on this location */ + _tbss_end = .; + } :Tls + .data ALIGN(PAGESIZE) : { /*BEGIN: Read/Write Data */ KEEP(*(SORT_BY_NAME(.piro.data.sort.iat.*))) @@ -385,26 +406,7 @@ SECTIONS { . = ALIGN(PAGESIZE); } :Ram - .tdata . : { - _tdata_start = .; - *(SORT_BY_ALIGNMENT(.tdata)) - *(SORT_BY_ALIGNMENT(.tdata.*)) - _tdata_end = .; - . = ALIGN(PAGESIZE); - } :Tls :Ram - /*END: file content that's loaded by o/s */ -/*BEGIN: bss memory void */ - - .tbss . : { - _tbss_start = .; - *(SORT_BY_ALIGNMENT(.tbss)) - *(SORT_BY_ALIGNMENT(.tbss.*)) - . = ALIGN(TLS_ALIGNMENT); - /* the %fs register is based on this location */ - _tbss_end = .; - } :Tls - /*END: file content */ /*BEGIN: bss memory that's addressable */ @@ -509,7 +511,7 @@ HIDDEN(ape_rom_rva = RVA(ape_rom_vaddr)); HIDDEN(ape_ram_offset = ape_rom_offset + ape_rom_filesz); HIDDEN(ape_ram_vaddr = ADDR(.data)); HIDDEN(ape_ram_paddr = LOADADDR(.data)); -HIDDEN(ape_ram_filesz = SIZEOF(.data) + SIZEOF(.tdata)); +HIDDEN(ape_ram_filesz = SIZEOF(.data)); HIDDEN(ape_ram_memsz = ADDR(.bss) + SIZEOF(.bss) - ape_ram_vaddr); HIDDEN(ape_ram_align = PAGESIZE); HIDDEN(ape_ram_rva = RVA(ape_ram_vaddr)); @@ -533,16 +535,16 @@ HIDDEN(ape_note_align = __SIZEOF_POINTER__); HIDDEN(ape_text_offset = ape_rom_offset + LOADADDR(.text) - ape_rom_paddr); HIDDEN(ape_text_paddr = LOADADDR(.text)); HIDDEN(ape_text_vaddr = ADDR(.text)); -HIDDEN(ape_text_filesz = SIZEOF(.text)); -HIDDEN(ape_text_memsz = SIZEOF(.text)); +HIDDEN(ape_text_filesz = SIZEOF(.text) + SIZEOF(.tdata)); +HIDDEN(ape_text_memsz = SIZEOF(.text) + SIZEOF(.tdata)); HIDDEN(ape_text_align = PAGESIZE); HIDDEN(ape_text_rva = RVA(ape_text_vaddr)); HIDDEN(ape_data_offset = ape_ram_offset + LOADADDR(.data) - ape_ram_paddr); HIDDEN(ape_data_paddr = LOADADDR(.data)); HIDDEN(ape_data_vaddr = ADDR(.data)); -HIDDEN(ape_data_filesz = SIZEOF(.data) + SIZEOF(.tdata)); -HIDDEN(ape_data_memsz = SIZEOF(.data) + SIZEOF(.tdata)); +HIDDEN(ape_data_filesz 
= SIZEOF(.data)); +HIDDEN(ape_data_memsz = SIZEOF(.data)); HIDDEN(ape_data_align = PAGESIZE); HIDDEN(ape_data_rva = RVA(ape_data_vaddr)); @@ -564,11 +566,12 @@ SHSTUB2(ape_loader_dd_count, #if SupportsMetal() HIDDEN(v_ape_realsectors = - MIN(0x70000 - IMAGE_BASE_REAL, - ROUNDUP(RVA(_tdata_end), 512)) / 512); -HIDDEN(v_ape_realpages = v_ape_realsectors / (4096 / 512)); -HIDDEN(v_ape_highsectors = - (ROUNDUP(RVA(_tdata_end), 512) / 512) - v_ape_realsectors); + MIN(0x70000 - IMAGE_BASE_REAL, ROUNDUP(RVA(_ezip), 512)) / 512); +HIDDEN(v_ape_realbytes = v_ape_realsectors * 512); +HIDDEN(v_ape_realdwords = v_ape_realsectors * (512 / 4)); +HIDDEN(v_ape_allsectors = ROUNDUP(RVA(_ezip), 512) / 512); +HIDDEN(v_ape_allbytes = v_ape_allsectors * 512); +HIDDEN(v_ape_highsectors = v_ape_allsectors - v_ape_realsectors); TSSDESCSTUB2(_tss, _tss, _tss_end ? _tss_end - _tss - 1 : 0); #endif @@ -651,7 +654,7 @@ CHURN(ADDR(.bss)); CHURN(_start); CHURN(ape_phdrs); #if SupportsMetal() -CHURN(v_ape_realsectors); +CHURN(v_ape_allsectors); #endif #if SupportsXnu() CHURN(ape_macho); diff --git a/examples/vga2.c b/examples/vga2.c index 0343cf211..ab846106d 100644 --- a/examples/vga2.c +++ b/examples/vga2.c @@ -24,10 +24,16 @@ STATIC_YOINK("vga_console"); STATIC_YOINK("_idt"); +STATIC_YOINK("EfiMain"); int main(int argc, char *argv[]) { + int i; volatile int x = 1; volatile int y = 2; + printf("argc = %d\n", argc); + for (i = 0; i < argc; ++i) { + printf("argv[%d] = \"%s\"\n", i, argv[i]); + } printf("\e[92;44mHello World!\e[0m %d\n", 1 / (x + y - 3)); for (;;); } diff --git a/libc/calls/metalfile.c b/libc/calls/metalfile.c index efa9292db..ddfe07f8a 100644 --- a/libc/calls/metalfile.c +++ b/libc/calls/metalfile.c @@ -61,14 +61,14 @@ textstartup noasan void InitializeMetalFile(void) { * The zipos code will automatically arrange to do this. Alternatively, * user code can STATIC_YOINK this symbol. 
*/ - size_t size = ROUNDUP(_tdata_end - _base, 4096); + size_t size = ROUNDUP(_ezip - _base, 4096); void *copied_base; struct DirectMap dm; dm = sys_mmap_metal(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED_linux | MAP_ANONYMOUS_linux, -1, 0); copied_base = dm.addr; _npassert(copied_base != (void *)-1); - memcpy(copied_base, (void *)(BANE + IMAGE_BASE_REAL), size); + memcpy(copied_base, (void *)(BANE + IMAGE_BASE_PHYSICAL), size); __ape_com_base = copied_base; __ape_com_size = size; } diff --git a/libc/calls/munmap-metal.c b/libc/calls/munmap-metal.c index ee9715264..bfa29e104 100644 --- a/libc/calls/munmap-metal.c +++ b/libc/calls/munmap-metal.c @@ -21,13 +21,27 @@ +/** + * Unmaps the 4096-byte pages covering [addr, addr + size) on bare metal. + * + * Each page table entry that is still mapped has its PAGE_V and PAGE_RSRV + * bits cleared and its TLB entry invalidated, and one reference is dropped + * on the physical page it pointed at. Always returns 0. + */ noasan int sys_munmap_metal(void *addr, size_t size) { size_t i; - uint64_t *e; + uint64_t *e, paddr; struct mman *mm; + uint64_t *pml4t = __get_pml4t(); mm = (struct mman *)(BANE + 0x0500); for (i = 0; i < size; i += 4096) { - e = __get_virtual(mm, __get_pml4t(), (uint64_t)addr + i, false); - if (e) *e = ~(PAGE_V | PAGE_RSRV); - invlpg((uint64_t)addr + i); + e = __get_virtual(mm, pml4t, (uint64_t)addr + i, false); + /* Skip entries that are not currently mapped, so that unmapping the + same range twice cannot drop a page's reference twice. */ + if (e && (*e & (PAGE_V | PAGE_RSRV))) { + paddr = *e & PAGE_TA; + *e &= ~(PAGE_V | PAGE_RSRV); + invlpg((uint64_t)addr + i); + __unref_page(mm, pml4t, paddr); + } } return 0; } diff --git a/libc/intrin/directmap-metal.c b/libc/intrin/directmap-metal.c index 2c3feb973..2ff56c3a7 100644 --- a/libc/intrin/directmap-metal.c +++ b/libc/intrin/directmap-metal.c @@ -44,7 +44,7 @@ noasan struct DirectMap sys_mmap_metal(void *vaddr, size_t size, int prot, size_t i; struct mman *mm; struct DirectMap res; - uint64_t addr, faddr = 0, page, *pte, *fdpte, *pml4t; + uint64_t addr, faddr = 0, page, e, *pte, *fdpte, *pml4t; mm = (struct mman *)(BANE + 0x0500); pml4t = __get_pml4t(); size = ROUNDUP(size, 4096); @@ -78,27 +78,27 @@ noasan struct DirectMap sys_mmap_metal(void *vaddr, size_t size, int prot, sys_mmap_metal_break = MAX(addr + size, sys_mmap_metal_break); } for (i = 0; i < size; i += 4096) { - page = __new_page(mm); pte =
__get_virtual(mm, pml4t, addr + i, true); if (pte) { if ((flags & MAP_ANONYMOUS_linux)) { page = __new_page(mm); if (!page) return bad_mmap(); __clear_page(BANE + page); - page |= PAGE_RSRV | PAGE_U; + e = page | PAGE_RSRV | PAGE_U; if ((prot & PROT_WRITE)) - page |= PAGE_V | PAGE_RW; + e |= PAGE_V | PAGE_RW; else if ((prot & (PROT_READ | PROT_EXEC))) - page |= PAGE_V; - if (!(prot & PROT_EXEC)) page |= PAGE_XD; + e |= PAGE_V; + if (!(prot & PROT_EXEC)) e |= PAGE_XD; } else { fdpte = __get_virtual(mm, pml4t, faddr + i, false); - page = *fdpte; - page |= PAGE_RSRV | PAGE_U; - if (!(prot & PROT_WRITE)) page &= ~PAGE_RW; - if (!(prot & PROT_EXEC)) page |= PAGE_XD; + e = *fdpte | PAGE_RSRV | PAGE_U; + page = e & PAGE_TA; + if (!(prot & PROT_WRITE)) e &= ~PAGE_RW; + if (!(prot & PROT_EXEC)) e |= PAGE_XD; } - *pte = page; + __ref_page(mm, pml4t, page); + *pte = e; invlpg(addr + i); } else { addr = -1; diff --git a/libc/intrin/interrupts.S b/libc/intrin/interrupts.S index e66e8feb4..933278725 100644 --- a/libc/intrin/interrupts.S +++ b/libc/intrin/interrupts.S @@ -109,8 +109,7 @@ __excep0_isr: isr_init: testb IsMetal() jz 9f - ezlea _tss+0x24,di # fill up TSS, we already loaded - # task register in ape/ape.S + ezlea _tss+0x24,di # fill up TSS ezlea _isr_stk_1+ISR_STK_SZ,ax and $-ISR_STK_ALIGN,%al # be paranoid & enforce correct stosq # alignment of stack pointers @@ -142,6 +141,8 @@ isr_init: stosq add $__excep1_isr-__excep0_isr,%rdx loop 1b + mov $GDT_LONG_TSS,%cl # load task register (cx = 0 here) + ltr %cx 9: ret // String constants. 
diff --git a/libc/intrin/kprintf.greg.c b/libc/intrin/kprintf.greg.c index 614ebc1ef..d67b73a70 100644 --- a/libc/intrin/kprintf.greg.c +++ b/libc/intrin/kprintf.greg.c @@ -177,6 +177,7 @@ privileged static void klog(const char *b, size_t n) { __imp_WriteFile(__imp_GetStdHandle(kNtStdErrorHandle), b, n, &wrote, 0); __imp_SetLastError(e); } else if (IsMetal()) { + if (_weaken(_klog_vga)) _weaken(_klog_vga)(b, n); for (i = 0; i < n; ++i) { for (;;) { dx = 0x3F8 + UART_LSR; @@ -189,7 +190,6 @@ privileged static void klog(const char *b, size_t n) { : /* no inputs */ : "a"(b[i]), "dN"(dx)); } - if (_weaken(_klog_vga)) _weaken(_klog_vga)(b, n); } else { asm volatile("syscall" : "=a"(rax), "=D"(rdi), "=S"(rsi), "=d"(rdx) diff --git a/libc/intrin/mman.greg.c b/libc/intrin/mman.greg.c index 88574445a..ce3f04b43 100644 --- a/libc/intrin/mman.greg.c +++ b/libc/intrin/mman.greg.c @@ -33,6 +33,7 @@ │ αcτµαlly pδrταblε εxεcµταblε § no-frills virtual memory management │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "ape/relocations.h" +#include "libc/assert.h" #include "libc/elf/def.h" #include "libc/elf/struct/phdr.h" #include "libc/macros.internal.h" @@ -42,18 +43,33 @@ #include "libc/runtime/pc.internal.h" #include "libc/runtime/runtime.h" +#define INVERT(x) (BANE + PHYSICAL(x)) +#define NOPAGE ((uint64_t)-1) + +struct ReclaimedPage { + uint64_t next; +}; + /** * Allocates new page of physical memory. 
*/ noasan texthead uint64_t __new_page(struct mman *mm) { - uint64_t p; + uint64_t p = mm->frp; + if (p != NOPAGE) { + uint64_t q; + struct ReclaimedPage *rp = (struct ReclaimedPage *)(BANE + p); + _unassert(p == (p & PAGE_TA)); + q = rp->next; + _unassert(q == (q & PAGE_TA)); + mm->frp = q; + return p; + } if (mm->pdpi == mm->e820n) { - /* TODO: reclaim free pages */ return 0; } while (mm->pdp >= mm->e820[mm->pdpi].addr + mm->e820[mm->pdpi].size) { if (++mm->pdpi == mm->e820n) return 0; - mm->pdp = mm->e820[mm->pdpi].addr; + mm->pdp = MAX(mm->pdp, mm->e820[mm->pdpi].addr); } p = mm->pdp; mm->pdp += 4096; @@ -84,7 +100,7 @@ noasan textreal uint64_t *__get_virtual(struct mman *mm, uint64_t *t, /** * Sorts, rounds, and filters BIOS memory map. */ -static noasan textreal void __normalize_e820(struct mman *mm) { +static noasan textreal void __normalize_e820(struct mman *mm, uint64_t top) { uint64_t a, b; uint64_t x, y; unsigned i, j, n; @@ -107,18 +123,21 @@ static noasan textreal void __normalize_e820(struct mman *mm) { } mm->e820[j] = mm->e820[i]; } - mm->pdp = MAX(0x80000, mm->e820[0].addr); + top = ROUNDUP(top, 4096); + mm->pdp = MAX(top, mm->e820[0].addr); mm->pdpi = 0; mm->e820n = n; + mm->frp = NOPAGE; } /** * Identity maps an area of physical memory to its negative address. */ -noasan textreal void __invert_memory_area(struct mman *mm, uint64_t *pml4t, - uint64_t ps, uint64_t size, - uint64_t pte_flags) { - uint64_t pe = ps + size, p, *m; +noasan textreal uint64_t *__invert_memory_area(struct mman *mm, + uint64_t *pml4t, + uint64_t ps, uint64_t size, + uint64_t pte_flags) { + uint64_t pe = ps + size, p, *m = NULL; ps = ROUNDDOWN(ps, 4096); pe = ROUNDUP(pe, 4096); for (p = ps; p != pe; p += 4096) { @@ -127,6 +146,62 @@ noasan textreal void __invert_memory_area(struct mman *mm, uint64_t *pml4t, *m = p | PAGE_V | PAGE_RSRV | pte_flags; } } + return m; +} + +/** + * Increments the reference count for a page of physical memory. 
+ */ +noasan void __ref_page(struct mman *mm, uint64_t *pml4t, uint64_t p) { + uint64_t *m, e; + m = __invert_memory_area(mm, pml4t, p, 4096, PAGE_RW | PAGE_XD); + if (m) { + e = *m; + if ((e & PAGE_REFC) != PAGE_REFC) { + e += PAGE_1REF; + *m = e; + } + } +} + +/** + * Increments the reference counts for an area of physical memory. + */ +noasan void __ref_pages(struct mman *mm, uint64_t *pml4t, uint64_t ps, + uint64_t size) { + uint64_t p = ROUNDDOWN(ps, 4096), e = ROUNDUP(ps + size, 4096); + while (p != e) { + __ref_page(mm, pml4t, p); + p += 4096; + } +} + +/** + * Reclaims a page of physical memory for later use. + */ +static noasan void __reclaim_page(struct mman *mm, uint64_t p) { + struct ReclaimedPage *rp = (struct ReclaimedPage *)(BANE + p); + _unassert(p == (p & PAGE_TA)); + rp->next = mm->frp; + mm->frp = p; +} + +/** + * Decrements the reference count for a page of physical memory. Frees the + * page if there are no virtual addresses (excluding the negative space) + * referring to it. 
+ */ +noasan void __unref_page(struct mman *mm, uint64_t *pml4t, uint64_t p) { + uint64_t *m, e; + m = __invert_memory_area(mm, pml4t, p, 4096, PAGE_RW | PAGE_XD); + if (m) { + e = *m; + if ((e & PAGE_REFC) != PAGE_REFC) { + e -= PAGE_1REF; + *m = e; + if ((e & PAGE_REFC) == 0) __reclaim_page(mm, p); + } + } } /** @@ -155,13 +230,18 @@ static noasan textreal void __invert_memory(struct mman *mm, uint64_t *pml4t) { : "i"(offsetof(type, member))); \ } while (0) -noasan textreal void __setup_mman(struct mman *mm, uint64_t *pml4t) { +noasan textreal void __setup_mman(struct mman *mm, uint64_t *pml4t, + uint64_t top) { export_offsetof(struct mman, pc_drive_base_table); export_offsetof(struct mman, pc_drive_last_sector); export_offsetof(struct mman, pc_drive_last_head); + export_offsetof(struct mman, pc_drive); export_offsetof(struct mman, e820); export_offsetof(struct mman, e820_end); export_offsetof(struct mman, bad_idt); + export_offsetof(struct mman, pc_drive_next_sector); + export_offsetof(struct mman, pc_drive_next_cylinder); + export_offsetof(struct mman, pc_drive_next_head); export_offsetof(struct mman, pc_video_type); export_offsetof(struct mman, pc_video_stride); export_offsetof(struct mman, pc_video_width); @@ -170,21 +250,22 @@ noasan textreal void __setup_mman(struct mman *mm, uint64_t *pml4t) { export_offsetof(struct mman, pc_video_framebuffer_size); export_offsetof(struct mman, pc_video_curs_info); export_offsetof(struct mman, pc_video_char_height); - __normalize_e820(mm); + __normalize_e820(mm, top); __invert_memory(mm, pml4t); } /** * Maps APE-defined ELF program headers into memory and clears BSS. 
*/ -noasan textreal void __map_phdrs(struct mman *mm, uint64_t *pml4t, uint64_t b) { +noasan textreal void __map_phdrs(struct mman *mm, uint64_t *pml4t, uint64_t b, + uint64_t top) { struct Elf64_Phdr *p; uint64_t i, f, v, m, *e; extern char ape_phdrs[] __attribute__((__weak__)); extern char ape_phdrs_end[] __attribute__((__weak__)); - __setup_mman(mm, pml4t); - for (p = (struct Elf64_Phdr *)REAL(ape_phdrs), m = 0; - p < (struct Elf64_Phdr *)REAL(ape_phdrs_end); ++p) { + __setup_mman(mm, pml4t, top); + for (p = (struct Elf64_Phdr *)INVERT(ape_phdrs), m = 0; + p < (struct Elf64_Phdr *)INVERT(ape_phdrs_end); ++p) { if (p->p_type == PT_LOAD || p->p_type == PT_GNU_STACK) { f = PAGE_RSRV | PAGE_U; if (p->p_flags & PF_W) @@ -200,8 +281,36 @@ noasan textreal void __map_phdrs(struct mman *mm, uint64_t *pml4t, uint64_t b) { v = __clear_page(BANE + __new_page(mm)); } *__get_virtual(mm, pml4t, p->p_vaddr + i, true) = (v & PAGE_TA) | f; + __ref_page(mm, pml4t, v & PAGE_TA); } } } mm->pdp = MAX(mm->pdp, m); } + +/** + * Reclaims memory pages which were used at boot time but which can now be + * made available for the application. 
+ */ +noasan textreal void __reclaim_boot_pages(struct mman *mm, uint64_t skip_start, + uint64_t skip_end) { + uint64_t p = mm->frp, q = IMAGE_BASE_REAL, i, n = mm->e820n, b, e; + for (i = 0; i < n; ++i) { + b = mm->e820[i].addr; + if (b >= IMAGE_BASE_PHYSICAL) break; + e = MIN(IMAGE_BASE_PHYSICAL, b + mm->e820[i].size); + q = MAX(IMAGE_BASE_REAL, b); + while (q < e) { + struct ReclaimedPage *rp; + if (q == skip_start) { + q = skip_end; + if (q >= e) break; + } + rp = (struct ReclaimedPage *)(BANE + q); + rp->next = p; + p = q; + q += 4096; + } + } + mm->frp = p; +} diff --git a/libc/nt/efi.h b/libc/nt/efi.h index 6ac501950..3178a3e86 100644 --- a/libc/nt/efi.h +++ b/libc/nt/efi.h @@ -97,6 +97,12 @@ 0x8E, 0x3F, 0x00, 0xA0, 0xC9, 0x69, 0x72, 0x3B \ } \ } +#define GRAPHICS_OUTPUT_PROTOCOL \ + { \ + 0x9042A9DE, 0x23DC, 0x4A38, { \ + 0x96, 0xFB, 0x7A, 0xDE, 0xD0, 0x80, 0x51, 0x6A \ + } \ + } #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ @@ -113,6 +119,7 @@ COSMOPOLITAN_C_START_ typedef struct _EFI_SIMPLE_TEXT_INPUT_PROTOCOL EFI_SIMPLE_TEXT_INPUT_PROTOCOL; typedef struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL; +typedef struct _EFI_GRAPHICS_OUTPUT_PROTOCOL EFI_GRAPHICS_OUTPUT_PROTOCOL; typedef enum { EfiReservedMemoryType, @@ -214,6 +221,54 @@ typedef struct { bool CursorVisible; } EFI_SIMPLE_TEXT_OUTPUT_MODE; +typedef enum { + PixelRedGreenBlueReserved8BitPerColor, + PixelBlueGreenRedReserved8BitPerColor, + PixelBitMask, + PixelBltOnly, + PixelFormatMax +} EFI_GRAPHICS_PIXEL_FORMAT; + +typedef struct { + uint32_t RedMask; + uint32_t GreenMask; + uint32_t BlueMask; + uint32_t ReservedMask; +} EFI_PIXEL_BITMASK; + +typedef struct { + uint32_t Version; + uint32_t HorizontalResolution; + uint32_t VerticalResolution; + EFI_GRAPHICS_PIXEL_FORMAT PixelFormat; + EFI_PIXEL_BITMASK PixelInformation; + uint32_t PixelsPerScanLine; +} EFI_GRAPHICS_OUTPUT_MODE_INFORMATION; + +typedef struct { + uint8_t Blue; + uint8_t Green; + uint8_t Red; + 
uint8_t Reserved; +} EFI_GRAPHICS_OUTPUT_BLT_PIXEL; + +typedef enum { + EfiBltVideoFill, + EfiBltVideoToBltBuffer, + EfiBltBufferToVideo, + EfiBltVideoToVideo, + EfiGraphicsOutputBltOperationMax +} EFI_GRAPHICS_OUTPUT_BLT_OPERATION; + +typedef struct { + uint32_t MaxMode; + uint32_t Mode; + EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *Info; + uint32_t SizeOfInfo; + uint64_t FrameBufferBase; + uint32_t FrameBufferSize; +} EFI_GRAPHICS_OUTPUT_PROTOCOL_MODE; + typedef struct { uint64_t Signature; uint32_t Revision; @@ -300,6 +355,9 @@ typedef EFI_STATUS(EFIAPI *EFI_SET_WATCHDOG_TIMER)(uintptr_t Timeout, uint64_t WatchdogCode, uintptr_t DataSize, char16_t *opt_WatchdogData); +typedef EFI_STATUS(EFIAPI *EFI_LOCATE_PROTOCOL)(EFI_GUID *Protocol, + void *Registration, + void *Interface); typedef EFI_STATUS(EFIAPI *EFI_SET_TIME)(EFI_TIME *Time); typedef EFI_STATUS(EFIAPI *EFI_GET_TIME)( @@ -343,6 +401,19 @@ typedef EFI_STATUS(EFIAPI *EFI_TEXT_SET_CURSOR_POSITION)( typedef EFI_STATUS(EFIAPI *EFI_TEXT_ENABLE_CURSOR)( EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL *This, bool Visible); +typedef EFI_STATUS(EFIAPI *EFI_GRAPHICS_OUTPUT_PROTOCOL_QUERY_MODE)( + EFI_GRAPHICS_OUTPUT_PROTOCOL *This, uint32_t ModeNumber, + uint32_t *SizeOfInfo, EFI_GRAPHICS_OUTPUT_MODE_INFORMATION **Info); +typedef EFI_STATUS(EFIAPI *EFI_GRAPHICS_OUTPUT_PROTOCOL_SET_MODE)( + EFI_GRAPHICS_OUTPUT_PROTOCOL *This, uint32_t ModeNumber); +typedef EFI_STATUS(EFIAPI *EFI_GRAPHICS_OUTPUT_PROTOCOL_BLT)( + EFI_GRAPHICS_OUTPUT_PROTOCOL *This, + EFI_GRAPHICS_OUTPUT_BLT_PIXEL *BltBuffer, + EFI_GRAPHICS_OUTPUT_BLT_OPERATION BltOperation, + uint32_t SourceX, uint32_t SourceY, + uint32_t DestinationX, uint32_t DestinationY, + uint32_t Width, uint32_t Height, uint32_t Delta); + typedef EFI_STATUS(EFIAPI *EFI_HANDLE_PROTOCOL)(EFI_HANDLE Handle, EFI_GUID *Protocol, void *out_Interface); @@ -414,7 +485,7 @@ typedef struct { void *OpenProtocolInformation; void *ProtocolsPerHandle; void *LocateHandleBuffer; - void *LocateProtocol; + 
EFI_LOCATE_PROTOCOL LocateProtocol; void *InstallMultipleProtocolInterfaces; void *UninstallMultipleProtocolInterfaces; void *CalculateCrc32; @@ -458,6 +529,13 @@ struct _EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL { EFI_SIMPLE_TEXT_OUTPUT_MODE *Mode; }; +struct _EFI_GRAPHICS_OUTPUT_PROTOCOL { + EFI_GRAPHICS_OUTPUT_PROTOCOL_QUERY_MODE QueryMode; + EFI_GRAPHICS_OUTPUT_PROTOCOL_SET_MODE SetMode; + EFI_GRAPHICS_OUTPUT_PROTOCOL_BLT Blt; + EFI_GRAPHICS_OUTPUT_PROTOCOL_MODE *Mode; +}; + typedef struct { uint32_t Revision; EFI_HANDLE ParentHandle; diff --git a/libc/runtime/efimain.greg.c b/libc/runtime/efimain.greg.c index b64b21d0b..dcbbe0dab 100644 --- a/libc/runtime/efimain.greg.c +++ b/libc/runtime/efimain.greg.c @@ -16,9 +16,12 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "ape/relocations.h" #include "ape/sections.internal.h" #include "libc/dce.h" #include "libc/intrin/bits.h" +#include "libc/intrin/newbie.h" +#include "libc/intrin/weaken.h" #include "libc/macros.internal.h" #include "libc/nt/efi.h" #include "libc/nt/thunk/msabi.h" @@ -37,6 +40,92 @@ struct EfiArgs { }; static const EFI_GUID kEfiLoadedImageProtocol = LOADED_IMAGE_PROTOCOL; +static const EFI_GUID kEfiGraphicsOutputProtocol = GRAPHICS_OUTPUT_PROTOCOL; + +extern const char vga_console[]; +extern void _EfiPostboot(struct mman *, uint64_t *, uintptr_t, char **); + +/** + * Queries the UEFI Graphics Output Protocol for the current video mode, + * records its pixel format, stride, dimensions, and frame buffer in `mm`, + * resets the cursor position to (0, 0), and zero-fills the frame buffer. + * + * Traps via `notpossible` if the protocol cannot be located or if the + * pixel format is not one of the layouts recognized below. + */ +static void EfiInitVga(struct mman *mm, EFI_SYSTEM_TABLE *SystemTable) { + EFI_GRAPHICS_OUTPUT_PROTOCOL *GraphInfo = NULL; + EFI_GRAPHICS_OUTPUT_PROTOCOL_MODE *GraphMode; + EFI_PIXEL_BITMASK *PixelInfo; + unsigned vid_typ = PC_VIDEO_TEXT; + size_t bytes_per_pix = 0; + + SystemTable->BootServices->LocateProtocol(&kEfiGraphicsOutputProtocol, NULL, + &GraphInfo); + if (!GraphInfo) notpossible; + GraphMode = GraphInfo->Mode; + switch (GraphMode->Info->PixelFormat) { + case PixelRedGreenBlueReserved8BitPerColor: + vid_typ = PC_VIDEO_RGBX8888; +
bytes_per_pix = 4; + break; + case PixelBlueGreenRedReserved8BitPerColor: + vid_typ = PC_VIDEO_BGRX8888; + bytes_per_pix = 4; + break; + case PixelBitMask: + PixelInfo = &GraphMode->Info->PixelInformation; + switch (le32toh(PixelInfo->RedMask)) { + case 0x00FF0000U: + if (le32toh(PixelInfo->ReservedMask) >= 0x01000000U && + le32toh(PixelInfo->GreenMask) == 0x0000FF00U && + le32toh(PixelInfo->BlueMask) == 0x000000FFU) { + vid_typ = PC_VIDEO_BGRX8888; + bytes_per_pix = 4; + } + break; + case 0x000000FFU: + if (le32toh(PixelInfo->ReservedMask) >= 0x01000000U && + le32toh(PixelInfo->GreenMask) == 0x0000FF00U && + le32toh(PixelInfo->BlueMask) == 0x00FF0000U) { + vid_typ = PC_VIDEO_RGBX8888; + bytes_per_pix = 4; + } + break; + case 0x0000F800U: + if (le32toh(PixelInfo->ReservedMask) <= 0x0000FFFFU && + le32toh(PixelInfo->GreenMask) == 0x000007E0U && + le32toh(PixelInfo->BlueMask) == 0x0000001FU) { + vid_typ = PC_VIDEO_BGR565; + bytes_per_pix = 2; + } + break; + case 0x00007C00U: + if (le32toh(PixelInfo->ReservedMask) <= 0x0000FFFFU && + le32toh(PixelInfo->GreenMask) == 0x000003E0U && + le32toh(PixelInfo->BlueMask) == 0x0000001FU) { + vid_typ = PC_VIDEO_BGR555; + bytes_per_pix = 2; + } + break; + } + /* The breaks above only leave the inner switch; masks that matched + nothing are caught by the bytes_per_pix check below. */ + break; + default: + notpossible; + } + if (!bytes_per_pix) notpossible; + mm->pc_video_type = vid_typ; + mm->pc_video_stride = GraphMode->Info->PixelsPerScanLine * bytes_per_pix; + mm->pc_video_width = GraphMode->Info->HorizontalResolution; + mm->pc_video_height = GraphMode->Info->VerticalResolution; + mm->pc_video_framebuffer = GraphMode->FrameBufferBase; + mm->pc_video_framebuffer_size = GraphMode->FrameBufferSize; + mm->pc_video_curs_info.y = mm->pc_video_curs_info.x = 0; + SystemTable->BootServices->SetMem((void *)GraphMode->FrameBufferBase, + GraphMode->FrameBufferSize, 0); +} /** * EFI Application Entrypoint.
@@ -64,27 +141,44 @@ static const EFI_GUID kEfiLoadedImageProtocol = LOADED_IMAGE_PROTOCOL; */ __msabi noasan EFI_STATUS EfiMain(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) { - int type, x87cw; + int type, x87cw = 0x037f; struct mman *mm; uint32_t DescVersion; uintptr_t i, j, MapSize; struct EfiArgs *ArgBlock; EFI_LOADED_IMAGE *ImgInfo; - EFI_MEMORY_DESCRIPTOR *Map; + EFI_MEMORY_DESCRIPTOR *Map, *Desc; + uint64_t Address; uintptr_t Args, MapKey, DescSize; uint64_t p, pe, cr4, *m, *pd, *sp, *pml4t, *pdt1, *pdt2, *pdpt1, *pdpt2; - extern char __os asm("__hostos"); - - __os = _HOSTMETAL; /* - * Allocates and clears PC-compatible memory and copies image. + * Allocates and clears PC-compatible memory and copies image. Marks the + * pages as EfiRuntimeServicesData, so that we can simply free up all + * EfiLoader... and EfiBootServices... pages later. The first page at + * address 0 is normally already allocated as EfiBootServicesData, so + * handle it separately. */ + Address = 0; SystemTable->BootServices->AllocatePages( - AllocateAddress, EfiConventionalMemory, - MAX(2 * 1024 * 1024, 1024 * 1024 + (_end - _base)) / 4096, 0); - SystemTable->BootServices->SetMem(0, 0x80000, 0); - SystemTable->BootServices->CopyMem((void *)(1024 * 1024), _base, + AllocateAddress, EfiRuntimeServicesData, 4096 / 4096, &Address); + Address = 4096; + SystemTable->BootServices->AllocatePages( + AllocateAddress, EfiRuntimeServicesData, (IMAGE_BASE_REAL - 4096) / 4096, + &Address); + Address = 0x79000; + SystemTable->BootServices->AllocatePages( + AllocateAddress, EfiRuntimeServicesData, + (0x7e000 - 0x79000 + sizeof(struct EfiArgs) + 4095) / 4096, &Address); + Address = IMAGE_BASE_PHYSICAL; + SystemTable->BootServices->AllocatePages( + AllocateAddress, EfiRuntimeServicesData, + ((_end - _base) + 4095) / 4096, &Address); + mm = (struct mman *)0x0500; + SystemTable->BootServices->SetMem(mm, sizeof(*mm), 0); + SystemTable->BootServices->SetMem((void *)0x79000, + 0x7e000 - 0x79000 + 
sizeof(struct EfiArgs), 0); + SystemTable->BootServices->CopyMem((void *)IMAGE_BASE_PHYSICAL, _base, _end - _base); /* @@ -97,6 +191,13 @@ __msabi noasan EFI_STATUS EfiMain(EFI_HANDLE ImageHandle, sizeof(ArgBlock->ArgBlock), ArgBlock->Args, ARRAYLEN(ArgBlock->Args)); + /* + * Gets information about our current video mode. Clears the screen. + * TODO: if needed, switch to a video mode that has a linear frame buffer + * type we support. + */ + if (_weaken(vga_console)) EfiInitVga(mm, SystemTable); + /* * Asks UEFI which parts of our RAM we're allowed to use. */ @@ -104,16 +205,26 @@ __msabi noasan EFI_STATUS EfiMain(EFI_HANDLE ImageHandle, MapSize = 0; SystemTable->BootServices->GetMemoryMap(&MapSize, Map, &MapKey, &DescSize, &DescVersion); - SystemTable->BootServices->AllocatePool(EfiLoaderData, MapSize, Map); + MapSize *= 2; /* AllocatePool() itself may grow the memory map */ + SystemTable->BootServices->AllocatePool(EfiLoaderData, MapSize, &Map); SystemTable->BootServices->GetMemoryMap(&MapSize, Map, &MapKey, &DescSize, &DescVersion); - asm("xor\t%0,%0" : "=r"(mm)); /* gcc assumes null isn't mapped */ - for (j = i = 0; i < MapSize / sizeof(EFI_MEMORY_DESCRIPTOR); ++i) { - if (Map[i].Type != EfiConventionalMemory) continue; - mm->e820[j].addr = Map[i].PhysicalStart; - mm->e820[j].size = Map[i].NumberOfPages * 4096; - mm->e820[j].type = kMemoryUsable; - ++j; + for (j = i = 0, Desc = Map; i < MapSize / DescSize; ++i) { + switch (Desc->Type) { + case EfiLoaderCode: + case EfiLoaderData: + case EfiBootServicesCode: + case EfiBootServicesData: + if (Desc->PhysicalStart != 0) + break; + /* fallthrough */ + case EfiConventionalMemory: + mm->e820[j].addr = Desc->PhysicalStart; + mm->e820[j].size = Desc->NumberOfPages * 4096; + mm->e820[j].type = kMemoryUsable; + ++j; + } + Desc = (EFI_MEMORY_DESCRIPTOR *)((char *)Desc + DescSize); } SystemTable->BootServices->FreePool(Map); @@ -135,7 +246,6 @@ __msabi noasan EFI_STATUS EfiMain(EFI_HANDLE ImageHandle, pdpt2[0] = (intptr_t)pdt2 + PAGE_V + PAGE_RW; pml4t[0] = (intptr_t)pdpt1 +
PAGE_V + PAGE_RW; pml4t[256] = (intptr_t)pdpt2 + PAGE_V + PAGE_RW; - __map_phdrs(mm, pml4t, 1024 * 1024); /* * Asks UEFI to handover control? @@ -143,45 +253,8 @@ __msabi noasan EFI_STATUS EfiMain(EFI_HANDLE ImageHandle, SystemTable->BootServices->ExitBootServices(ImageHandle, MapKey); /* - * Launches program. + * Switches to copied image and launches program. */ - asm volatile("fldcw\t%3\n\t" - "mov\t%4,%%cr3\n\t" - ".weak\tape_stack_vaddr\n\t" - ".weak\tape_stack_memsz\n\t" - "movabs\t$ape_stack_vaddr,%%rsp\n\t" - "add\t$ape_stack_memsz,%%rsp\n\t" - "push\t$0\n\t" /* auxv[1][1] */ - "push\t$0\n\t" /* auxv[1][0] */ - "push\t(%1)\n\t" /* auxv[0][1] */ - "push\t$31\n\t" /* auxv[0][0] AT_EXECFN */ - "push\t$0\n\t" /* envp[0] */ - "sub\t%2,%%rsp\n\t" - "mov\t%%rsp,%%rdi\n\t" - "rep movsb\n\t" /* argv */ - "push\t%0\n\t" /* argc */ - "xor\t%%edi,%%edi\n\t" - "xor\t%%eax,%%eax\n\t" - "xor\t%%ebx,%%ebx\n\t" - "xor\t%%ecx,%%ecx\n\t" - "xor\t%%edx,%%edx\n\t" - "xor\t%%edi,%%edi\n\t" - "xor\t%%esi,%%esi\n\t" - "xor\t%%ebp,%%ebp\n\t" - "xor\t%%r8d,%%r8d\n\t" - "xor\t%%r9d,%%r9d\n\t" - "xor\t%%r10d,%%r10d\n\t" - "xor\t%%r11d,%%r11d\n\t" - "xor\t%%r12d,%%r12d\n\t" - "xor\t%%r13d,%%r13d\n\t" - "xor\t%%r14d,%%r14d\n\t" - "xor\t%%r15d,%%r15d\n\t" - ".weak\t_start\n\t" - "jmp\t_start" - : /* no outputs */ - : "a"(Args), "S"(ArgBlock->Args), "c"((Args + 1) * 8), - "m"(x87cw), "r"(pml4t) - : "memory"); - + _EfiPostboot(mm, pml4t, Args, ArgBlock->Args); unreachable; } diff --git a/libc/runtime/efipostboot.S b/libc/runtime/efipostboot.S new file mode 100644 index 000000000..1ef27085d --- /dev/null +++ b/libc/runtime/efipostboot.S @@ -0,0 +1,126 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ 
any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "ape/relocations.h" +#include "libc/dce.h" +#include "libc/macros.internal.h" +#include "libc/runtime/pc.internal.h" +.real + +// Start the Cosmopolitan runtime after exiting UEFI Boot Services. +// +// @param rdi is mm +// @param rsi is new pml4t +// @param rdx is argc +// @param rcx is argv +// @see libc/runtime/efimain.greg.c +_EfiPostboot: + cli +// Define handy mnemonics for parameters & constants stored in +// call-saved registers. 
+#define rMm %r12 +#define rArgc %r13 +#define rArgv %r14 +#define rBane %r15 + movabs $BANE,rBane + mov %rdi,rMm + mov %rdx,rArgc + lea (rBane,%rcx),rArgv + mov $PHYSICAL(.Ltmpstk),%rax # switch to temporary stack + and $-16,%al # in physical space + xchg %rax,%rsp + mov $PHYSICAL(0f),%eax # resume execution in copied + jmp *%rax # image +0: mov $EFER,%ecx # enable syscall/sysret & nx + rdmsr + or $EFER_SCE|EFER_NXE,%eax + wrmsr + mov %rsi,%cr3 # load new page table + add rBane,%rsp # we can now switch stack to + add rBane,rMm # negative address space + mov $1024*1024,%edx # set up virtual memory + mov $1024*1024+_end,%ecx # mapping + sub $_base,%ecx + call __map_phdrs + mov $1f,%eax # switch rip to virtual + jmp *%rax # address space +1: push $0x037f + fldcw (%rsp) + .weak _gdtr + lgdt _gdtr # switch to our own GDT + mov $GDT_LONG_DATA,%ax + mov %ax,%ds + mov %ax,%ss + mov %ax,%es + mov %ax,%fs + mov %ax,%gs + .weak ape_stack_vaddr + .weak ape_stack_memsz + movabs $ape_stack_vaddr,%rsp # switch to final stack in + add $ape_stack_memsz,%rsp # virtual address space + movl $0,0x7b000 # unmap null 2mb + mov rMm,%rdi + xor %esi,%esi # free up now-unused pages + xor %edx,%edx + call __reclaim_boot_pages + push .Lenv0(%rip) # envp[0][0] + mov %rsp,%rbp + push $0 # auxv[1][1] + push $0 # auxv[1][0] + mov (rArgv),%rax + add rBane,%rax + push %rax # auxv[0][1] + push $31 # auxv[0][0] AT_EXECFN + push $0 # envp[1] + push %rbp # envp[0] + push $0 # argv[argc] NULL + lea -8(rArgv,rArgc,8),%rsi # push rest of argv, & + mov rArgc,%rcx # adjust pointers to point to + std # negative space +2: lodsq + add rBane,%rax + push %rax + loop 2b + cld + push rArgc # argc + pushpop _HOSTMETAL,%rcx # sets __hostos in crt.S + xor %ebp,%ebp + xor %eax,%eax + xor %ebx,%ebx + xor %edx,%edx + xor %edi,%edi + xor %esi,%esi + xor %r8d,%r8d + xor %r9d,%r9d + xor %r10d,%r10d + xor %r11d,%r11d + xor %r12d,%r12d + xor %r13d,%r13d + xor %r14d,%r14d + xor %r15d,%r15d + push $GDT_LONG_CODE + .weak _start 
+ push $_start + lretq + .endfn _EfiPostboot,globl,hidden + .rodata +.Lenv0: .asciz "METAL=1" + .bss + .space 0x1000 +.Ltmpstk: + .previous diff --git a/libc/runtime/mman.internal.h b/libc/runtime/mman.internal.h index 64c0fcb95..55ea6586d 100644 --- a/libc/runtime/mman.internal.h +++ b/libc/runtime/mman.internal.h @@ -8,31 +8,38 @@ struct mman { int64_t pdp; /* 0x0500 */ int32_t pdpi; /* 0x0508 */ int32_t e820n; /* 0x050c */ - struct SmapEntry e820[256]; /* 0x0510 */ - struct SmapEntry e820_end[0]; /* 0x1d10 */ - char pc_drive_base_table[11]; /* 0x1d10 */ - unsigned char pc_drive_type; /* 0x1d1b */ - unsigned char pc_drive_last_sector; /* 0x1d1c */ - unsigned short pc_drive_last_cylinder; /* 0x1d1e */ - unsigned char pc_drives_attached; /* 0x1d20 */ - unsigned char pc_drive_last_head; /* 0x1d21 */ - unsigned char pc_drive; /* 0x1d22 */ - char bad_idt[6]; /* 0x1d23 */ - unsigned char pc_video_type; /* 0x1d29 */ - unsigned short pc_video_stride; /* 0x1d2a — line width, including any + uint64_t frp; /* 0x0510 — free list of reclaimed, + previously used pages */ + struct SmapEntry e820[256]; /* 0x0518 */ + struct SmapEntry e820_end[0]; /* 0x1d18 */ + char pc_drive_base_table[11]; /* 0x1d18 */ + unsigned char pc_drive_type; /* 0x1d23 */ + unsigned char pc_drive_last_sector; /* 0x1d24 */ + unsigned short pc_drive_last_cylinder; /* 0x1d26 */ + unsigned char pc_drives_attached; /* 0x1d28 */ + unsigned char pc_drive_last_head; /* 0x1d29 */ + unsigned char pc_drive; /* 0x1d2a */ + char bad_idt[2]; /* 0x1d2b — IDTR value for an invalid + IDT; really 6 bytes which + overlap w/ fields below */ + unsigned char pc_drive_next_sector; /* 0x1d2d */ + unsigned short pc_drive_next_cylinder; /* 0x1d2e */ + unsigned char pc_drive_next_head; /* 0x1d30 */ + unsigned char pc_video_type; /* 0x1d31 */ + unsigned short pc_video_stride; /* 0x1d32 — line width, including any invisible "pixels" — in bytes (NOTE) */ - unsigned short pc_video_width; /* 0x1d2c — width in chars. 
(text) + unsigned short pc_video_width; /* 0x1d34 — width in chars. (text) or pixels (graphics) */ - unsigned short pc_video_height; /* 0x1d2e — height in chars. (text) + unsigned short pc_video_height; /* 0x1d36 — height in chars. (text) or pixels (graphics) */ - uint64_t pc_video_framebuffer; /* 0x1d30 — physical address of + uint64_t pc_video_framebuffer; /* 0x1d38 — physical address of video frame buffer */ - uint64_t pc_video_framebuffer_size; /* 0x1d38 */ - struct { /* 0x1d40 — starting cursor pos. */ + uint64_t pc_video_framebuffer_size; /* 0x1d40 */ + struct { /* 0x1d48 — starting cursor pos. */ unsigned short y, x; } pc_video_curs_info; - unsigned short pc_video_char_height; /* 0x1d44 — character height (useful + unsigned short pc_video_char_height; /* 0x1d4c — character height (useful for setting cursor shape in text mode) */ }; diff --git a/libc/runtime/pc.internal.h b/libc/runtime/pc.internal.h index 24c552c3a..cb76e1b23 100644 --- a/libc/runtime/pc.internal.h +++ b/libc/runtime/pc.internal.h @@ -167,6 +167,9 @@ #define PAGE_GROD /* blinkenlights MAP_GROWSDOWN */ 0b010000000000 #define PAGE_TA 0x00007ffffffff000 #define PAGE_PA2 0x00007fffffe00000 +#define PAGE_IGN2 0x07f0000000000000 +#define PAGE_REFC PAGE_IGN2 /* libc reference counting */ +#define PAGE_1REF 0x0010000000000000 /* libc reference counting */ #define PAGE_XD 0x8000000000000000 #if !(__ASSEMBLER__ + __LINKER__ + 0) @@ -186,9 +189,13 @@ struct IdtDescriptor { uint64_t *__get_virtual(struct mman *, uint64_t *, int64_t, bool); uint64_t __clear_page(uint64_t); uint64_t __new_page(struct mman *); -void __invert_memory_area(struct mman *, uint64_t *, uint64_t, uint64_t, - uint64_t); -void __map_phdrs(struct mman *, uint64_t *, uint64_t); +uint64_t * __invert_memory_area(struct mman *, uint64_t *, uint64_t, uint64_t, + uint64_t); +void __map_phdrs(struct mman *, uint64_t *, uint64_t, uint64_t); +void __reclaim_boot_pages(struct mman *, uint64_t, uint64_t); +void __ref_page(struct mman *, 
uint64_t *, uint64_t); +void __ref_pages(struct mman *, uint64_t *, uint64_t, uint64_t); +void __unref_page(struct mman *, uint64_t *, uint64_t); forceinline unsigned char inb(unsigned short port) { unsigned char al; diff --git a/libc/stubs/ld.S b/libc/stubs/ld.S index b729f18d2..8f314a192 100644 --- a/libc/stubs/ld.S +++ b/libc/stubs/ld.S @@ -29,6 +29,7 @@ _base = 0 ape_xlm = 0 _ehead = 0 + _ezip = 0 _ereal = 0 __privileged_start = 0 __privileged_end = 0 @@ -68,6 +69,7 @@ .globl _edata .globl _ehead .globl _end + .globl _ezip .globl _ereal .globl _etext .globl _tdata_start @@ -97,6 +99,7 @@ .weak _edata .weak _ehead .weak _end + .weak _ezip .weak _ereal .weak _etext .weak _tdata_start diff --git a/libc/vga/vga-init.greg.c b/libc/vga/vga-init.greg.c index 32b6fed45..a64bf053c 100644 --- a/libc/vga/vga-init.greg.c +++ b/libc/vga/vga-init.greg.c @@ -53,6 +53,7 @@ void _vga_reinit(struct Tty *tty, unsigned short starty, unsigned short startx, /* Make sure the video buffer is mapped into virtual memory. */ __invert_memory_area(mm, __get_pml4t(), vid_buf_phy, vid_buf_sz, PAGE_RW | PAGE_XD); + __ref_pages(mm, __get_pml4t(), vid_buf_phy, vid_buf_sz); /* * Initialize our tty structure from the current screen geometry, screen * contents, cursor position, & character dimensions.