From 771566c8f897e69c778d4e1207f9cb028f47b1db Mon Sep 17 00:00:00 2001 From: tkchia Date: Tue, 8 Nov 2022 15:01:26 +0000 Subject: [PATCH] [metal] Load program pages beyond 1st 440 KiB to extended memory o//examples/hellolua.com now runs correctly under QEMU (in legacy BIOS mode). --- ape/ape.S | 106 +++++++++++++++++++++++++++++------ ape/ape.lds | 7 ++- libc/intrin/mman.greg.c | 4 ++ libc/runtime/mman.internal.h | 7 ++- 4 files changed, 104 insertions(+), 20 deletions(-) diff --git a/ape/ape.S b/ape/ape.S index dad97b2e7..055837d01 100644 --- a/ape/ape.S +++ b/ape/ape.S @@ -261,6 +261,9 @@ pc: cld xor %dh,%dh # current head mov $v_ape_realsectors,%di # total sectors 3: call pcread + mov %es,%si # addr += 512 + add $512>>4,%si + mov %si,%es dec %di jnz 3b 6: ljmp $0,$REAL(realmodeloader) @@ -349,6 +352,7 @@ dsknfo: push %bx pop %ds xchg %bx,%ax stosw #→ pc_drive_type, pc_drive_last_sector + scasb xchg %cx,%ax stosw #→ pc_drive_last_cylinder xchg %dx,%ax @@ -387,9 +391,6 @@ pcread: push %ax pop %cx pop %ax jc 9f - mov %es,%si # addr += 512 - add $512>>4,%si - mov %si,%es inc %al # ++sector cmp mm+"struct mman::pc_drive_last_sector",%al jbe 2f @@ -1196,6 +1197,7 @@ _gdt: the default mode of operation on modern cpus */ realmodeloader: + call lhinit call rlinit call sinit4 .optfn _start16 @@ -1203,6 +1205,25 @@ realmodeloader: call longmodeloader .endfn realmodeloader +// Prepares to later load parts of the program that are not loaded yet. 
+// +// @param al next sector number +// @param cx next cylinder number +// @param dh next head number +// @param dl drive number +// @clob ax, cx, es, di +lhinit: cmp $0x40,%dl + je 9f + pushpop 0,%es + mov $mm+"struct mman::pc_drive_next_sector",%di + cld + stosb #→ pc_drive_next_sector + xchg %ax,%cx + stosw #→ pc_drive_next_cylinder + mov %dh,%al + stosb #→ pc_drive_next_head +9: ret + .section .sort.text.real.init.1,"ax",@progbits .type rlinit,@function rlinit: .previous/* @@ -1264,6 +1285,7 @@ longmodeloader: call a20 call e820 call cpyhi + call loadhi call pinit call golong .endfn longmodeloader @@ -1407,7 +1429,27 @@ a20: cli .endfn a20 // Copies program pages loaded into base memory, to extended memory. -cpyhi: push %ds +// +// @clob eax, ecx, esi, edi +cpyhi: push %es + movpp %ds,%es + call unreal + mov $IMAGE_BASE_REAL,%esi + mov $IMAGE_BASE_PHYSICAL,%edi + mov $v_ape_realdwords,%ecx + cld + rep movsl %ds:(%esi),%es:(%edi) + sti + pop %es + ret + .endfn cpyhi + +// Disables interrupts and switches to "unreal mode". +// +// @return ds, es have same base addresses as before but can access +// 4 GiB of memory +// @clob eax +unreal: push %ds push %es cli lgdt REAL(_gdtrlo) @@ -1416,26 +1458,58 @@ cpyhi: push %ds mov %eax,%cr0 jmp 0f 0: pushpop GDT_LEGACY_DATA,%ds - push %ds - pop %es - mov $IMAGE_BASE_REAL,%esi - mov $IMAGE_BASE_PHYSICAL,%edi - mov $v_ape_realdwords,%ecx - cld - rep movsl %ds:(%esi),%es:(%edi) + movpp %ds,%es and $~CR0_PE,%al mov %eax,%cr0 jmp 1f -1: sti - pop %es +1: pop %es pop %ds ret - .endfn cpyhi + +// Reads any remaining program pages into memory which have not yet +// been read by the boot sector. 
+// +// @clob eax, ecx, dx, esi, edi, bp +loadhi: mov $v_ape_highsectors,%bp + test %bp,%bp + jz 9f + mov $mm+"struct mman::pc_drive",%si + cld + lodsb #← pc_drive + xchg %ax,%dx + lodsw + lodsb #← pc_drive_next_sector + test %al,%al + jz 9f + xchg %ax,%cx + lodsw #← pc_drive_next_cylinder + xchg %ax,%cx + mov (%si),%dh #← pc_drive_next_head + push %es +#define SEG 0x79000 + mov $IMAGE_BASE_PHYSICAL+v_ape_realbytes-SEG,%edi + push $SEG>>4 + pop %es +0: call pcread + push %ax + call unreal + pop %ax + push %cx + xor %esi,%esi + xor %ecx,%ecx + mov $512/4,%cl + cld + rep movsl %es:(%esi),%es:(%edi) + sti + pop %cx + dec %bp + jnz 0b + pop %es +9: ret // Initializes long mode paging. pinit: push %ds push %es -#define SEG 0x79000 mov $SEG>>4,%ax mov %ax,%ds mov %ax,%es @@ -1507,7 +1581,7 @@ long: movabs $BANE+PHYSICAL(0f),%rax mov $mm,%rdi mov %cr3,%rsi mov $IMAGE_BASE_PHYSICAL,%edx - lea v_ape_realbytes(%rdx),%ecx + lea v_ape_allbytes(%rdx),%ecx call __map_phdrs push $0x037f fldcw (%rsp) diff --git a/ape/ape.lds b/ape/ape.lds index 6c6109805..9b7f810dc 100644 --- a/ape/ape.lds +++ b/ape/ape.lds @@ -568,8 +568,9 @@ HIDDEN(v_ape_realsectors = ROUNDUP(RVA(_tdata_end), 512)) / 512); HIDDEN(v_ape_realbytes = v_ape_realsectors * 512); HIDDEN(v_ape_realdwords = v_ape_realsectors * (512 / 4)); -HIDDEN(v_ape_highsectors = - (ROUNDUP(RVA(_tdata_end), 512) / 512) - v_ape_realsectors); +HIDDEN(v_ape_allsectors = ROUNDUP(RVA(_tdata_end), 512) / 512); +HIDDEN(v_ape_allbytes = v_ape_allsectors * 512); +HIDDEN(v_ape_highsectors = v_ape_allsectors - v_ape_realsectors); TSSDESCSTUB2(_tss, _tss, _tss_end ? 
_tss_end - _tss - 1 : 0); #endif @@ -652,7 +653,7 @@ CHURN(ADDR(.bss)); CHURN(_start); CHURN(ape_phdrs); #if SupportsMetal() -CHURN(v_ape_realsectors); +CHURN(v_ape_allsectors); #endif #if SupportsXnu() CHURN(ape_macho); diff --git a/libc/intrin/mman.greg.c b/libc/intrin/mman.greg.c index 6191d5f6e..024bd508c 100644 --- a/libc/intrin/mman.greg.c +++ b/libc/intrin/mman.greg.c @@ -177,9 +177,13 @@ noasan textreal void __setup_mman(struct mman *mm, uint64_t *pml4t, export_offsetof(struct mman, pc_drive_base_table); export_offsetof(struct mman, pc_drive_last_sector); export_offsetof(struct mman, pc_drive_last_head); + export_offsetof(struct mman, pc_drive); export_offsetof(struct mman, e820); export_offsetof(struct mman, e820_end); export_offsetof(struct mman, bad_idt); + export_offsetof(struct mman, pc_drive_next_sector); + export_offsetof(struct mman, pc_drive_next_cylinder); + export_offsetof(struct mman, pc_drive_next_head); export_offsetof(struct mman, pc_video_type); export_offsetof(struct mman, pc_video_stride); export_offsetof(struct mman, pc_video_width); diff --git a/libc/runtime/mman.internal.h b/libc/runtime/mman.internal.h index 8d3ba21b9..55ea6586d 100644 --- a/libc/runtime/mman.internal.h +++ b/libc/runtime/mman.internal.h @@ -19,7 +19,12 @@ struct mman { unsigned char pc_drives_attached; /* 0x1d28 */ unsigned char pc_drive_last_head; /* 0x1d29 */ unsigned char pc_drive; /* 0x1d2a */ - char bad_idt[6]; /* 0x1d2b */ + char bad_idt[2]; /* 0x1d2b — IDTR value for an invalid + IDT; really 6 bytes which + overlap w/ fields below */ + unsigned char pc_drive_next_sector; /* 0x1d2d */ + unsigned short pc_drive_next_cylinder; /* 0x1d2e */ + unsigned char pc_drive_next_head; /* 0x1d30 */ unsigned char pc_video_type; /* 0x1d31 */ unsigned short pc_video_stride; /* 0x1d32 — line width, including any invisible "pixels" — in