動機

這篇主要在看怎麼從最一開始到一個可以打字的kernel的過程 重點不是裡面的code,而是過程

因為真正讓人痛苦的東西其實是在os提供的各種抽象設施,也是programmer要在意的點

開始之前

可以直接看下面的說明,如果想跑跑看可以照下面的步驟

  1. 裝docker(不想重編toolchain), nasm, build-essential, gdb, qemu-x86
  2. clone 這個
  3. checkout到想看的commit去跑

從硬碟出發: bootloader

bootloader

boot sector是bios第一個讀的sector,sector大小是512 bytes bootloader會放在這裡

bootloader的程式會被bios放在mem的0x7c00

bios會檢查這個sector的最後2 bytes是不是aa55(boot signature),以這個為終點,有的話才會把它當成可開機的sector

所以bootloader的咒語是

; Minimal boot-sector skeleton (NASM, flat binary output).
[org 0x7c00] ; assume the code is loaded at 0x7c00, so every label/address has 0x7c00 added (like a base added on each C pointer dereference)
; command ...
; $  :: address of the current position
; $$ :: address of the start of the current section (NOT the previous instruction)
; $-$$ is therefore the number of bytes emitted so far
times 510-($-$$) db 0 ; zero-pad everything up to byte 510
dw 0xaa55 ; 2-byte boot signature occupying bytes 510-511 of the 512-byte sector

aa55佔最後2 bytes,前面沒用到的空間就用0填滿到510 bytes。$是現在的位置,$$是目前section的起點,所以$-$$就是到目前為止已經輸出了幾個bytes

ia32 programming

assembler: 讀code,算位置,噴binary(或是elf)

byte, word, double word(dword)

byte: 8bits word: 2bytes dword: 4bytes

在nasm裡要用下面的directive去產生對應大小的資料

byte: db word: dw dword: dd

nasm的output長什麼樣子
; Each directive emits its value in little-endian order (low byte first).
db 0x0   ; 1 byte : 00
db 0xb5  ; 1 byte : b5
dw 0x0   ; 2 bytes: 00 00
dw 0xc5  ; 2 bytes: c5 00
dd 0x0   ; 4 bytes: 00 00 00 00
dw 0xd5  ; 2 bytes: d5 00

output

00000000: 00b5 0000 c500 0000 0000 d500            ............
; Same idea with a different mix of sizes; note there is no padding between items.
db 0xb5  ; 1 byte : b5
db 0x0   ; 1 byte : 00
dw 0xc5  ; 2 bytes: c5 00
db 0x0   ; 1 byte : 00
dd 0xd5  ; 4 bytes: d5 00 00 00
db 0x0   ; 1 byte : 00

output

00000000: b500 c500 00d5 0000 0000                 ..........

byte: 兩格 word: 四格 dword: 八格

一格大小是0.5byte (hex, 4bits)

mode

16bits real mode
  • 定址範圍是1MB
  • 可以用bios的handler去操作裝置
  • 只要知道mem的addr就可以改
32bits protected mode
  • 定址範圍是4GB
  • 沒有bios的handler可以用…
    • 所以要自己寫driver
  • 有GDT可以控制權限

memory address

16bits real mode: segmentation

Physical Address = Segment * 16 + Offset,乘16就是把segment左移1格(一個hex位數,4 bits)再加上offset

ds是data segment的base reg

; Real-mode segmentation demo: print the byte at 'the_secret' through different
; segment registers. There is no [org] here, so labels are plain offsets from
; the start of the file and the 0x7c00 base must come from a segment register.
mov ah, 0x0e ; tty (BIOS int 0x10 teletype output; the character goes in al)

mov bx, 0x7c0 ; segment value: shifted left by one hex digit it becomes 0x7c00
mov ds, bx    ; loaded through bx, a general-purpose register
; WARNING: from now on all memory references will be offset by 'ds' implicitly
mov al, [the_secret] ; reads ds:the_secret = 0x7c00 + the_secret
int 0x10

mov al, [es:the_secret] ; explicit segment override: uses es instead of ds
int 0x10 ; doesn't look right... isn't 'es' currently 0x000?

mov bx, 0x7c0
mov es, bx ; now es has the same base as ds
mov al, [es:the_secret]
int 0x10


jmp $ ; hang: jump to the current address forever

the_secret:
    db "X"

times 510 - ($-$$) db 0 ; zero-pad up to byte 510
dw 0xaa55 ; boot signature
32bits protected mode: GDT

GDT就是區分mem的表,除了標長度、起點還可以設定權限(ring0~3就是這裡來的)

GDT最少要有兩個segment,包含start!!

但設定的樣子十分反人類

; Global Descriptor Table: one null descriptor followed by a code and a data
; descriptor. Both segments use base 0 and the same 20-bit length 0xfffff.
; Each descriptor is 8 bytes with the base/length fields scattered across it.
gdt_start: ; don't remove the labels, they're needed to compute sizes and jumps
    ; the GDT starts with a null 8-byte
    dd 0x0 ; 4 byte
    dd 0x0 ; 4 byte

; GDT for code segment. base = 0x00000000, length = 0xfffff
; for flags, refer to os-dev.pdf document, page 36
gdt_code: 
    dw 0xffff    ; segment length, bits 0-15
    dw 0x0       ; segment base, bits 0-15
    db 0x0       ; segment base, bits 16-23
    db 10011010b ; flags (8 bits): present=1, privilege=00 (ring 0), type=1 (code/data), then 1010 = code, readable
    db 11001111b ; flags (4 bits) + segment length, bits 16-19; flags 1100 = 4 KiB granularity, 32-bit segment
    db 0x0       ; segment base, bits 24-31

; GDT for data segment. base and length identical to code segment
; some flags changed, again, refer to os-dev.pdf
gdt_data:
    dw 0xffff    ; segment length, bits 0-15
    dw 0x0       ; segment base, bits 0-15
    db 0x0       ; segment base, bits 16-23
    db 10010010b ; same as the code segment except the low nibble 0010 = data, writable
    db 11001111b ; flags (4 bits) + segment length, bits 16-19
    db 0x0       ; segment base, bits 24-31

gdt_end: ; marks the end of the table so the assembler can compute its size

; GDT descriptor
gdt_descriptor:
    dw gdt_end - gdt_start - 1 ; size (16 bit), always one less of its true size
    dd gdt_start ; address (32 bit)

; define some constants for later use
; These are byte offsets into the table and are used as segment selectors:
; CODE_SEG = 0x08 (second entry), DATA_SEG = 0x10 (third entry).
CODE_SEG equ gdt_code - gdt_start
DATA_SEG equ gdt_data - gdt_start

詳細的flag可以看這裡

之後就可以load他

lgdt [gdt_descriptor] ; load the GDT register from the 6-byte size+address structure at gdt_descriptor

bootloader的工作

載入kernel

Q: 為什麼不在32bits保護模式載入kernel? A: 現在還可以讓bios幫我們load,只要打中斷就好

INT 13,2 - Read Disk Sectors


KERNEL_OFFSET equ 0x1000 ; address the kernel is loaded to and later called at

;-----------------------------------------------------------------------
; disk_load: read sectors from disk through BIOS int 0x13, function 0x02.
; In:    dh    = number of sectors to read
;        dl    = drive number
;        es:bx = destination buffer
; Reads start at cylinder 0, head 0, sector 2 (the sector after the boot sector).
; Out:   returns with all general-purpose registers restored (pusha/popa).
;        On failure it jumps to disk_error / sectors_error and never returns.
;-----------------------------------------------------------------------
disk_load:
    pusha
    ; reading from disk requires setting specific values in all registers
    ; so we will overwrite our input parameters from 'dx'. Let's save it
    ; to the stack for later use.
    push dx

    mov ah, 0x02 ; ah <- int 0x13 function. 0x02 = 'read'
    mov al, dh   ; al <- number of sectors to read (0x01 .. 0x80)
    mov cl, 0x02 ; cl <- sector (0x01 .. 0x11)
                 ; 0x01 is our boot sector, 0x02 is the first 'available' sector
    mov ch, 0x00 ; ch <- cylinder (0x0 .. 0x3FF, upper 2 bits in 'cl')
    ; dl <- drive number. Our caller sets it as a parameter and gets it from BIOS
    ; (0 = floppy, 1 = floppy2, 0x80 = hdd, 0x81 = hdd2)
    mov dh, 0x00 ; dh <- head number (0x0 .. 0xF)

    ; [es:bx] <- pointer to buffer where the data will be stored
    ; caller sets it up for us, and it is actually the standard location for int 13h
    int 0x13      ; BIOS interrupt
    jc disk_error ; if error (stored in the carry bit)

    pop dx        ; dh is again the number of sectors the caller asked for
    cmp al, dh    ; BIOS also sets 'al' to the # of sectors read. Compare it.
    jne sectors_error
    popa
    ret

; Error paths for disk_load: print a message, then hang. Neither path returns.
disk_error:
    mov bx, DISK_ERROR ; bx = message to print (presumably print_16's string argument; defined elsewhere)
    call print_16
    call print_nl_16
    jmp disk_loop

sectors_error:
    mov bx, SECTORS_ERROR
    call print_16
    ; falls through into disk_loop

disk_loop:
    jmp $ ; hang forever

; NUL-terminated messages
DISK_ERROR: db "Disk read error", 0
SECTORS_ERROR: db "Incorrect number of sectors read", 0 

[bits 16]
; load_kernel: print a status message, then read 2 sectors from the boot
; drive into memory at KERNEL_OFFSET using disk_load.
load_kernel:
    mov bx, MSG_LOAD_KERNEL
    call print_16
    call print_nl_16

    mov bx, KERNEL_OFFSET ; Read from disk and store in 0x1000
    mov dh, 2             ; number of sectors to read
    mov dl, [BOOT_DRIVE]  ; drive number saved in memory
    call disk_load
    ret

[bits 32]
; start_kernel: transfer control to the code that load_kernel placed at KERNEL_OFFSET.
start_kernel:
    call KERNEL_OFFSET
    jmp $ ; hang if the kernel ever returns

; NUL-terminated message printed by load_kernel
MSG_LOAD_KERNEL db "Loading kernel into memory", 0 

切32bit保護模式

載入kernel後,就是設定GDT並切到保護模式,剩下就可以寫c了

[bits 16]
; switch_to_pm: leave 16-bit real mode and continue at init_pm in 32-bit
; protected mode. Does not return to its caller.
switch_to_pm:
    cli ; 1. disable interrupts
    lgdt [gdt_descriptor] ; 2. load the GDT descriptor
    mov eax, cr0
    or eax, 0x1 ; 3. set 32-bit mode bit in cr0
    mov cr0, eax
    jmp CODE_SEG:init_pm ; 4. far jump by using a different segment (loads cs with the CODE_SEG selector)

[bits 32]
; init_pm: first 32-bit code after the far jump. Points every data segment
; register at DATA_SEG, sets up the stack, then hands over to BEGIN_PM.
init_pm: ; we are now using 32-bit instructions
    mov ax, DATA_SEG ; 5. update the segment registers
    mov ds, ax
    mov ss, ax
    mov es, ax
    mov fs, ax
    mov gs, ax

    mov ebp, 0x90000 ; 6. update the stack right at the top of the free space
    mov esp, ebp

    call BEGIN_PM ; 7. Call a well-known label with useful code
    ; BEGIN_PM ends in an infinite loop, so execution does not come back here

[bits 32]
; BEGIN_PM: 32-bit entry point. Prints a banner, then starts the kernel.
BEGIN_PM: ; after the switch we will get here
    mov ebx, MSG_PROT_MODE
    call print_32 ; Note that this will be written at the top left corner
    call start_kernel
    jmp $ ; hang if start_kernel ever returns

BOOT_DRIVE db 0 ; It is a good idea to store it in memory because 'dl' may get overwritten
; NUL-terminated banner strings
MSG_REAL_MODE db "Started in 16-bit real mode", 0
MSG_PROT_MODE db "Loaded 32-bit protected mode", 0

怎麼把bootloader與kernel合在一起?

兩個都是binary,直接接起來 接起來的就是一個硬碟

cat boot.bin kernel.bin > os-image.bin

因為沒有要控制資料擺放位子(memory layout),所以這裡就直接接起來 但之後看xv6會看到linker的部分,去調整kernel裡面怎麼放東西

在記憶體了: kernel

kernel如何操作裝置?

操作裝置的方式有兩種

  1. 透過port
  2. 透過mapped的memory

video driver

透過port去讀cursor的訊息

先用port out告訴裝置要讀哪個register,之後再用port in把值讀回來

/**
 * Read one byte from an x86 I/O port.
 *
 * @param port  I/O port number to read from.
 * @return      The byte the `in` instruction left in AL.
 */
unsigned char port_byte_in (unsigned short port) {
    unsigned char result;
    /* GCC extended inline assembly.
     * !! Operand order is source first, destination second: the reverse of NASM !!
     *
     * "=a" (result): output; after the instruction, copy register e'a'x (AL) into result
     * "d" (port):    input; load port into register e'd'x (DX) before the instruction
     *
     * Outputs and inputs are separated by colons.
     *
     * `volatile` is required: an asm statement that has outputs is otherwise
     * treated as a pure function of its inputs, so the compiler may delete it
     * when the result is unused or merge two reads of the same port into one.
     * A port read must happen every time it is written in the source.
     */
    __asm__ volatile ("in %%dx, %%al" : "=a" (result) : "d" (port));
    return result;
}

/**
 * Write one byte to an x86 I/O port.
 *
 * @param port  I/O port number to write to.
 * @param data  Byte to send.
 */
void port_byte_out (unsigned short port, unsigned char data) {
    /* No outputs, so the first operand section is empty. Both C variables are
     * inputs: data is placed in the A register (AL) and port in the D
     * register (DX). The named operands expand to those register names.
     */
    __asm__("out %[value], %[portno]"
            : /* no outputs */
            : [value] "a" (data), [portno] "d" (port));
}

/**
 * Read one 16-bit word from an x86 I/O port.
 *
 * @param port  I/O port number to read from.
 * @return      The word the `in` instruction left in AX.
 */
unsigned short port_word_in (unsigned short port) {
    unsigned short result;
    /* `volatile` keeps the compiler from dropping or merging port reads;
     * without it an asm statement with outputs may be optimised like a pure
     * function of its inputs.
     */
    __asm__ volatile ("in %%dx, %%ax" : "=a" (result) : "d" (port));
    return result;
}

/**
 * Write one 16-bit word to an x86 I/O port.
 *
 * @param port  I/O port number to write to.
 * @param data  Word to send.
 */
void port_word_out (unsigned short port, unsigned short data) {
    /* data goes in the A register (AX), port in the D register (DX). */
    __asm__("out %[value], %[portno]"
            : /* no outputs */
            : [value] "a" (data), [portno] "d" (port));
}

/**
 * Read the cursor position through the screen control/data ports.
 *
 * Writing 14 to the control port selects the high byte of the cursor
 * position, writing 15 selects the low byte; each is then read back from
 * the data port.
 *
 * @return  Cursor position multiplied by 2 (each character cell takes two bytes).
 */
int get_cursor_offset() {
    port_byte_out(REG_SCREEN_CTRL, 14);
    int high_byte = port_byte_in(REG_SCREEN_DATA);

    port_byte_out(REG_SCREEN_CTRL, 15);
    int low_byte = port_byte_in(REG_SCREEN_DATA);

    int position = (high_byte << 8) + low_byte;
    return position * 2;
}
透過video memory去印文字

文字模式的video memory是螢幕上一個字元格(cell)對到兩個bytes:第一個是文字,第二個是顏色(attribute)

/**
 * Put one character into video memory and move the cursor past it.
 *
 * @param c     Character to print; '\n' moves to column 0 of the next row
 *              instead of writing anything.
 * @param col   Target column; if col or row is negative the current cursor
 *              position is used instead.
 * @param row   Target row.
 * @param attr  Attribute byte; 0 selects WHITE_ON_BLACK.
 * @return      Offset of the position following the printed character. For
 *              out-of-range coordinates, the offset computed from (col, row).
 */
int print_char(char c, int col, int row, char attr) {
    unsigned char *screen = (unsigned char*) VIDEO_ADDRESS;
    if (attr == 0) attr = WHITE_ON_BLACK;

    /* Coordinates off the screen: flag it with an 'E' in the last cell. */
    if (col >= MAX_COLS || row >= MAX_ROWS) {
        int screen_bytes = 2*(MAX_COLS)*(MAX_ROWS);
        screen[screen_bytes - 2] = 'E';           /* character byte */
        screen[screen_bytes - 1] = RED_ON_WHITE;  /* attribute byte */
        return get_offset(col, row);
    }

    int pos = (col >= 0 && row >= 0) ? get_offset(col, row)
                                     : get_cursor_offset();

    if (c != '\n') {
        /* Each cell is two bytes: the character, then its attribute. */
        screen[pos] = c;
        screen[pos + 1] = attr;
        pos += 2;
    } else {
        /* Newline: jump to the first column of the following row. */
        int current_row = get_offset_row(pos);
        pos = get_offset(0, current_row + 1);
    }

    set_cursor_offset(pos);
    return pos;
}

kernel如何與cpu互動?

  1. kernel -> cpu: 指令
  2. cpu <-> kernel: 中斷

中斷有分

  1. 例外
  2. 裝置

但兩個其實差不多

例外

  1. 宣告表(callback)
; Entry stubs for interrupts 0-2. Each stub pushes two bytes-sized immediates
; (a dummy error code of 0, then its own interrupt number) and jumps to the
; shared handler, which later removes both with 'add esp, 8'.
global isr0
global isr1
global isr2

; 0: first entry (no error code of its own, so a 0 is pushed in its place)
isr0:
    cli
    push byte 0 ; dummy error code
    push byte 0 ; interrupt number
    jmp isr_common_stub

; 1: Debug Exception
isr1:
    cli
    push byte 0 ; dummy error code
    push byte 1 ; interrupt number
    jmp isr_common_stub

; 2: Non Maskable Interrupt Exception
isr2:
    cli
    push byte 0 ; dummy error code
    push byte 2 ; interrupt number
    jmp isr_common_stub
  2. 設定handler

也就是做ctx switch(把register存下來,之後寫回去),之後call handler

Q: register存在哪? A: 目前的stack上 (之後xv6會是在自己的process上)

; isr_common_stub: shared tail of every isrN stub. Saves the registers and the
; data segment on the current stack, switches to the kernel data segment,
; calls the C function isr_handler, then restores everything and returns from
; the interrupt.
; NOTE(review): isr_handler takes its registers_t argument by value, so the
; frame built here doubles as that argument - confirm the C side never
; modifies it in a way that breaks the restore below.
isr_common_stub:
    ; 1. Save CPU state
	pusha ; Pushes edi,esi,ebp,esp,ebx,edx,ecx,eax (listed from lowest stack address upward)
	mov ax, ds ; Lower 16-bits of eax = ds.
	push eax ; save the data segment descriptor
	mov ax, 0x10  ; kernel data segment descriptor (selector 0x10; presumably the DATA_SEG entry of the GDT)
	mov ds, ax
	mov es, ax
	mov fs, ax
	mov gs, ax
	
    ; 2. Call C handler
	call isr_handler
	
    ; 3. Restore state
	pop eax ; the data segment value saved above
	mov ds, ax
	mov es, ax
	mov fs, ax
	mov gs, ax
	popa
	add esp, 8 ; Cleans up the pushed error code and pushed ISR number
	sti
	iret ; pops EIP, CS and EFLAGS; ESP and SS are popped as well only when returning to a different privilege level
  3. 轉到function
/**
 * C-level exception handler called from isr_common_stub.
 *
 * Prints the interrupt number followed by the matching entry of
 * exception_messages.
 *
 * @param r  Saved state pushed by the assembly stub; only r.int_no is read.
 */
void isr_handler(registers_t r) {
    /* Large enough for any 32-bit value in decimal: 10 digits, an optional
     * sign and the terminating NUL. The previous 3-byte buffer could only
     * hold two digits, so any interrupt number >= 100 overflowed it. */
    char number[12];

    kprint("received interrupt: ");
    int_to_ascii(r.int_no, number);
    kprint(number);
    kprint("\n");
    /* NOTE(review): exception_messages is indexed without a bounds check;
     * confirm its length covers every interrupt number routed here. */
    kprint(exception_messages[r.int_no]);
    kprint("\n");
}

裝置

裝置的irq其實差不多 (所以xv6是兩個一起處理的)

這裡看個鍵盤的例子,鍵盤就是透過port讀打了什麼之後轉成char,印出來

/* Scancodes that keyboard_callback handles specially. */
#define BACKSPACE 0x0E
#define ENTER 0x1C

/* Line typed so far; reset when ENTER is pressed. */
static char key_buffer[256];

/* Highest scancode keyboard_callback accepts; larger values are ignored. */
#define SC_MAX 57
/* Human-readable key names, one entry per scancode starting at 0. */
const char *sc_name[] = { "ERROR", "Esc", "1", "2", "3", "4", "5", "6", 
    "7", "8", "9", "0", "-", "=", "Backspace", "Tab", "Q", "W", "E", 
        "R", "T", "Y", "U", "I", "O", "P", "[", "]", "Enter", "Lctrl", 
        "A", "S", "D", "F", "G", "H", "J", "K", "L", ";", "'", "`", 
        "LShift", "\\", "Z", "X", "C", "V", "B", "N", "M", ",", ".", 
        "/", "RShift", "Keypad *", "LAlt", "Spacebar"};
/* Character for each scancode (indexed by scancode, entries 0..SC_MAX);
 * '?' stands in for keys that have no printable character. */
const char sc_ascii[] = { '?', '?', '1', '2', '3', '4', '5', '6',     
    '7', '8', '9', '0', '-', '=', '?', '?', 'Q', 'W', 'E', 'R', 'T', 'Y', 
        'U', 'I', 'O', 'P', '[', ']', '?', '?', 'A', 'S', 'D', 'F', 'G', 
        'H', 'J', 'K', 'L', ';', '\'', '`', '?', '\\', 'Z', 'X', 'C', 'V', 
        'B', 'N', 'M', ',', '.', '/', '?', '?', '?', ' '};

/**
 * Keyboard interrupt handler: read one scancode and update the input line.
 *
 * - Scancodes above SC_MAX are ignored.
 * - BACKSPACE removes the last buffered character (no-op on an empty line).
 * - ENTER prints a newline, passes the line to user_input() and clears it.
 * - Any other key is translated through sc_ascii, appended to key_buffer
 *   and echoed; keys are dropped once the buffer is full.
 *
 * @param regs  Saved register state; unused.
 */
static void keyboard_callback(registers_t regs) {
    /* The PIC leaves us the scancode in port 0x60 */
    uint8_t scancode = port_byte_in(0x60);

    if (scancode > SC_MAX) return;
    if (scancode == BACKSPACE) {
        /* Nothing to erase on an empty line: skip both the buffer edit and
         * the on-screen erase so they cannot get out of sync. */
        if (key_buffer[0] != '\0') {
            backspace(key_buffer);
            kprint_backspace();
        }
    } else if (scancode == ENTER) {
        kprint("\n");
        user_input(key_buffer); /* kernel-controlled function */
        key_buffer[0] = '\0';
    } else {
        /* Measure the current line so the append below cannot write past
         * the end of key_buffer (one byte is reserved for the NUL). */
        unsigned int used = 0;
        while (used < sizeof(key_buffer) && key_buffer[used] != '\0') used++;

        if (used + 1 < sizeof(key_buffer)) {
            char letter = sc_ascii[(int)scancode];
            /* Remember that kprint only accepts char[] */
            char str[2] = {letter, '\0'};
            append(key_buffer, letter);
            kprint(str);
        }
    }
    UNUSED(regs);
}

/* Install keyboard_callback as the handler for IRQ1. */
void init_keyboard() {
   register_interrupt_handler(IRQ1, keyboard_callback); 
}

之後哩

到此已經可以

  • 開機
  • 打字
  • print

可以當成整台電腦只有我們這一個process,就是整台電腦 (所以可以反過來看,只要有process就是有了一台電腦的幻覺)

但如果要讓多個process一起用時? 或是應該問,我們還缺什麼?

  • process
    • mem management
    • concurrent
  • user/kernel mode
  • filesystem

之後到xv6一邊看實作,一邊做lab

Ref

os-tutorial Overview of IA-32 assembly programming Which variable size to use (db, dw, dd) with x86 assembly? x86 Assembly Guide