| ;; ----------------------------------------------------------------------- |
| ;; |
| ;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved |
| ;; Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin |
| ;; |
| ;; This program is free software; you can redistribute it and/or modify |
| ;; it under the terms of the GNU General Public License as published by |
| ;; the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
| ;; Boston MA 02111-1307, USA; either version 2 of the License, or |
| ;; (at your option) any later version; incorporated herein by reference. |
| ;; |
| ;; ----------------------------------------------------------------------- |
| |
| ;; |
| ;; bcopy32xx.inc |
| ;; |
| |
| |
| ; |
| ; 32-bit bcopy routine |
| ; |
| ; This is the actual 32-bit portion of the bcopy and shuffle and boot |
| ; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the |
| ; sole exception being the actual relocation code at the beginning of |
| ; pm_shuffle. |
| ; |
| ; It also really needs to live all in a single segment, for the |
| ; address calculations to actually work. |
| ; |
| |
| bits 32 |
| section .bcopyxx.text |
| align 16 |
| ; |
| ; pm_bcopy: |
| ; |
| ; This is the protected-mode core of the "bcopy" routine. |
| ; Try to do aligned transfers; if the src and dst are relatively |
| ; misaligned, align the dst. |
| ; |
| ; Inputs: |
| ; ESI -> source, or -1 to zero-fill the destination instead |
| ; EDI -> destination |
| ; ECX -> byte count; guaranteed to not be zero on entry |
| ; |
| ; If the source lies below the destination and the two ranges overlap, |
| ; the copy runs backwards (DF is set for that path only and cleared |
| ; again before returning). The forward and zero-fill paths never touch |
| ; DF, so they rely on it being clear on entry. |
| ; |
| ; Clobbers ESI, EDI, ECX. EAX, EBX and EDX are saved and restored. |
| ; |
| ; Alignment is done in two steps: a single byte if bit 0 of the dst |
| ; asks for it, then a single word if bit 1 does. The second step must |
| ; look at the *current* EDI: once the byte step has run the pointer has |
| ; moved, and bit 1 of the entry value would select the wrong case and |
| ; leave the bulk dword transfer misaligned. |
| ; |
| |
| pm_bcopy: |
| push ebx |
| push edx |
| push eax |
| |
| cmp esi,-1 ; src == -1 requests a zero fill |
| je .bzero |
| |
| cmp esi,edi ; If source < destination, we might |
| jb .reverse ; have to copy backwards |
| |
| .forward: |
| ; Initial alignment |
| mov edx,edi |
| shr edx,1 ; CF <- bit 0 of dst |
| jnc .faa1 |
| movsb |
| dec ecx |
| .faa1: |
| mov al,cl ; Low bits, in case we leave via .f_tiny |
| cmp ecx,2 |
| jb .f_tiny |
| |
| test edi,2 ; Bit 1 of the current dst (it may just |
| jz .faa2 ; have been advanced by the movsb) |
| movsw |
| sub ecx,2 |
| .faa2: |
| |
| ; Bulk transfer |
| mov al,cl ; Save low bits |
| shr ecx,2 ; Convert to dwords |
| rep movsd ; Do our business |
| ; At this point ecx == 0 |
| |
| test al,2 ; Trailing word? |
| jz .fab2 |
| movsw |
| .fab2: |
| .f_tiny: |
| test al,1 ; Trailing byte? |
| jz .fab1 |
| movsb |
| .fab1: |
| .done: |
| pop eax |
| pop edx |
| pop ebx |
| ret |
| |
| .reverse: |
| lea eax,[esi+ecx-1] ; Point to final byte |
| cmp edi,eax |
| ja .forward ; No overlap, do forward copy |
| |
| std ; Reverse copy |
| lea edi,[edi+ecx-1] ; Both pointers now address the last byte |
| mov esi,eax |
| |
| ; Initial alignment |
| mov edx,edi |
| shr edx,1 ; CF <- bit 0 of the last dst byte address |
| jc .raa1 ; Odd: the end is already word-aligned |
| movsb |
| dec ecx |
| .raa1: |
| |
| dec esi ; Address the word that ends at the |
| dec edi ; current last byte |
| mov al,cl |
| cmp ecx,2 |
| jb .r_tiny |
| test edi,2 ; Bit 1 of the current dst: if set, the |
| jnz .raa2 ; dword ending here is already aligned |
| movsw |
| sub ecx,2 |
| .raa2: |
| |
| ; Bulk copy |
| sub esi,2 ; Address the dword that ends at the |
| sub edi,2 ; current last byte |
| mov al,cl ; Save low bits |
| shr ecx,2 |
| rep movsd |
| |
| ; Final alignment |
| .r_final: |
| add esi,2 ; Back to word addressing |
| add edi,2 |
| test al,2 |
| jz .rab2 |
| movsw |
| .rab2: |
| .r_tiny: |
| inc esi ; Back to byte addressing |
| inc edi |
| test al,1 |
| jz .rab1 |
| movsb |
| .rab1: |
| cld ; Leave DF clear for the caller |
| jmp short .done |
| |
| .bzero: |
| xor eax,eax ; Fill value; BL holds the low count bits |
| |
| ; Initial alignment |
| mov edx,edi |
| shr edx,1 ; CF <- bit 0 of dst |
| jnc .zaa1 |
| stosb |
| dec ecx |
| .zaa1: |
| |
| mov bl,cl |
| cmp ecx,2 |
| jb .z_tiny |
| test edi,2 ; Bit 1 of the current dst (it may just |
| jz .zaa2 ; have been advanced by the stosb) |
| stosw |
| sub ecx,2 |
| .zaa2: |
| |
| ; Bulk |
| mov bl,cl ; Save low bits |
| shr ecx,2 |
| rep stosd |
| |
| test bl,2 |
| jz .zab2 |
| stosw |
| .zab2: |
| .z_tiny: |
| test bl,1 |
| jz .zab1 |
| stosb |
| .zab1: |
| jmp short .done |
| |
| ; |
| ; shuffle_and_boot (pm_shuffle / do_raw_shuffle_and_boot): |
| ; |
| ; Copies the descriptor list into the safe area, relocates this code |
| ; right behind the copy, performs every move in the list via pm_bcopy and |
| ; finally jumps to the entry point; it never returns. This routine |
| ; can clobber any memory outside the bcopy special area. |
| ; IMPORTANT: This routine does not set up any registers. |
| ; It is the responsibility of the caller to generate an appropriate entry |
| ; stub; *especially* when going to real mode. |
| ; |
| ; Inputs: |
| ; ESI -> Pointer to list of (dst, src, len) triples(*) |
| ; EDI -> Pointer to safe area for list + shuffler |
| ; (must not overlap this code nor the RM stack) |
| ; ECX -> Byte count of list area (for initial copy) |
| ; |
| ; If src == -1: then the memory pointed to by (dst, len) is bzeroed; |
| ; this is handled inside the bcopy routine. |
| ; |
| ; If len == 0: this marks the end of the list; dst indicates the entry |
| ; point and src the mode (0 = 16-bit entry via pm_shuffle_16, nonzero = pm) |
| ; |
| ; (*) dst, src, and len are four bytes each |
| ; EBX walks the list; EDX is the relocation delta, then the new GDT address. |
| ; do_raw_shuffle_and_boot is the same entry point, but with a C ABI: |
| ; do_raw_shuffle_and_boot(safearea, descriptors, bytecount) |
| ; safearea arrives in EAX, descriptors in EDX; bytecount is expected in ECX |
| global do_raw_shuffle_and_boot |
| do_raw_shuffle_and_boot: |
| mov edi,eax |
| mov esi,edx |
| |
| pm_shuffle: |
| cli ; End interrupt service (for good; no STI follows) |
| mov ebx,edi ; EBX <- descriptor list (the safe-area copy made below) |
| lea edx,[edi+ecx+15] ; EDX <- just past the list copy: where to relocate our code to |
| and edx,~15 ; Round up to 16 (with the +15 above) to benefit the GDT |
| call pm_bcopy |
| mov esi,__bcopyxx_start ; Absolute source address (the non-position-independent part) |
| mov edi,edx ; Absolute target address |
| sub edx,esi ; EDX <- address delta (new - old) |
| mov ecx,__bcopyxx_dwords |
| lea eax,[edx+.safe] ; Resume point = .safe inside the relocated copy |
| ; Relocate this code (plain forward dword copy, not pm_bcopy) |
| rep movsd |
| jmp eax ; Jump to safe location |
| .safe: |
| ; Give ourselves a safe stack: bcopyxx_stack bytes above the relocated __bcopyxx_end |
| lea esp,[edx+bcopyxx_stack+__bcopyxx_end] |
| add edx,bcopy_gdt ; EDX <- new GDT (delta + assembled address) |
| mov [edx+2],edx ; GDT self-pointer: store the relocated base in the LGDT operand |
| lgdt [edx] ; Switch to local GDT |
| |
| ; Now for the actual shuffling: one pm_bcopy call per 12-byte (dst, src, len) entry |
| .loop: |
| mov edi,[ebx] |
| mov esi,[ebx+4] |
| mov ecx,[ebx+8] |
| add ebx,12 |
| jecxz .done |
| call pm_bcopy |
| jmp .loop |
| .done: |
| lidt [edx+RM_IDT_ptr-bcopy_gdt] ; RM-like IDT (the base 0 / limit 0FFFFh operand at RM_IDT_ptr) |
| push ecx ; == 0 (we got here via jecxz), for cleaning the flags register |
| and esi,esi |
| jz pm_shuffle_16 |
| popfd ; Clean the flags (EFLAGS <- the zero pushed above) |
| jmp edi ; Protected mode entry (src was nonzero) |
| |
| ; We have a 16-bit entry point (src == 0), so we need to return to 16-bit mode: patch EDI |
| ; in as the base of the CS16 and DS16 descriptors. Note: EDX already points to the GDT. |
| pm_shuffle_16: |
| mov eax,edi |
| mov [edx+PM_CS16+2],ax |
| mov [edx+PM_DS16+2],ax |
| shr eax,16 |
| mov [edx+PM_CS16+4],al |
| mov [edx+PM_CS16+7],ah |
| mov [edx+PM_DS16+4],al |
| mov [edx+PM_DS16+7],ah |
| mov eax,cr0 |
| and al,~1 |
| popfd ; Clean the flags (pops the zero pushed at pm_shuffle's .done) |
| ; No flag-changing instructions below... EAX = CR0 with bit 0 (PE) cleared; CR0 itself is not written here |
| mov dx,PM_DS16 |
| mov ds,edx |
| mov es,edx |
| mov fs,edx |
| mov gs,edx |
| mov ss,edx |
| jmp PM_CS16:0 |
| |
| section .bcopyxx.data |
| |
| alignz 16 |
| ; GDT descriptor entry: "desc NAME" defines the label bcopy_gdt.NAME and PM_NAME = its byte offset from bcopy_gdt (used as the selector) |
| %macro desc 1 |
| bcopy_gdt.%1: |
| PM_%1 equ bcopy_gdt.%1-bcopy_gdt |
| %endmacro |
| |
| bcopy_gdt: |
| dw bcopy_gdt_size-1 ; Null descriptor - reused as the LGDT operand: GDT limit, |
| dd bcopy_gdt ; then GDT pointer (pm_shuffle rewrites it with the relocated address) |
| dw 0 |
| |
| ; TSS segment to keep Intel VT happy. Intel VT is |
| ; unhappy about anything that doesn't smell like a |
| ; full-blown 32-bit OS. The shuffle code shown here never loads it (no LTR). |
| desc TSS |
| dw 104-1, DummyTSS ; 08h 32-bit task state segment: limit 103, base bits 15:0 |
| dd 00008900h ; present, dpl 0, type 9 (available TSS), 104 bytes @DummyTSS |
| |
| desc CS16 |
| dd 0000ffffh ; 10h Code segment, use16, readable, base 0 as assembled; |
| dd 00009b00h ; present, dpl 0, cover 64K; base is patched by pm_shuffle_16 |
| desc DS16 |
| dd 0000ffffh ; 18h Data segment, use16, read/write, base 0 as assembled; |
| dd 00009300h ; present, dpl 0, cover 64K; base is patched by pm_shuffle_16 |
| desc CS32 |
| dd 0000ffffh ; 20h Code segment, use32, readable, base 0, |
| dd 00cf9b00h ; present, dpl 0, cover all 4G (4K granularity) |
| desc DS32 |
| dd 0000ffffh ; 28h Data segment, use32, read/write, base 0, |
| dd 00cf9300h ; present, dpl 0, cover all 4G (4K granularity) |
| |
| bcopy_gdt_size: equ $-bcopy_gdt |
| ; DummyTSS is the base address placed in the TSS descriptor of bcopy_gdt. |
| ; Space for a dummy task state segment. It should never be actually |
| ; accessed, but just in case it is, point to a chunk of memory that |
| ; has a chance to not be used for anything real... |
| ; This is only an address constant; no storage is reserved for it here. |
| DummyTSS equ 0x580 |
| |
| align 4 |
| RM_IDT_ptr: dw 0FFFFh ; LIDT operand used by pm_shuffle: Length (nonsense, but matches CPU) |
| dd 0 ; Offset (IDT base 0) |
| |
| bcopyxx_stack equ 128 ; We want this much stack: ESP is set this many bytes past the relocated __bcopyxx_end |
| |
| ; Publish the shuffler size to other modules: __syslinux_shuffler_size is |
| ; a dword in read-only data holding the externally defined __bcopyxx_len. |
| extern __bcopyxx_len |
| global __syslinux_shuffler_size |
| |
| section .rodata |
| align 4 |
| __syslinux_shuffler_size: dd __bcopyxx_len |
| |
| ; Whatever follows this point is assembled as 16-bit code in .text16. |
| bits 16 |
| section .text16 |