Index: /usr/src/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp =================================================================== --- /usr/src/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp (revision 342352) +++ /usr/src/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp (working copy) @@ -71,6 +71,24 @@ static uint16_t highest(uint64_t V) { return V >> 48; } static uint16_t highesta(uint64_t V) { return (V + 0x8000) >> 48; } +// Extracts the 'PO' field of an instruction encoding. +static uint8_t getPrimaryOpCode(uint32_t Encoding) { return (Encoding >> 26); } + +static bool isDQFormInstruction(uint32_t Encoding) { + switch (getPrimaryOpCode(Encoding)) { + default: + return false; + case 56: + // The only instruction with a primary opcode of 56 is `lq`. + return true; + case 61: + // There are both DS and DQ instruction forms with this primary opcode. + // Namely `lxv` and `stxv` are the DQ-forms that use it. + // The DS 'XO' bits being set to 01 is restricted to DQ form. + return (Encoding & 3) == 0x1; + } +} + PPC64::PPC64() { GotRel = R_PPC64_GLOB_DAT; PltRel = R_PPC64_JMP_SLOT; @@ -95,7 +113,7 @@ // We need 64K pages (at least under glibc/Linux, the loader won't // set different permissions on a finer granularity than that). - DefaultMaxPageSize = 65536; + DefaultMaxPageSize = 4096; // The PPC64 ELF ABI v1 spec, says: // @@ -405,10 +423,15 @@ write16(Loc, Val); break; case R_PPC64_ADDR16_DS: - case R_PPC64_TPREL16_DS: + case R_PPC64_TPREL16_DS: { checkInt(Loc, Val, 16, Type); - write16(Loc, (read16(Loc) & 3) | (Val & ~3)); - break; + // DQ-form instructions use bits 28-31 as part of the instruction encoding + // DS-form instructions only use bits 30-31. + uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U; + uint16_t Mask = isDQFormInstruction(read32(Loc - EndianOffset)) ? 
0xF : 0x3; + checkAlignment(Loc, lo(Val), Mask + 1, Type); + write16(Loc, (read16(Loc) & Mask) | lo(Val)); + } break; case R_PPC64_ADDR16_HA: case R_PPC64_REL16_HA: case R_PPC64_TPREL16_HA: @@ -441,9 +464,14 @@ write16(Loc, lo(Val)); break; case R_PPC64_ADDR16_LO_DS: - case R_PPC64_TPREL16_LO_DS: - write16(Loc, (read16(Loc) & 3) | (lo(Val) & ~3)); - break; + case R_PPC64_TPREL16_LO_DS: { + // DQ-form instructions use bits 28-31 as part of the instruction encoding + // DS-form instructions only use bits 30-31. + uint32_t EndianOffset = Config->EKind == ELF64BEKind ? 2U : 0U; + uint16_t Mask = isDQFormInstruction(read32(Loc - EndianOffset)) ? 0xF : 0x3; + checkAlignment(Loc, lo(Val), Mask + 1, Type); + write16(Loc, (read16(Loc) & Mask) | lo(Val)); + } break; case R_PPC64_ADDR32: case R_PPC64_REL32: checkInt(Loc, Val, 32, Type); @@ -456,7 +484,8 @@ break; case R_PPC64_REL24: { uint32_t Mask = 0x03FFFFFC; - checkInt(Loc, Val, 24, Type); + checkInt(Loc, Val, 26, Type); + checkAlignment(Loc, Val, 4, Type); write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask)); break; } Index: /usr/src/lib/clang/libllvm/Makefile =================================================================== --- /usr/src/lib/clang/libllvm/Makefile (revision 342352) +++ /usr/src/lib/clang/libllvm/Makefile (working copy) @@ -820,7 +820,7 @@ SRCS_MIN+= Support/regexec.c SRCS_MIN+= Support/regfree.c SRCS_MIN+= Support/regstrlcpy.c -SRCS_LLD+= Support/xxhash.cpp +SRCS_MIN+= Support/xxhash.cpp SRCS_MIN+= TableGen/Error.cpp SRCS_MIN+= TableGen/JSONBackend.cpp SRCS_MIN+= TableGen/Main.cpp Index: /usr/src/lib/clang/libllvmminimal/Makefile =================================================================== --- /usr/src/lib/clang/libllvmminimal/Makefile (revision 342352) +++ /usr/src/lib/clang/libllvmminimal/Makefile (working copy) @@ -19,6 +19,7 @@ SRCS+= Support/ErrorHandling.cpp SRCS+= Support/FoldingSet.cpp SRCS+= Support/FormattedStream.cpp +SRCS+= Support/FormatVariadic.cpp SRCS+= Support/Hashing.cpp SRCS+= 
Support/Host.cpp SRCS+= Support/IntEqClasses.cpp Index: /usr/src/libexec/rtld-elf/powerpc64/reloc.c =================================================================== --- /usr/src/libexec/rtld-elf/powerpc64/reloc.c (revision 342352) +++ /usr/src/libexec/rtld-elf/powerpc64/reloc.c (working copy) @@ -496,6 +496,8 @@ } out: #else + /* defobj is not used on ELFv2. */ + (void)(defobj); dbg(" reloc_jmpslot: where=%p, target=%p", (void *)wherep, (void *)target); Index: /usr/src/sys/conf/kern.pre.mk =================================================================== --- /usr/src/sys/conf/kern.pre.mk (revision 342352) +++ /usr/src/sys/conf/kern.pre.mk (working copy) @@ -128,7 +128,7 @@ .endif .if (${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ - ${MACHINE_CPUARCH} == "i386") && \ + ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "powerpc64") && \ defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mifunc} == "" -.error amd64/arm64/i386 kernel requires linker ifunc support +.error amd64/arm64/i386/powerpc64 kernel requires linker ifunc support .endif @@ -137,7 +137,7 @@ .if ${LINKER_TYPE} != "lld" LDFLAGS+= -Wl,-z common-page-size=4096 .else -LDFLAGS+= -Wl,-z -Wl,ifunc-noplt +LDFLAGS+= -Wl,-z -Wl,ifunc-noplt -v .endif .endif Index: /usr/src/sys/conf/ldscript.powerpc64 =================================================================== --- /usr/src/sys/conf/ldscript.powerpc64 (revision 342352) +++ /usr/src/sys/conf/ldscript.powerpc64 (working copy) @@ -6,12 +6,20 @@ ENTRY(__start) SEARCH_DIR(/usr/lib); PROVIDE (__stack = 0); + +/* For loading, we use a simplified ELF structure with a single PT_LOAD segment. */ +PHDRS +{ + /* Kernel segment contains kernel and data. The actual protections will be applied later. */ + text PT_LOAD ; + dynamic PT_DYNAMIC ; +} + SECTIONS { - /* Low-address wrapper for bootloaders (kexec/kboot) that can't parse ELF */ . = kernbase - 0x100; - .kboot : { *(.text.kboot) } + .kboot : { *(.text.kboot) } :text /* Read-only sections, merged into text segment: */ . 
= kernbase; @@ -19,6 +27,8 @@ .text : { + *(.glink) + *(.text.early) *(.text) *(.stub) /* .gnu.warning sections are handled specially by elf32.em. */ @@ -29,7 +39,6 @@ PROVIDE (etext = .); /* Do not emit PT_INTERP section, which confuses some loaders (kexec-lite) */ - .interpX : { *(.interp) } : NONE /DISCARD/ : { *(.interp) } /* Also delete notes */ @@ -92,10 +101,11 @@ .toc1 : ALIGN(8) { *(.toc1) } .opd : ALIGN(8) { KEEP (*(.opd)) } .branch_lt : ALIGN(8) { *(.branch_lt) } - . = ALIGN(4096); - .got : ALIGN(8) { __tocbase = .; *(.got .toc) } + . = ALIGN(4096); /* Align to page! */ + .got : ALIGN(8) { __tocbase = .; *(.got) } + .toc : ALIGN(8) { *(.toc) } - .dynamic : { *(.dynamic) } + .dynamic : { *(.dynamic) } :text :dynamic /* Put .ctors and .dtors next to the .got2 section, so that the pointers get relocated with -mrelocatable. Also put in the .fixup pointers. The current compiler no longer needs this, but keep it around for 2.7.2 */ Index: /usr/src/sys/powerpc/aim/aim_machdep.c =================================================================== --- /usr/src/sys/powerpc/aim/aim_machdep.c (revision 342352) +++ /usr/src/sys/powerpc/aim/aim_machdep.c (working copy) @@ -158,6 +158,11 @@ extern void *imisstrap, *imisssize; extern void *dlmisstrap, *dlmisssize; extern void *dsmisstrap, *dsmisssize; +#ifdef __powerpc64__ +extern void *cpu_reset_handler; +extern void *cpu_wakeup_handler; +extern void *power_save_sequence; +#endif extern void *ap_pcpu; extern void __restartkernel(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr); @@ -391,6 +396,15 @@ /* Set TOC base so that the interrupt code can get at it */ *((void **)TRAP_GENTRAP) = &generictrap; *((register_t *)TRAP_TOCBASE) = toc; + /* + * Set up special support function addresses. + * These functions do not use C calling conventions, + * they operate on magic addresses in the trap vector. 
+ */ + *((void **)TRAP_ADDR_CPU_RESET) = &cpu_reset_handler; + *((void **)TRAP_ADDR_CPU_WAKEUP) = &cpu_wakeup_handler; + *((void **)TRAP_ADDR_POWER_SAVE) = &power_save_sequence; + #else /* Set branch address for trap code */ if (cpu_features & PPC_FEATURE_64) Index: /usr/src/sys/powerpc/aim/trap_subr64.S =================================================================== --- /usr/src/sys/powerpc/aim/trap_subr64.S (revision 342352) +++ /usr/src/sys/powerpc/aim/trap_subr64.S (working copy) @@ -47,6 +47,16 @@ sldi r,r,32; \ ori r,r,TRAP_TOCBASE; /* Magic address for TOC */ \ ld r,0(r) +/* + * 8 bytes - Load DMAP address 0 into a register. + * Setup for accessing trap area memory with the MMU on or off. + * Optimized for low instruction count instead of flexibility. + * Note: This assumes the least significant 48 bits of + * DMAP_BASE_ADDRESS are 0! + */ +#define DMAP_ZERO(r) \ + lis r,DMAP_BASE_ADDRESS@highesta; /* To real-mode alias/dmap */ \ + sldi r,r,32 /* * Restore SRs for a pmap @@ -317,35 +327,34 @@ * processor is waking up from power saving mode * It is software reset when 46:47 = 0b00 */ - mfsrr1 %r9 /* Load SRR1 into r9 */ - andis. %r9,%r9,0x3 /* Logic AND with 46:47 bits */ - beq 2f /* Branch if software reset */ - bl 1f - .llong cpu_wakeup_handler + DMAP_ZERO(%r2) /* 0x100 and 0x104 */ + mfsrr1 %r9 /* 0x108 Load SRR1 into r9 */ + andis. 
%r9,%r9,0x3 /* 0x10c Logic AND with 46:47 bits */ + + beq 2f /* 0x110 Branch if software reset */ + ld %r9,TRAP_ADDR_CPU_WAKEUP(%r2) /* 0x114 */ + b 1f /* 0x118 */ /* It is software reset */ /* Explicitly set MSR[SF] */ -2: mfmsr %r9 - li %r8,1 - insrdi %r9,%r8,1,0 - mtmsrd %r9 - isync +2: mfmsr %r9 /* 0x11c */ + li %r8,1 /* 0x120 */ + insrdi %r9,%r8,1,0 /* 0x124 */ + mtmsrd %r9 /* 0x128 */ + isync /* 0x12c */ - bl 1f - .llong cpu_reset_handler /* Make sure to maintain 8-byte alignment */ + ld %r9,TRAP_ADDR_CPU_RESET(%r2) /* 0x130 */ -1: mflr %r9 - ld %r9,0(%r9) - mtlr %r9 - - blr +1: mtlr %r9 /* 0x134 */ + blr /* 0x138 */ CNAME(rstcodeend): cpu_reset_handler: GET_TOCBASE(%r2) - ld %r1,TOC_REF(tmpstk)(%r2) /* get new SP */ + addis %r1,%r2,TOC_REF(tmpstk)@ha + ld %r1,TOC_REF(tmpstk)@l(%r1) /* get new SP */ addi %r1,%r1,(TMPSTKSZ-48) bl CNAME(cpudep_ap_early_bootstrap) /* Set PCPU */ @@ -380,7 +389,8 @@ GET_TOCBASE(%r2) /* Check for false wake up due to badly SRR1 set (eg. by OPAL) */ - ld %r3,TOC_REF(can_wakeup)(%r2) + addis %r3,%r2,TOC_REF(can_wakeup)@ha + ld %r3,TOC_REF(can_wakeup)@l(%r3) ld %r3,0(%r3) cmpdi %r3,0 beq cpu_reset_handler @@ -469,40 +479,42 @@ .globl CNAME(slbtrap),CNAME(slbtrapend) .p2align 3 CNAME(slbtrap): + /* 0x00 */ mtsprg1 %r1 /* save SP */ GET_CPUINFO(%r1) std %r2,(PC_SLBSAVE+16)(%r1) mfcr %r2 /* save CR */ + /* 0x10 */ std %r2,(PC_SLBSAVE+104)(%r1) mfsrr1 %r2 /* test kernel mode */ mtcr %r2 bf 17,2f /* branch if PSL_PR is false */ + /* 0x20 */ /* User mode */ ld %r2,(PC_SLBSAVE+104)(%r1) /* Restore CR */ mtcr %r2 ld %r2,(PC_SLBSAVE+16)(%r1) /* Restore R2 */ mflr %r1 /* Save the old LR in r1 */ + /* 0x30 */ mtsprg2 %r1 /* And then in SPRG2 */ - /* 52 bytes so far */ - bl 1f - .llong generictrap -1: mflr %r1 - ld %r1,0(%r1) + DMAP_ZERO(%r1) /* two instructions */ + ld %r1,TRAP_GENTRAP(%r1) + /* 0x40 */ mtlr %r1 li %r1, 0x80 /* How to get the vector from LR */ blrl /* Branch to generictrap */ - /* 84 bytes */ 2: mflr %r2 /* Save the old LR in 
r2 */ - nop - bl 3f /* Begin dance to jump to kern_slbtrap*/ - .llong kern_slbtrap -3: mflr %r1 - ld %r1,0(%r1) + /* 0x50 */ + DMAP_ZERO(%r1) /* two instructions */ + ld %r1,TRAP_GENTRAP(%r1) + addi %r1,%r1,(kern_slbtrap-generictrap) + /* 0x60 */ mtlr %r1 GET_CPUINFO(%r1) - blrl /* 124 bytes -- 4 to spare */ + blrl /* must fit in 128 bytes! */ CNAME(slbtrapend): + .globl kern_slbtrap: std %r2,(PC_SLBSAVE+136)(%r1) /* old LR */ std %r3,(PC_SLBSAVE+24)(%r1) /* save R3 */ @@ -639,14 +651,9 @@ mflr %r28 /* save LR */ mfcr %r29 /* save CR */ - /* Begin dance to branch to s_trap in a bit */ - b 1f - .p2align 3 -1: nop - bl 1f - .llong s_trap -1: mflr %r31 - ld %r31,0(%r31) + DMAP_ZERO(%r31) /* two instructions */ + ld %r31,TRAP_GENTRAP(%r31) + addi %r31,%r31,(s_trap - generictrap) mtlr %r31 /* Put our exception vector in SPRG3 */ @@ -679,10 +686,9 @@ mfsrr1 %r31 /* test kernel mode */ mtcr %r31 mflr %r28 /* save LR (SP already saved) */ - bl 1f /* Begin branching to disitrap */ - .llong disitrap -1: mflr %r1 - ld %r1,0(%r1) + DMAP_ZERO(%r1) + ld %r1,TRAP_GENTRAP(%r1) + addi %r1,%r1,(disitrap-generictrap) mtlr %r1 blrl /* Branch to generictrap */ CNAME(dsiend): @@ -897,7 +903,8 @@ mtsprg3 %r1 GET_TOCBASE(%r1) /* get new SP */ - ld %r1,TOC_REF(tmpstk)(%r1) + addis %r1,%r1,TOC_REF(tmpstk)@ha + ld %r1,TOC_REF(tmpstk)@l(%r1) addi %r1,%r1,(TMPSTKSZ-48) FRAME_SETUP(PC_DBSAVE) @@ -963,11 +970,9 @@ std %r30,(PC_DBSAVE+CPUSAVE_R30)(%r1) /* free r30 */ std %r31,(PC_DBSAVE+CPUSAVE_R31)(%r1) /* free r31 */ mflr %r28 /* save LR */ - nop /* alignment */ - bl 9f /* Begin branch */ - .llong dbtrap -9: mflr %r1 - ld %r1,0(%r1) + DMAP_ZERO(%r1) + ld %r1,TRAP_GENTRAP(%r1) + addi %r1,%r1,(dbtrap-generictrap) mtlr %r1 blrl /* Branch to generictrap */ CNAME(dbend): Index: /usr/src/sys/powerpc/include/trap.h =================================================================== --- /usr/src/sys/powerpc/include/trap.h (revision 342352) +++ /usr/src/sys/powerpc/include/trap.h (working copy) @@ 
-146,9 +146,12 @@ /* DTrace trap opcode. */ #define EXC_DTRACE 0x7ffff808 -/* Magic pointer to store TOC base and other info for trap handlers on ppc64 */ -#define TRAP_GENTRAP 0x1f0 -#define TRAP_TOCBASE 0x1f8 +/* Magic pointers to store TOC base and other info for trap handlers on ppc64 */ +#define TRAP_ADDR_POWER_SAVE 0x1c8 /* &power_save_sequence */ +#define TRAP_ADDR_CPU_RESET 0x1d0 /* &cpu_reset_handler */ +#define TRAP_ADDR_CPU_WAKEUP 0x1d8 /* &cpu_wakeup_handler */ +#define TRAP_GENTRAP 0x1f0 /* &generictrap */ +#define TRAP_TOCBASE 0x1f8 /* Located at end of EXC_RST region. */ #ifndef LOCORE struct trapframe; Index: /usr/src/sys/powerpc/ofw/ofwcall64.S =================================================================== --- /usr/src/sys/powerpc/ofw/ofwcall64.S (revision 342352) +++ /usr/src/sys/powerpc/ofw/ofwcall64.S (working copy) @@ -101,11 +101,13 @@ mfmsr %r6 /* read client interface handler */ - ld %r4,TOC_REF(openfirmware_entry)(%r2) + addis %r4,%r2,TOC_REF(openfirmware_entry)@ha + ld %r4,TOC_REF(openfirmware_entry)@l(%r4) ld %r4,0(%r4) /* Get OF stack pointer */ - ld %r7,TOC_REF(ofwstk)(%r2) + addis %r7,%r2,TOC_REF(ofwstk)@ha + ld %r7,TOC_REF(ofwstk)@l(%r7) addi %r7,%r7,OFWSTKSZ-40 /* @@ -113,7 +115,8 @@ * exceptions, which is important for the next few steps. */ - ld %r5,TOC_REF(ofmsr)(%r2) + addis %r5,%r2,TOC_REF(ofmsr)@ha + ld %r5,TOC_REF(ofmsr)@l(%r5) ld %r5,0(%r5) mtmsrd %r5 isync @@ -233,9 +236,11 @@ mfmsr %r6 /* Read RTAS entry and reg save area pointers */ - ld %r5,TOC_REF(rtas_entry)(%r2) + addis %r5,%r2,TOC_REF(rtas_entry)@ha + ld %r5,TOC_REF(rtas_entry)@l(%r5) ld %r5,0(%r5) - ld %r8,TOC_REF(rtas_regsave)(%r2) + addis %r8,%r2,TOC_REF(rtas_regsave)@ha + ld %r8,TOC_REF(rtas_regsave)@l(%r8) /* * Set the MSR to the RTAS value. This has the side effect of disabling @@ -242,7 +247,8 @@ * exceptions, which is important for the next few steps. 
*/ - ld %r7,TOC_REF(rtasmsr)(%r2) + addis %r7,%r2,TOC_REF(rtasmsr)@ha + ld %r7,TOC_REF(rtasmsr)@l(%r7) ld %r7,0(%r7) mtmsrd %r7 isync Index: /usr/src/sys/powerpc/powernv/opalcall.S =================================================================== --- /usr/src/sys/powerpc/powernv/opalcall.S (revision 342352) +++ /usr/src/sys/powerpc/powernv/opalcall.S (working copy) @@ -53,7 +53,8 @@ /* Load OPAL entry information */ mr %r0,%r3 - ld %r3,TOC_REF(opal_entrypoint)(%r2) + addis %r3,%r2,TOC_REF(opal_entrypoint)@ha + ld %r3,TOC_REF(opal_entrypoint)@l(%r3) ld %r3,0(%r3) mtctr %r3 @@ -62,9 +63,11 @@ mfmsr %r31 /* Load last bits from the TOC */ - ld %r3,TOC_REF(opal_msr)(%r2) + addis %r3,%r2,TOC_REF(opal_msr)@ha + ld %r3,TOC_REF(opal_msr)@l(%r3) ld %r3,0(%r3) - ld %r2,TOC_REF(opal_data)(%r2) + addis %r2,%r2,TOC_REF(opal_data)@ha + ld %r2,TOC_REF(opal_data)@l(%r2) ld %r2,0(%r2) mtmsrd %r3 Index: /usr/src/sys/powerpc/powerpc/cpu_subr64.S =================================================================== --- /usr/src/sys/powerpc/powerpc/cpu_subr64.S (revision 342352) +++ /usr/src/sys/powerpc/powerpc/cpu_subr64.S (working copy) @@ -30,6 +30,7 @@ #include "assym.inc" #include <machine/asm.h> +#include <machine/trap.h> .globl CNAME(power_save_sequence) .p2align 3 @@ -65,11 +66,9 @@ std %r1,PCB_SP(%r3) /* Save the stack pointer */ std %r2,PCB_TOC(%r3) /* Save the TOC pointer */ - /* Set where we want to jump */ - bl 1f - .llong power_save_sequence /* Remember about 8 byte alignment */ -1: mflr %r3 - ld %r3,0(%r3) + lis %r3,DMAP_BASE_ADDRESS@highesta; /* To real-mode alias/dmap */ + sldi %r3,%r3,32 + ld %r3,TRAP_ADDR_POWER_SAVE(%r3) mtsrr0 %r3 /* Set MSR */ Index: /usr/src/sys/powerpc/powerpc/elf64_machdep.c =================================================================== --- /usr/src/sys/powerpc/powerpc/elf64_machdep.c (revision 342352) +++ /usr/src/sys/powerpc/powerpc/elf64_machdep.c (working copy) @@ -125,6 +125,9 @@ .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, + 
.sv_trap = NULL, + .sv_hwcap = &cpu_features, + .sv_hwcap2 = &cpu_features2, }; INIT_SYSENTVEC(elf64_sysvec_v2, &elf64_freebsd_sysvec_v2); Index: /usr/src/sys/powerpc/powerpc/swtch64.S =================================================================== --- /usr/src/sys/powerpc/powerpc/swtch64.S (revision 342352) +++ /usr/src/sys/powerpc/powerpc/swtch64.S (working copy) @@ -159,7 +159,8 @@ cpu_switchin: #if defined(SMP) && defined(SCHED_ULE) /* Wait for the new thread to become unblocked */ - ld %r6,TOC_REF(blocked_lock)(%r2) + addis %r6,%r2,TOC_REF(blocked_lock)@ha + ld %r6,TOC_REF(blocked_lock)@l(%r6) blocked_loop: ld %r7,TD_LOCK(%r13) cmpd %r6,%r7