commit a9085a598b6a0eebd4635000f3e1852189bc3b87 Author: Brandon Bergren Date: Wed Nov 27 15:14:15 2019 -0600 battlez binary size backport -- ppc64 tests diff --git a/contrib/llvm-project/lld/ELF/Config.h b/contrib/llvm-project/lld/ELF/Config.h index ff9d3dc0933..02124c04c63 100644 --- a/contrib/llvm-project/lld/ELF/Config.h +++ b/contrib/llvm-project/lld/ELF/Config.h @@ -210,6 +210,7 @@ struct Configuration { bool zOrigin; bool zRelro; bool zRodynamic; + bool zSeparateCode; bool zText; bool zRetpolineplt; bool zWxneeded; diff --git a/contrib/llvm-project/lld/ELF/Driver.cpp b/contrib/llvm-project/lld/ELF/Driver.cpp index 81894e98f39..4ba41409348 100644 --- a/contrib/llvm-project/lld/ELF/Driver.cpp +++ b/contrib/llvm-project/lld/ELF/Driver.cpp @@ -378,9 +378,10 @@ static bool isKnownZFlag(StringRef s) { s == "execstack" || s == "global" || s == "hazardplt" || s == "ifunc-noplt" || s == "initfirst" || s == "interpose" || s == "keep-text-section-prefix" || s == "lazy" || s == "muldefs" || - s == "nocombreloc" || s == "nocopyreloc" || s == "nodefaultlib" || - s == "nodelete" || s == "nodlopen" || s == "noexecstack" || - s == "nokeep-text-section-prefix" || s == "norelro" || s == "notext" || + s == "separate-code" || s == "nocombreloc" || s == "nocopyreloc" || + s == "nodefaultlib" || s == "nodelete" || s == "nodlopen" || + s == "noexecstack" || s == "nokeep-text-section-prefix" || + s == "norelro" || s == "noseparate-code" || s == "notext" || s == "now" || s == "origin" || s == "relro" || s == "retpolineplt" || s == "rodynamic" || s == "text" || s == "wxneeded" || s.startswith("common-page-size") || s.startswith("max-page-size=") || @@ -935,6 +936,7 @@ static void readConfigs(opt::InputArgList &args) { config->zRelro = getZFlag(args, "relro", "norelro", true); config->zRetpolineplt = hasZOption(args, "retpolineplt"); config->zRodynamic = hasZOption(args, "rodynamic"); + config->zSeparateCode = getZFlag(args, "separate-code", "noseparate-code", false); config->zStackSize 
= args::getZOptionValue(args, OPT_z, "stack-size", 0); config->zText = getZFlag(args, "text", "notext", true); config->zWxneeded = hasZOption(args, "wxneeded"); diff --git a/contrib/llvm-project/lld/ELF/InputSection.cpp b/contrib/llvm-project/lld/ELF/InputSection.cpp index a024ac307b0..d927658e4a6 100644 --- a/contrib/llvm-project/lld/ELF/InputSection.cpp +++ b/contrib/llvm-project/lld/ELF/InputSection.cpp @@ -608,26 +608,38 @@ static int64_t getTlsTpOffset(const Symbol &s) { if (&s == ElfSym::tlsModuleBase) return 0; + // There are 2 TLS layouts. Among targets we support, x86 uses TLS Variant 2 + // while most others use Variant 1. At run time TP will be aligned to p_align. + + // Variant 1. TP will be followed by an optional gap (which is the size of 2 + // pointers on ARM/AArch64, 0 on other targets), followed by alignment + // padding, then the static TLS blocks. The alignment padding is added so that + // (TP + gap + padding) is congruent to p_vaddr modulo p_align. + // + // Variant 2. Static TLS blocks, followed by alignment padding are placed + // before TP. The alignment padding is added so that (TP - padding - + // p_memsz) is congruent to p_vaddr modulo p_align. + elf::PhdrEntry *tls = Out::tlsPhdr; switch (config->emachine) { + // Variant 1. case EM_ARM: case EM_AARCH64: - // Variant 1. The thread pointer points to a TCB with a fixed 2-word size, - // followed by a variable amount of alignment padding, followed by the TLS - // segment. - return s.getVA(0) + alignTo(config->wordsize * 2, Out::tlsPhdr->p_align); - case EM_386: - case EM_X86_64: - // Variant 2. The TLS segment is located just before the thread pointer. - return s.getVA(0) - alignTo(Out::tlsPhdr->p_memsz, Out::tlsPhdr->p_align); + return s.getVA(0) + config->wordsize * 2 + + ((tls->p_vaddr - config->wordsize * 2) & (tls->p_align - 1)); case EM_PPC: case EM_PPC64: - // The thread pointer points to a fixed offset from the start of the - // executable's TLS segment. 
An offset of 0x7000 allows a signed 16-bit - // offset to reach 0x1000 of TCB/thread-library data and 0xf000 of the - // program's TLS segment. - return s.getVA(0) - 0x7000; + // Adjusted Variant 1. TP is placed with a displacement of 0x7000, which is + // to allow a signed 16-bit offset to reach 0x1000 of TCB/thread-library + // data and 0xf000 of the program's TLS segment. + return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)) - 0x7000; case EM_RISCV: - return s.getVA(0); + return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)); + + // Variant 2. + case EM_386: + case EM_X86_64: + return s.getVA(0) - tls->p_memsz - + ((-tls->p_vaddr - tls->p_memsz) & (tls->p_align - 1)); default: llvm_unreachable("unhandled Config->EMachine"); } diff --git a/contrib/llvm-project/lld/ELF/Writer.cpp b/contrib/llvm-project/lld/ELF/Writer.cpp index 10b171e8c0d..410c7107ab9 100644 --- a/contrib/llvm-project/lld/ELF/Writer.cpp +++ b/contrib/llvm-project/lld/ELF/Writer.cpp @@ -2208,21 +2208,65 @@ void Writer::addPhdrForSection(Partition &part, unsigned shType, part.phdrs.push_back(entry); } -// The first section of each PT_LOAD, the first section in PT_GNU_RELRO and the -// first section after PT_GNU_RELRO have to be page aligned so that the dynamic -// linker can set the permissions. +// Place the first section of each PT_LOAD to a different page (of maxPageSize). +// This is achieved by assigning an alignment expression to addrExpr of each +// such section. template void Writer::fixSectionAlignments() { - auto pageAlign = [](OutputSection *cmd) { - if (cmd && !cmd->addrExpr) - cmd->addrExpr = [=] { - return alignTo(script->getDot(), config->maxPageSize); - }; + const PhdrEntry *prev; + auto pageAlign = [&](const PhdrEntry *p) { + OutputSection *cmd = p->firstSec; + if (cmd && !cmd->addrExpr) { + // Prefer advancing to align(dot, maxPageSize) + dot%maxPageSize to avoid + // padding in the file contents. 
+ // + // When -z separate-code is used we must not have any overlap in pages + // between an executable segment and a non-executable segment. We align to + // the next maximum page size boundary on transitions between executable + // and non-executable segments. + // + // TODO Enable this technique on all targets. + bool enable = + config->emachine == EM_AARCH64 || config->emachine == EM_PPC64; + + if (!enable || (config->zSeparateCode && prev && + (prev->p_flags & PF_X) != (p->p_flags & PF_X))) + cmd->addrExpr = [] { + return alignTo(script->getDot(), config->maxPageSize); + }; + // PT_TLS is at the start of the first RW PT_LOAD. If `p` includes PT_TLS, + // it must be the RW. Align to p_align(PT_TLS) to make sure + // p_vaddr(PT_LOAD)%p_align(PT_LOAD) = 0. Otherwise, if + // sh_addralign(.tdata) < sh_addralign(.tbss), we will set p_align(PT_TLS) + // to sh_addralign(.tbss), while p_vaddr(PT_TLS)=p_vaddr(PT_LOAD) may not + // be congruent to 0 modulo p_align(PT_TLS). + // + // Technically this is not required, but as of 2019, some dynamic loaders + // don't handle p_vaddr%p_align != 0 correctly, e.g. glibc (i386 and + // x86-64) doesn't make runtime address congruent to p_vaddr modulo + // p_align for dynamic TLS blocks (PR/24606), FreeBSD rtld has the same + // bug, musl (TLS Variant 1 architectures) before 1.1.23 handled TLS + // blocks correctly. We need to keep the workaround for a while. 
+ else if (Out::tlsPhdr && Out::tlsPhdr->firstSec == p->firstSec) + cmd->addrExpr = [] { + return alignTo(script->getDot(), config->maxPageSize) + + alignTo(script->getDot() % config->maxPageSize, + Out::tlsPhdr->p_align); + }; + else + cmd->addrExpr = [] { + return alignTo(script->getDot(), config->maxPageSize) + + script->getDot() % config->maxPageSize; + }; + } }; - + for (Partition &part : partitions) { + prev = nullptr; for (const PhdrEntry *p : part.phdrs) - if (p->p_type == PT_LOAD && p->firstSec) - pageAlign(p->firstSec); + if (p->p_type == PT_LOAD && p->firstSec) { + pageAlign(p); + prev = p; + } } } @@ -2291,13 +2335,11 @@ template void Writer::assignFileOffsets() { for (OutputSection *sec : outputSections) { off = setFileOffset(sec, off); - if (script->hasSectionsCommand) - continue; // If this is a last section of the last executable segment and that // segment is the last loadable segment, align the offset of the // following section to avoid loading non-segments parts of the file. - if (lastRX && lastRX->lastSec == sec) + if (config->zSeparateCode && lastRX && lastRX->lastSec == sec) off = alignTo(off, config->commonPageSize); } @@ -2352,10 +2394,11 @@ template void Writer::setPhdrs(Partition &part) { p->p_align = std::max(p->p_align, config->maxPageSize); } else if (p->p_type == PT_GNU_RELRO) { p->p_align = 1; - // The glibc dynamic loader rounds the size down, so we need to round up + // musl/glibc ld.so rounds the size down, so we need to round up // to protect the last page. This is a no-op on FreeBSD which always // rounds up. - p->p_memsz = alignTo(p->p_memsz, config->commonPageSize); + p->p_memsz = alignTo(p->p_offset + p->p_memsz, config->commonPageSize) - + p->p_offset; } } } @@ -2570,7 +2613,7 @@ static void fillTrap(uint8_t *i, uint8_t *end) { // We'll leave other pages in segments as-is because the rest will be // overwritten by output sections. 
template void Writer::writeTrapInstr() { - if (script->hasSectionsCommand) + if (!config->zSeparateCode) return; for (Partition &part : partitions) { diff --git a/libexec/rtld-elf/map_object.c b/libexec/rtld-elf/map_object.c index 094a0c36e07..24812df6724 100644 --- a/libexec/rtld-elf/map_object.c +++ b/libexec/rtld-elf/map_object.c @@ -313,6 +313,7 @@ map_object(int fd, const char *path, const struct stat *sb) obj->tlsindex = ++tls_max_index; obj->tlssize = phtls->p_memsz; obj->tlsalign = phtls->p_align; + obj->tlspoffset = phtls->p_offset; obj->tlsinitsize = phtls->p_filesz; obj->tlsinit = mapbase + phtls->p_vaddr; } diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 3e87e53a315..ea86e437442 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -1496,6 +1496,7 @@ digest_phdr(const Elf_Phdr *phdr, int phnum, caddr_t entry, const char *path) obj->tlsalign = ph->p_align; obj->tlsinitsize = ph->p_filesz; obj->tlsinit = (void*)(ph->p_vaddr + obj->relocbase); + obj->tlspoffset = ph->p_offset; break; case PT_GNU_STACK: @@ -4860,7 +4861,7 @@ allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign) Elf_Addr addr; Elf_Addr i; size_t extra_size, maxalign, post_size, pre_size, tls_block_size; - size_t tls_init_align; + size_t tls_init_align, tls_init_offset; if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE) return (oldtcb); @@ -4877,7 +4878,7 @@ allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign) tls_block_size += pre_size + tls_static_space - TLS_TCB_SIZE - post_size; /* Allocate whole TLS block */ - tls_block = malloc_aligned(tls_block_size, maxalign); + tls_block = malloc_aligned(tls_block_size, maxalign, 0); tcb = (Elf_Addr **)(tls_block + pre_size + extra_size); if (oldtcb != NULL) { @@ -4900,16 +4901,22 @@ allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign) dtv[1] = tls_max_index; for (obj = globallist_curr(objs); obj != NULL; - obj = globallist_next(obj)) { - if 
(obj->tlsoffset > 0) { - addr = (Elf_Addr)tcb + obj->tlsoffset; - if (obj->tlsinitsize > 0) - memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); - if (obj->tlssize > obj->tlsinitsize) - memset((void*)(addr + obj->tlsinitsize), 0, - obj->tlssize - obj->tlsinitsize); - dtv[obj->tlsindex + 1] = addr; - } + obj = globallist_next(obj)) { + tls_init_offset = obj->tlspoffset & (obj->tlsalign - 1); + if (obj->tlsoffset > 0) { + addr = (Elf_Addr)tcb + obj->tlsoffset; + if (tls_init_offset > 0) /* zero the leading pad */ + memset((void *)addr, 0, tls_init_offset); + if (obj->tlsinitsize > 0) { + memcpy((void *)(addr + tls_init_offset), obj->tlsinit, + obj->tlsinitsize); + } + if (obj->tlssize > obj->tlsinitsize) { + memset((void *)(addr + tls_init_offset + obj->tlsinitsize), + 0, obj->tlssize - obj->tlsinitsize - tls_init_offset); + } + dtv[obj->tlsindex + 1] = addr; + } } } @@ -4967,7 +4974,7 @@ allocate_tls(Obj_Entry *objs, void *oldtls, size_t tcbsize, size_t tcbalign) size = round(tls_static_space, ralign) + round(tcbsize, ralign); assert(tcbsize >= 2*sizeof(Elf_Addr)); - tls = malloc_aligned(size, ralign); + tls = malloc_aligned(size, ralign, 0 /* XXX */); dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); segbase = (Elf_Addr)(tls + round(tls_static_space, ralign)); @@ -5060,25 +5067,24 @@ free_tls(void *tls, size_t tcbsize __unused, size_t tcbalign) void * allocate_module_tls(int index) { - Obj_Entry* obj; - char* p; - - TAILQ_FOREACH(obj, &obj_list, next) { - if (obj->marker) - continue; - if (obj->tlsindex == index) - break; - } - if (!obj) { - _rtld_error("Can't find module with TLS index %d", index); - rtld_die(); - } + Obj_Entry *obj; + char *p; - p = malloc_aligned(obj->tlssize, obj->tlsalign); - memcpy(p, obj->tlsinit, obj->tlsinitsize); - memset(p + obj->tlsinitsize, 0, obj->tlssize - obj->tlsinitsize); + TAILQ_FOREACH(obj, &obj_list, next) { + if (obj->marker) + continue; + if (obj->tlsindex == index) + break; + } + if (obj == NULL) { + _rtld_error("Can't find module with TLS index 
%d", index); + rtld_die(); + } - return p; + p = malloc_aligned(obj->tlssize, obj->tlsalign, obj->tlspoffset); + memcpy(p, obj->tlsinit, obj->tlsinitsize); + memset(p + obj->tlsinitsize, 0, obj->tlssize - obj->tlsinitsize); + return (p); } bool diff --git a/libexec/rtld-elf/rtld.h b/libexec/rtld-elf/rtld.h index c1996f04219..de0e4466d81 100644 --- a/libexec/rtld-elf/rtld.h +++ b/libexec/rtld-elf/rtld.h @@ -163,6 +163,7 @@ typedef struct Struct_Obj_Entry { size_t tlssize; /* Size of TLS block for this module */ size_t tlsoffset; /* Offset of static TLS block for this module */ size_t tlsalign; /* Alignment of static TLS block */ + size_t tlspoffset; /* p_offset of the static TLS block */ caddr_t relro_page; size_t relro_size; @@ -360,7 +361,7 @@ Obj_Entry *map_object(int, const char *, const struct stat *); void *xcalloc(size_t, size_t); void *xmalloc(size_t); char *xstrdup(const char *); -void *malloc_aligned(size_t size, size_t align); +void *malloc_aligned(size_t size, size_t align, size_t offset); void free_aligned(void *ptr); extern Elf_Addr _GLOBAL_OFFSET_TABLE_[]; extern Elf_Sym sym_zero; /* For resolving undefined weak refs. 
*/ diff --git a/libexec/rtld-elf/xmalloc.c b/libexec/rtld-elf/xmalloc.c index 86ec6ea0bf8..18a7be5ef6d 100644 --- a/libexec/rtld-elf/xmalloc.c +++ b/libexec/rtld-elf/xmalloc.c @@ -27,6 +27,7 @@ * $FreeBSD$ */ +#include #include #include #include @@ -76,16 +77,22 @@ xstrdup(const char *str) } void * -malloc_aligned(size_t size, size_t align) +malloc_aligned(size_t size, size_t align, size_t offset) { - void *mem, *res; + char *mem, *res; + uintptr_t x; + offset &= align - 1; if (align < sizeof(void *)) align = sizeof(void *); - mem = xmalloc(size + sizeof(void *) + align - 1); - res = (void *)round((uintptr_t)mem + sizeof(void *), align); - *(void **)((uintptr_t)res - sizeof(void *)) = mem; + mem = xmalloc(roundup(size, align) + roundup(sizeof(void *), + align) + offset); + x = roundup((uintptr_t)mem + sizeof(void *), align); + x += offset; + res = (void *)x; + x -= sizeof(void *); + memcpy((void *)x, &mem, sizeof(mem)); return (res); } @@ -99,6 +106,6 @@ free_aligned(void *ptr) return; x = (uintptr_t)ptr; x -= sizeof(void *); - mem = *(void **)x; + memcpy(&mem, (void *)x, sizeof(mem)); free(mem); }