diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp new file mode 100644 index 000000000000000..26d4e0cb3987da6 --- /dev/null +++ b/lld/MachO/BPSectionOrderer.cpp @@ -0,0 +1,413 @@ +//===- BPSectionOrderer.cpp--------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BPSectionOrderer.h" +#include "InputSection.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/BalancedPartitioning.h" +#include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Support/xxhash.h" + +#define DEBUG_TYPE "bp-section-orderer" +using namespace llvm; +using namespace lld::macho; + +/// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and +/// "yyyy" are numbers that could change between builds. We need to use the root +/// symbol name before this suffix so these symbols can be matched with profiles +/// which may have different suffixes. +static StringRef getRootSymbol(StringRef Name) { + auto [P0, S0] = Name.rsplit(".llvm."); + auto [P1, S1] = P0.rsplit(".__uniq."); + return P1; +} + +static uint64_t getRelocHash(StringRef kind, uint64_t sectionIdx, + uint64_t offset, uint64_t addend) { + return xxHash64((kind + ": " + Twine::utohexstr(sectionIdx) + " + " + + Twine::utohexstr(offset) + " + " + Twine::utohexstr(addend)) + .str()); +} + +static uint64_t +getRelocHash(const Reloc &reloc, + const DenseMap §ionToIdx) { + auto *isec = reloc.getReferentInputSection(); + std::optional sectionIdx; + auto sectionIdxIt = sectionToIdx.find(isec); + if (sectionIdxIt != sectionToIdx.end()) + sectionIdx = sectionIdxIt->getSecond(); + std::string kind; + if (isec) + kind = ("Section " + Twine(isec->kind())).str(); + if (auto *sym = reloc.referent.dyn_cast()) { + kind += (" Symbol " + Twine(sym->kind())).str(); + if (auto *d = dyn_cast(sym)) { + if (isa_and_nonnull(isec)) + return getRelocHash(kind, 0, isec->getOffset(d->value), reloc.addend); + return getRelocHash(kind, sectionIdx.value_or(0), d->value, reloc.addend); + } + } + return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend); +} + +static void constructNodesForCompression( + const SmallVector §ions, + const DenseMap §ionToIdx, + const SmallVector §ionIdxs, + std::vector &nodes, + DenseMap> &duplicateSectionIdxs, + BPFunctionNode::UtilityNodeT &maxUN) { + TimeTraceScope timeScope("Build nodes for compression"); + + SmallVector>> sectionHashes; + sectionHashes.reserve(sectionIdxs.size()); + SmallVector hashes; + for (unsigned sectionIdx : sectionIdxs) { + const auto *isec = sections[sectionIdx]; + constexpr unsigned windowSize = 4; + + for (size_t i = 0; i < isec->data.size(); i++) { + auto window = isec->data.drop_front(i).take_front(windowSize); + hashes.push_back(xxHash64(window)); + } + for (const auto &r : isec->relocs) { + if (r.length == 0 || r.referent.isNull() || r.offset >= isec->data.size()) + continue; + uint64_t relocHash = getRelocHash(r, sectionToIdx); + uint32_t start = (r.offset < windowSize) ? 0 : r.offset - windowSize + 1; + for (uint32_t i = start; i < r.offset + r.length; i++) { + auto window = isec->data.drop_front(i).take_front(windowSize); + hashes.push_back(xxHash64(window) + relocHash); + } + } + + llvm::sort(hashes); + hashes.erase(std::unique(hashes.begin(), hashes.end()), hashes.end()); + + sectionHashes.emplace_back(sectionIdx, hashes); + hashes.clear(); + } + + DenseMap hashFrequency; + for (auto &[sectionIdx, hashes] : sectionHashes) + for (auto hash : hashes) + ++hashFrequency[hash]; + + // Merge section that are nearly identical + SmallVector>> newSectionHashes; + DenseMap wholeHashToSectionIdx; + for (auto &[sectionIdx, hashes] : sectionHashes) { + uint64_t wholeHash = 0; + for (auto hash : hashes) + if (hashFrequency[hash] > 5) + wholeHash ^= hash; + auto [it, wasInserted] = + wholeHashToSectionIdx.insert(std::make_pair(wholeHash, sectionIdx)); + if (wasInserted) { + newSectionHashes.emplace_back(sectionIdx, hashes); + } else { + duplicateSectionIdxs[it->getSecond()].push_back(sectionIdx); + } + } + sectionHashes = newSectionHashes; + + // Recompute hash frequencies + hashFrequency.clear(); + for (auto &[sectionIdx, hashes] : sectionHashes) + for (auto hash : hashes) + ++hashFrequency[hash]; + + // Filter rare and common hashes and assign each a unique utility node that + // doesn't conflict with the trace utility nodes + DenseMap hashToUN; + for (auto &[hash, frequency] : hashFrequency) { + if (frequency <= 1 || frequency * 2 > wholeHashToSectionIdx.size()) + continue; + hashToUN[hash] = ++maxUN; + } + + std::vector uns; + for (auto &[sectionIdx, hashes] : sectionHashes) { + for (auto &hash : hashes) { + auto it = hashToUN.find(hash); + if (it != hashToUN.end()) + uns.push_back(it->second); + } + nodes.emplace_back(sectionIdx, uns); + uns.clear(); + } +} + +DenseMap lld::macho::runBalancedPartitioning( + size_t &highestAvailablePriority, StringRef profilePath, + bool forFunctionCompression, bool forDataCompression, bool verbose) { + + SmallVector sections; + DenseMap sectionToIdx; + StringMap> symbolToSectionIdxs; + for (const auto *file : inputFiles) { + for (auto *sec : file->sections) { + for (auto &subsec : sec->subsections) { + auto *isec = subsec.isec; + if (!isec || isec->data.empty() || !isec->data.data()) + continue; + unsigned sectionIdx = sections.size(); + sectionToIdx.try_emplace(isec, sectionIdx); + sections.push_back(isec); + for (Symbol *sym : isec->symbols) + if (auto *d = dyn_cast_or_null(sym)) + symbolToSectionIdxs[d->getName()].insert(sectionIdx); + } + } + } + + StringMap> rootSymbolToSectionIdxs; + for (auto &entry : symbolToSectionIdxs) { + StringRef name = entry.getKey(); + auto §ionIdxs = entry.getValue(); + name = getRootSymbol(name); + rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(), + sectionIdxs.end()); + // Linkage names can be prefixed with "_" or "l_" on Mach-O. See + // Mangler::getNameWithPrefix() for details. + if (name.consume_front("_") || name.consume_front("l_")) + rootSymbolToSectionIdxs[name].insert(sectionIdxs.begin(), + sectionIdxs.end()); + } + + std::vector nodesForStartup; + BPFunctionNode::UtilityNodeT maxUN = 0; + DenseMap> + startupSectionIdxUNs; + std::unique_ptr reader; + if (!profilePath.empty()) { + auto fs = vfs::getRealFileSystem(); + auto readerOrErr = InstrProfReader::create(profilePath, *fs); + lld::checkError(readerOrErr.takeError()); + + reader = std::move(readerOrErr.get()); + for (auto &entry : *reader) { + // Read all entries + (void)entry; + } + auto &traces = reader->getTemporalProfTraces(); + + // Used to define the initial order for startup functions. + DenseMap sectionIdxToTimestamp; + DenseMap sectionIdxToFirstUN; + for (size_t traceIdx = 0; traceIdx < traces.size(); traceIdx++) { + uint64_t currentSize = 0, cutoffSize = 1; + size_t cutoffTimestamp = 1; + auto &trace = traces[traceIdx].FunctionNameRefs; + for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) { + auto [Filename, ParsedFuncName] = getParsedIRPGOName( + reader->getSymtab().getFuncOrVarName(trace[timestamp])); + ParsedFuncName = getRootSymbol(ParsedFuncName); + + auto sectionIdxsIt = rootSymbolToSectionIdxs.find(ParsedFuncName); + if (sectionIdxsIt == rootSymbolToSectionIdxs.end()) + continue; + auto §ionIdxs = sectionIdxsIt->getValue(); + // If the same symbol is found in multiple sections, they might be + // identical, so we arbitrarily use the size from the first section. + currentSize += sections[*sectionIdxs.begin()]->getSize(); + + // Since BalancedPartitioning is sensitive to the initial order, we need + // to explicitly define it to be ordered by earliest timestamp. + for (unsigned sectionIdx : sectionIdxs) { + auto [it, wasInserted] = + sectionIdxToTimestamp.try_emplace(sectionIdx, timestamp); + if (!wasInserted) + it->getSecond() = std::min(it->getSecond(), timestamp); + } + + if (timestamp >= cutoffTimestamp || currentSize >= cutoffSize) { + ++maxUN; + cutoffSize = 2 * currentSize; + cutoffTimestamp = 2 * cutoffTimestamp; + } + for (unsigned sectionIdx : sectionIdxs) + sectionIdxToFirstUN.try_emplace(sectionIdx, maxUN); + } + for (auto &[sectionIdx, firstUN] : sectionIdxToFirstUN) + for (auto un = firstUN; un <= maxUN; ++un) + startupSectionIdxUNs[sectionIdx].push_back(un); + ++maxUN; + sectionIdxToFirstUN.clear(); + } + + // These uns should already be sorted without duplicates. + for (auto &[sectionIdx, uns] : startupSectionIdxUNs) + nodesForStartup.emplace_back(sectionIdx, uns); + + llvm::sort(nodesForStartup, [§ionIdxToTimestamp](auto &L, auto &R) { + return std::make_pair(sectionIdxToTimestamp[L.Id], L.Id) < + std::make_pair(sectionIdxToTimestamp[R.Id], R.Id); + }); + } + + SmallVector sectionIdxsForFunctionCompression, + sectionIdxsForDataCompression; + for (unsigned sectionIdx = 0; sectionIdx < sections.size(); sectionIdx++) { + if (startupSectionIdxUNs.count(sectionIdx)) + continue; + const auto *isec = sections[sectionIdx]; + if (isCodeSection(isec)) { + if (forFunctionCompression) + sectionIdxsForFunctionCompression.push_back(sectionIdx); + } else { + if (forDataCompression) + sectionIdxsForDataCompression.push_back(sectionIdx); + } + } + + std::vector nodesForFunctionCompression, + nodesForDataCompression; + // Map a section index (to be ordered for compression) to a list of duplicate + // section indices (not ordered for compression). + DenseMap> duplicateFunctionSectionIdxs, + duplicateDataSectionIdxs; + constructNodesForCompression( + sections, sectionToIdx, sectionIdxsForFunctionCompression, + nodesForFunctionCompression, duplicateFunctionSectionIdxs, maxUN); + constructNodesForCompression( + sections, sectionToIdx, sectionIdxsForDataCompression, + nodesForDataCompression, duplicateDataSectionIdxs, maxUN); + + // Sort nodes by their Id (which is the section index) because the input + // linker order tends to be not bad + llvm::sort(nodesForFunctionCompression, + [](auto &L, auto &R) { return L.Id < R.Id; }); + llvm::sort(nodesForDataCompression, + [](auto &L, auto &R) { return L.Id < R.Id; }); + + { + TimeTraceScope timeScope("Balanced Partitioning"); + BalancedPartitioningConfig config; + BalancedPartitioning bp(config); + bp.run(nodesForStartup); + bp.run(nodesForFunctionCompression); + bp.run(nodesForDataCompression); + } + + unsigned numStartupSections = 0; + unsigned numCodeCompressionSections = 0; + unsigned numDuplicateCodeSections = 0; + unsigned numDataCompressionSections = 0; + unsigned numDuplicateDataSections = 0; + SetVector orderedSections; + // Order startup functions, + for (auto &node : nodesForStartup) { + const auto *isec = sections[node.Id]; + if (orderedSections.insert(isec)) + ++numStartupSections; + } + // then functions for compression, + for (auto &node : nodesForFunctionCompression) { + const auto *isec = sections[node.Id]; + if (orderedSections.insert(isec)) + ++numCodeCompressionSections; + + auto It = duplicateFunctionSectionIdxs.find(node.Id); + if (It == duplicateFunctionSectionIdxs.end()) + continue; + for (auto dupSecIdx : It->getSecond()) { + const auto *dupIsec = sections[dupSecIdx]; + if (orderedSections.insert(dupIsec)) + ++numDuplicateCodeSections; + } + } + // then data for compression. + for (auto &node : nodesForDataCompression) { + const auto *isec = sections[node.Id]; + if (orderedSections.insert(isec)) + ++numDataCompressionSections; + auto It = duplicateDataSectionIdxs.find(node.Id); + if (It == duplicateDataSectionIdxs.end()) + continue; + for (auto dupSecIdx : It->getSecond()) { + const auto *dupIsec = sections[dupSecIdx]; + if (orderedSections.insert(dupIsec)) + ++numDuplicateDataSections; + } + } + + if (verbose) { + unsigned numTotalOrderedSections = + numStartupSections + numCodeCompressionSections + + numDuplicateCodeSections + numDataCompressionSections + + numDuplicateDataSections; + dbgs() + << "Ordered " << numTotalOrderedSections + << " sections using balanced partitioning:\n Functions for startup: " + << numStartupSections + << "\n Functions for compression: " << numCodeCompressionSections + << "\n Duplicate functions: " << numDuplicateCodeSections + << "\n Data for compression: " << numDataCompressionSections + << "\n Duplicate data: " << numDuplicateDataSections << "\n"; + + if (!profilePath.empty()) { + // Evaluate this function order for startup + StringMap> symbolToPageNumbers; + const uint64_t pageSize = (1 << 14); + uint64_t currentAddress = 0; + for (const auto *isec : orderedSections) { + for (Symbol *sym : isec->symbols) { + if (auto *d = dyn_cast_or_null(sym)) { + uint64_t startAddress = currentAddress + d->value; + uint64_t endAddress = startAddress + d->size; + uint64_t firstPage = startAddress / pageSize; + // I think the kernel might pull in a few pages when one it touched, + // so it might be more accurate to force lastPage to be aligned by + // 4? + uint64_t lastPage = endAddress / pageSize; + StringRef rootSymbol = d->getName(); + rootSymbol = getRootSymbol(rootSymbol); + symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage); + if (rootSymbol.consume_front("_") || rootSymbol.consume_front("l_")) + symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage); + } + } + + currentAddress += isec->getSize(); + } + + // The area under the curve F where F(t) is the total number of page + // faults at step t. + unsigned area = 0; + for (auto &trace : reader->getTemporalProfTraces()) { + SmallSet touchedPages; + for (unsigned step = 0; step < trace.FunctionNameRefs.size(); step++) { + auto traceId = trace.FunctionNameRefs[step]; + auto [Filename, ParsedFuncName] = + getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId)); + ParsedFuncName = getRootSymbol(ParsedFuncName); + auto it = symbolToPageNumbers.find(ParsedFuncName); + if (it != symbolToPageNumbers.end()) { + auto &[firstPage, lastPage] = it->getValue(); + for (uint64_t i = firstPage; i <= lastPage; i++) + touchedPages.insert(i); + } + area += touchedPages.size(); + } + } + dbgs() << "Total area under the page fault curve: " << (float)area + << "\n"; + } + } + + DenseMap sectionPriorities; + for (const auto *isec : orderedSections) + sectionPriorities[isec] = --highestAvailablePriority; + return sectionPriorities; +} diff --git a/lld/MachO/BPSectionOrderer.h b/lld/MachO/BPSectionOrderer.h new file mode 100644 index 000000000000000..6f9eefd5d82beb6 --- /dev/null +++ b/lld/MachO/BPSectionOrderer.h @@ -0,0 +1,37 @@ +//===- BPSectionOrderer.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file uses Balanced Partitioning to order sections to improve startup +/// time and compressed size. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_BPSECTION_ORDERER_H +#define LLD_MACHO_BPSECTION_ORDERER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" + +namespace lld::macho { + +class InputSection; + +/// Run Balanced Partitioning to find the optimal function and data order to +/// improve startup time and compressed size. +/// +/// It is important that .subsections_via_symbols is used to ensure functions +/// and data are in their own sections and thus can be reordered. +llvm::DenseMap +runBalancedPartitioning(size_t &highestAvailablePriority, + llvm::StringRef profilePath, + bool forFunctionCompression, bool forDataCompression, + bool verbose); + +} // namespace lld::macho + +#endif diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt index 0b92488b00beac7..8b7183e4ec496d8 100644 --- a/lld/MachO/CMakeLists.txt +++ b/lld/MachO/CMakeLists.txt @@ -25,6 +25,7 @@ add_lld_library(lldMachO OutputSection.cpp OutputSegment.cpp Relocations.cpp + BPSectionOrderer.cpp SectionPriorities.cpp SymbolTable.cpp Symbols.cpp @@ -47,6 +48,7 @@ add_lld_library(lldMachO Object Option Passes + ProfileData Support TargetParser TextAPI diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index e79812b16ec1282..5beb0662ba72741 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -217,6 +217,11 @@ struct Configuration { bool callGraphProfileSort = false; llvm::StringRef printSymbolOrder; + llvm::StringRef irpgoProfileSortProfilePath; + bool functionOrderForCompression = false; + bool dataOrderForCompression = false; + bool verboseBpSectionOrderer = false; + SectionRenameMap sectionRenameMap; SegmentRenameMap segmentRenameMap; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index dc9d635b48ec460..c238fd52218a08a 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1736,6 +1736,34 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, OPT_no_warn_thin_archive_missing_members, true); config->generateUuid = !args.hasArg(OPT_no_uuid); + auto IncompatWithCGSort = [&](StringRef firstArgStr) { + // Throw an error only if --call-graph-profile-sort is explicitly specified + if (config->callGraphProfileSort) + if (const Arg *arg = args.getLastArgNoClaim(OPT_call_graph_profile_sort)) + error(firstArgStr + " is incompatible with " + arg->getSpelling()); + }; + if (const Arg *arg = args.getLastArg(OPT_irpgo_profile_sort)) { + config->irpgoProfileSortProfilePath = arg->getValue(); + IncompatWithCGSort(arg->getSpelling()); + } + if (const Arg *arg = args.getLastArg(OPT_compression_sort)) { + StringRef compressionSortStr = arg->getValue(); + if (compressionSortStr == "function") { + config->functionOrderForCompression = true; + } else if (compressionSortStr == "data") { + config->dataOrderForCompression = true; + } else if (compressionSortStr == "both") { + config->functionOrderForCompression = true; + config->dataOrderForCompression = true; + } else if (compressionSortStr != "none") { + error("unknown value `" + compressionSortStr + "` for " + + arg->getSpelling()); + } + if (compressionSortStr != "none") + IncompatWithCGSort(arg->getSpelling()); + } + config->verboseBpSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer); + for (const Arg *arg : args.filtered(OPT_alias)) { config->aliasedSymbols.push_back( std::make_pair(arg->getValue(0), arg->getValue(1))); diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index bbd8bf70c3a0c54..75bfaed9e4c08c2 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -126,6 +126,16 @@ def no_call_graph_profile_sort : Flag<["--"], "no-call-graph-profile-sort">, def print_symbol_order_eq: Joined<["--"], "print-symbol-order=">, HelpText<"Print a symbol order specified by --call-graph-profile-sort into the specified file">, Group; +def irpgo_profile_sort: Joined<["--"], "irpgo-profile-sort=">, + MetaVarName<"">, + HelpText<"Read the IRPGO profile at to order sections to improve startup time">, + Group; +def compression_sort: Joined<["--"], "compression-sort=">, + MetaVarName<"[none,function,data,both]">, + HelpText<"Order sections to improve compressed size">, Group; +def verbose_bp_section_orderer: Flag<["--"], "verbose-bp-section-orderer">, + HelpText<"Print information on how many sections were ordered by balanced partitioning and a measure of the expected number of page faults">, + Group; def ignore_auto_link_option : Separate<["--"], "ignore-auto-link-option">, Group; def ignore_auto_link_option_eq : Joined<["--"], "ignore-auto-link-option=">, diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp index 907aee29d2386f4..69c301d8ff8a713 100644 --- a/lld/MachO/SectionPriorities.cpp +++ b/lld/MachO/SectionPriorities.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SectionPriorities.h" +#include "BPSectionOrderer.h" #include "Config.h" #include "InputFiles.h" #include "Symbols.h" @@ -352,7 +353,14 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) { DenseMap macho::PriorityBuilder::buildInputSectionPriorities() { DenseMap sectionPriorities; - if (config->callGraphProfileSort) { + if (!config->irpgoProfileSortProfilePath.empty() || + config->functionOrderForCompression || config->dataOrderForCompression) { + TimeTraceScope timeScope("Balanced Partitioning Section Orderer"); + sectionPriorities = runBalancedPartitioning( + highestAvailablePriority, config->irpgoProfileSortProfilePath, + config->functionOrderForCompression, config->dataOrderForCompression, + config->verboseBpSectionOrderer); + } else if (config->callGraphProfileSort) { // Sort sections by the profile data provided by __LLVM,__cg_profile // sections. // diff --git a/lld/test/MachO/bp-section-orderer-errs.s b/lld/test/MachO/bp-section-orderer-errs.s new file mode 100644 index 000000000000000..f248b860ce5dcc7 --- /dev/null +++ b/lld/test/MachO/bp-section-orderer-errs.s @@ -0,0 +1,8 @@ +# RUN: not %lld -o /dev/null --irpgo-profile-sort=%s --call-graph-profile-sort 2>&1 | FileCheck %s --check-prefix=IRPGO-ERR +# IRPGO-ERR: --irpgo-profile-sort= is incompatible with --call-graph-profile-sort + +# RUN: not %lld -o /dev/null --compression-sort=function --call-graph-profile-sort %s 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ERR +# COMPRESSION-ERR: --compression-sort= is incompatible with --call-graph-profile-sort + +# RUN: not %lld -o /dev/null --compression-sort=malformed 2>&1 | FileCheck %s --check-prefix=COMPRESSION-MALFORM +# COMPRESSION-MALFORM: unknown value `malformed` for --compression-sort= diff --git a/lld/test/MachO/bp-section-orderer-stress.s b/lld/test/MachO/bp-section-orderer-stress.s new file mode 100644 index 000000000000000..fdc6a20e2655b96 --- /dev/null +++ b/lld/test/MachO/bp-section-orderer-stress.s @@ -0,0 +1,105 @@ +# REQUIRES: aarch64 + +# Generate a large test case and check that the output is deterministic. + +# RUN: %python %s %t.s %t.proftext + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t.s -o %t.o +# RUN: llvm-profdata merge %t.proftext -o %t.profdata + +# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt +# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt +# RUN: diff %t.order1.txt %t.order2.txt + +import random +import sys + +assembly_filepath = sys.argv[1] +proftext_filepath = sys.argv[2] + +random.seed(1234) +num_functions = 1000 +num_data = 100 +num_traces = 10 + +function_names = [f"f{n}" for n in range(num_functions)] +data_names = [f"d{n}" for n in range(num_data)] +profiled_functions = function_names[: int(num_functions / 2)] + +function_contents = [ + f""" +{name}: + add w0, w0, #{i % 4096} + add w1, w1, #{i % 10} + add w2, w0, #{i % 20} + adrp x3, {name}@PAGE + ret +""" + for i, name in enumerate(function_names) +] + +data_contents = [ + f""" +{name}: + .ascii "s{i % 2}-{i % 3}-{i % 5}" + .xword {name} +""" + for i, name in enumerate(data_names) +] + +trace_contents = [ + f""" +# Weight +1 +{", ".join(random.sample(profiled_functions, len(profiled_functions)))} +""" + for i in range(num_traces) +] + +profile_contents = [ + f""" +{name} +# Func Hash: +{i} +# Num Counters: +1 +# Counter Values: +1 +""" + for i, name in enumerate(profiled_functions) +] + +with open(assembly_filepath, "w") as f: + f.write( + f""" +.text +.globl _main + +_main: + ret + +{"".join(function_contents)} + +.data +{"".join(data_contents)} + +.subsections_via_symbols +""" + ) + +with open(proftext_filepath, "w") as f: + f.write( + f""" +:ir +:temporal_prof_traces + +# Num Traces +{num_traces} +# Trace Stream Size: +{num_traces} + +{"".join(trace_contents)} + +{"".join(profile_contents)} +""" + ) diff --git a/lld/test/MachO/bp-section-orderer.s b/lld/test/MachO/bp-section-orderer.s new file mode 100644 index 000000000000000..407787025150d2a --- /dev/null +++ b/lld/test/MachO/bp-section-orderer.s @@ -0,0 +1,123 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o +# RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP + +# STARTUP: Ordered 3 sections using balanced partitioning + +# RUN: %lld -arch arm64 -lSystem -e _main -o - %t/a.o --irpgo-profile-sort=%t/a.profdata -order_file %t/a.orderfile | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE + +# ORDERFILE: A +# ORDERFILE: F +# ORDERFILE: E +# ORDERFILE: D +# ORDERFILE-DAG: _main +# ORDERFILE-DAG: _B +# ORDERFILE-DAG: l_C +# ORDERFILE-DAG: s1 +# ORDERFILE-DAG: s2 +# ORDERFILE-DAG: r1 +# ORDERFILE-DAG: r2 + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH + +# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning +# COMPRESSION-DATA: Ordered 4 sections using balanced partitioning +# COMPRESSION-BOTH: Ordered 11 sections using balanced partitioning + +#--- a.s +.text +.globl _main, A, _B, l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + +_main: + ret +A: + ret +_B: + add w0, w0, #1 + bl A + ret +l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222: + add w0, w0, #2 + bl A + ret +D: + add w0, w0, #2 + bl _B + ret +E: + add w0, w0, #2 + bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + ret +F: + add w0, w0, #3 + bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + ret + +.data +s1: + .ascii "hello world" +s2: + .ascii "i am a string" +r1: + .quad s1 +r2: + .quad r1 + +.subsections_via_symbols + +#--- a.proftext +:ir +:temporal_prof_traces +# Num Traces +1 +# Trace Stream Size: +1 +# Weight +1 +A, B, C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666 + +A +# Func Hash: +1111 +# Num Counters: +1 +# Counter Values: +1 + +B +# Func Hash: +2222 +# Num Counters: +1 +# Counter Values: +1 + +C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666 +# Func Hash: +3333 +# Num Counters: +1 +# Counter Values: +1 + +D +# Func Hash: +4444 +# Num Counters: +1 +# Counter Values: +1 + +#--- a.orderfile +A +F +E +D