-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[lld][ELF] Support relax R_LARCH_ALIGN #78692
Conversation
@llvm/pr-subscribers-lld-elf @llvm/pr-subscribers-lld Author: Jinyang He (MQ-mengqing) ChangesRefer to commit 6611d58 ("Relax R_RISCV_ALIGN"), we can relax R_LARCH_ALIGN by same way. Reuse 1, The flow of relax R_LARCH_ALIGN is almost consistent with RISCV. The difference is that LoongArch only has 4-bytes NOP and all executable insn is 4-bytes aligned. So LoongArch not need rewrite NOP sequence. Alignment maxBytesEmit parameter is supported in psABI v2.30. For reliability, only one relax pass is supported in this patch. 2, Relax R_LARCH_GOT_PC_{HI20,LO12} to R_LARCH_PCALA_{HI20,LO12} if the sym is a local got. And it can perform the next relaxation rule. 3, Relax R_LARCH_PCALA_{HI20,LO12} to R_LARCH_PCREL20_S2 if the dest is 4-bytes aligned and (dest - pc) is a 22bits-simm. Patch is 24.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78692.diff 8 Files Affected:
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index ab2ec5b447d000..235bd0f783d8e8 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -36,6 +36,8 @@ class LoongArch final : public TargetInfo {
bool usesOnlyLowPageBits(RelType type) const override;
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
+ bool relaxOnce(int pass) const override;
+ void finalizeRelax(int passes) const override;
};
} // end anonymous namespace
@@ -48,6 +50,8 @@ enum Op {
ADDI_W = 0x02800000,
ADDI_D = 0x02c00000,
ANDI = 0x03400000,
+ PCADDI = 0x18000000,
+ PCALAU12I = 0x1a000000,
PCADDU12I = 0x1c000000,
LD_W = 0x28800000,
LD_D = 0x28c00000,
@@ -465,8 +469,9 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
case R_LARCH_TLS_GD_HI20:
return R_TLSGD_GOT;
case R_LARCH_RELAX:
- // LoongArch linker relaxation is not implemented yet.
- return R_NONE;
+ return config->relax ? R_RELAX_HINT : R_NONE;
+ case R_LARCH_ALIGN:
+ return R_RELAX_HINT;
// Other known relocs that are explicitly unimplemented:
//
@@ -659,6 +664,252 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
}
}
+// Relax R_LARCH_PCALA_{HI20,LO12}
+// pcalau12i+addi.{d,w} to pcaddi
+static void relaxPcalaAddi(const InputSection &sec, size_t i, uint64_t loc,
+ Relocation &r, Relocation &rl, uint32_t &remove) {
+ const Symbol &sym = *r.sym;
+ const uint32_t pca = read64le(sec.content().data() + r.offset);
+ const uint32_t adi = read64le(sec.content().data() + rl.offset);
+ const uint32_t rd = pca & 0x1f;
+ const uint32_t op_addi = config->is64 ? ADDI_D : ADDI_W;
+ const uint64_t dest = sym.getVA() + r.addend;
+ const int64_t displace = dest - loc;
+
+ if (!(dest & 0x3) && isInt<22>(displace) && rd == (adi & 0x1f) &&
+ rd == ((adi >> 5) & 0x1f) && rl.offset == r.offset + 4 &&
+ (adi & 0xffc00000) == op_addi) {
+ sec.relaxAux->relocTypes[i] = R_LARCH_PCREL20_S2;
+ sec.relaxAux->relocTypes[i + 2] = R_LARCH_RELAX;
+ sec.relaxAux->writes.push_back(PCADDI | rd);
+ remove = 4;
+ }
+}
+
+// Relax R_LARCH_GOT_PC_{HI20,LO12}
+// pcalau12i+ld.{d,w} to pcalau12i+addi.{d,w} or pcaddi
+static void relaxPcalaLd(const InputSection &sec, size_t i, uint64_t loc,
+ Relocation &r, Relocation &rl, uint32_t &remove) {
+ const Symbol &sym = *r.sym;
+ const uint32_t pca = read64le(sec.content().data() + r.offset);
+ const uint32_t ld = read64le(sec.content().data() + rl.offset);
+ const uint32_t rd = pca & 0x1f;
+ const uint32_t op_addi = config->is64 ? ADDI_D : ADDI_W;
+ const uint32_t op_ld = config->is64 ? LD_D : LD_W;
+ const uint64_t dest = sym.getVA() + r.addend;
+ const int64_t displace = dest - loc;
+
+ if (!sym.isLocal())
+ return;
+
+ if (rd != (ld & 0x1f) || rd != ((ld >> 5) & 0x1f) ||
+ rl.offset != r.offset + 4 || (ld & 0xffc00000) != op_ld)
+ return;
+
+ if (!(dest & 0x3) && isInt<22>(displace)) {
+ sec.relaxAux->relocTypes[i] = R_LARCH_PCREL20_S2;
+ sec.relaxAux->relocTypes[i + 2] = R_LARCH_RELAX;
+ sec.relaxAux->writes.push_back(PCADDI | rd);
+ remove = 4;
+ } else {
+ sec.relaxAux->relocTypes[i] = R_LARCH_PCALA_HI20;
+ sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCALA_LO12;
+ sec.relaxAux->writes.push_back(PCALAU12I | rd);
+ sec.relaxAux->writes.push_back(op_addi | (rd << 5) | rd);
+ }
+}
+
+static bool relax(InputSection &sec) {
+ const uint64_t secAddr = sec.getVA();
+ auto &aux = *sec.relaxAux;
+ bool changed = false;
+ ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
+ uint64_t delta = 0;
+
+ std::fill_n(aux.relocTypes.get(), sec.relocs().size(), R_LARCH_NONE);
+ aux.writes.clear();
+ for (auto [i, r] : llvm::enumerate(sec.relocs())) {
+ const uint64_t loc = secAddr + r.offset - delta;
+ uint32_t &cur = aux.relocDeltas[i], remove = 0;
+ switch (r.type) {
+ case R_LARCH_ALIGN: {
+ const uint64_t addend = r.sym->isUndefined() ? Log2_64(r.addend) + 1 : r.addend;
+ const uint64_t allBytes = (1 << (addend & 0xff)) - 4;
+ const uint64_t align = 1 << (addend & 0xff);
+ const uint64_t maxBytes = addend >> 8;
+ const uint64_t off = loc & (align - 1);
+ const uint64_t curBytes = off == 0 ? 0 : align - off;
+ // All bytes beyond the alignment boundary should be removed.
+ // If emit bytes more than max bytes to emit, remove all.
+ if (maxBytes != 0 && curBytes > maxBytes)
+ remove = allBytes;
+ else
+ remove = allBytes - curBytes;
+ assert(static_cast<int32_t>(remove) >= 0 &&
+ "R_LARCH_ALIGN needs expanding the content");
+ break;
+ }
+ case R_LARCH_PCALA_HI20:
+ if (i + 3 < sec.relocs().size() &&
+ sec.relocs()[i + 1].type == R_LARCH_RELAX &&
+ sec.relocs()[i + 2].type == R_LARCH_PCALA_LO12 &&
+ sec.relocs()[i + 3].type == R_LARCH_RELAX)
+ relaxPcalaAddi(sec, i, loc, r, sec.relocs()[i + 2], remove);
+ break;
+ case R_LARCH_GOT_PC_HI20:
+ if (i + 3 < sec.relocs().size() &&
+ sec.relocs()[i + 1].type == R_LARCH_RELAX &&
+ sec.relocs()[i + 2].type == R_LARCH_GOT_PC_LO12 &&
+ sec.relocs()[i + 3].type == R_LARCH_RELAX)
+ relaxPcalaLd(sec, i, loc, r, sec.relocs()[i + 2], remove);
+ break;
+ }
+
+ // For all anchors whose offsets are <= r.offset, they are preceded by
+ // the previous relocation whose `relocDeltas` value equals `delta`.
+ // Decrease their st_value and update their st_size.
+ for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) {
+ if (sa[0].end)
+ sa[0].d->size = sa[0].offset - delta - sa[0].d->value;
+ else
+ sa[0].d->value = sa[0].offset - delta;
+ }
+ delta += remove;
+ if (delta != cur) {
+ cur = delta;
+ changed = true;
+ }
+ }
+
+ for (const SymbolAnchor &a : sa) {
+ if (a.end)
+ a.d->size = a.offset - delta - a.d->value;
+ else
+ a.d->value = a.offset - delta;
+ }
+ // Inform assignAddresses that the size has changed.
+ if (!isUInt<32>(delta))
+ fatal("section size decrease is too large: " + Twine(delta));
+ sec.bytesDropped = delta;
+ return changed;
+}
+
+// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
+// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
+// shrinkage may reduce displacement and make more relocations eligible for
+// relaxation. Code shrinkage may increase displacement to a call/load/store
+// target at a higher fixed address, invalidating an earlier relaxation. Any
+// change in section sizes can have cascading effect and require another
+// relaxation pass.
+bool LoongArch::relaxOnce(int pass) const {
+ if (config->relocatable)
+ return false;
+
+ // TODO Relax multiple times.
+ if (pass > 0)
+ return false;
+
+ initSymbolAnchors();
+ SmallVector<InputSection *, 0> storage;
+ bool changed = false;
+ for (OutputSection *osec : outputSections) {
+ if (!(osec->flags & SHF_EXECINSTR))
+ continue;
+ for (InputSection *sec : getInputSections(*osec, storage))
+ changed |= relax(*sec);
+ }
+ return changed;
+}
+
+void LoongArch::finalizeRelax(int passes) const {
+ SmallVector<InputSection *, 0> storage;
+ for (OutputSection *osec : outputSections) {
+ if (!(osec->flags & SHF_EXECINSTR))
+ continue;
+ for (InputSection *sec : getInputSections(*osec, storage)) {
+ RelaxAux &aux = *sec->relaxAux;
+ if (!aux.relocDeltas)
+ continue;
+
+ MutableArrayRef<Relocation> rels = sec->relocs();
+ ArrayRef<uint8_t> old = sec->content();
+ size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
+ size_t writesIdx = 0;
+ uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize);
+ uint64_t offset = 0;
+ int64_t delta = 0;
+ sec->content_ = p;
+ sec->size = newSize;
+ sec->bytesDropped = 0;
+
+ // Update section content: remove NOPs for R_LARCH_ALIGN and rewrite
+ // instructions for relaxed relocations.
+ for (size_t i = 0, e = rels.size(); i != e; ++i) {
+ uint32_t remove = aux.relocDeltas[i] - delta;
+ delta = aux.relocDeltas[i];
+ if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE)
+ continue;
+
+ // Copy from last location to the current relocated location.
+ const Relocation &r = rels[i];
+ uint64_t size = r.offset - offset;
+
+ // The rel.type may be fixed to R_LARCH_RELAX and the instruction
+ // will be deleted, then cause size<0
+ if (remove == 0 && (int64_t)size < 0) {
+ assert(aux.relocTypes[i] == R_LARCH_RELAX && "Unexpected size");
+ continue;
+ }
+
+ memcpy(p, old.data() + offset, size);
+ p += size;
+
+ int64_t skip = 0;
+ if (RelType newType = aux.relocTypes[i]) {
+ switch (newType) {
+ case R_LARCH_RELAX:
+ // Indicate the relocation is ignored.
+ break;
+ case R_LARCH_PCREL20_S2:
+ case R_LARCH_PCALA_HI20:
+ case R_LARCH_PCALA_LO12:
+ skip = 4;
+ write32le(p, aux.writes[writesIdx++]);
+ break;
+ default:
+ llvm_unreachable("unsupported type");
+ }
+ }
+
+ p += skip;
+ offset = r.offset + skip + remove;
+ }
+ memcpy(p, old.data() + offset, old.size() - offset);
+
+ // Subtract the previous relocDeltas value from the relocation offset.
+ // For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease
+ // their r_offset by the same delta.
+ delta = 0;
+ for (size_t i = 0, e = rels.size(); i != e;) {
+ uint64_t cur = rels[i].offset;
+ do {
+ rels[i].offset -= delta;
+ // Fix expr so that getRelocTargetVA will get update value.
+ if (aux.relocTypes[i] == R_LARCH_PCREL20_S2)
+ rels[i].expr = R_PC;
+ else if (aux.relocTypes[i] == R_LARCH_PCALA_HI20)
+ rels[i].expr = R_LOONGARCH_PAGE_PC;
+ else if (aux.relocTypes[i] == R_LARCH_PCALA_LO12)
+ rels[i].expr = R_ABS;
+ if (aux.relocTypes[i] != R_LARCH_NONE)
+ rels[i].type = aux.relocTypes[i];
+ } while (++i != e && rels[i].offset == cur);
+ delta = aux.relocDeltas[i - 1];
+ }
+ }
+ }
+}
+
TargetInfo *elf::getLoongArchTargetInfo() {
static LoongArch target;
return ⌖
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index d7d3d3e4781497..4536236ff85e89 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -45,6 +45,7 @@ class RISCV final : public TargetInfo {
uint64_t val) const override;
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
bool relaxOnce(int pass) const override;
+ void finalizeRelax(int passes) const override;
};
} // end anonymous namespace
@@ -560,33 +561,13 @@ void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
}
}
-namespace {
-struct SymbolAnchor {
- uint64_t offset;
- Defined *d;
- bool end; // true for the anchor of st_value+st_size
-};
-} // namespace
-
-struct elf::RISCVRelaxAux {
- // This records symbol start and end offsets which will be adjusted according
- // to the nearest relocDeltas element.
- SmallVector<SymbolAnchor, 0> anchors;
- // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] :
- // 0).
- std::unique_ptr<uint32_t[]> relocDeltas;
- // For relocations[i], the actual type is relocTypes[i].
- std::unique_ptr<RelType[]> relocTypes;
- SmallVector<uint32_t, 0> writes;
-};
-
-static void initSymbolAnchors() {
+void elf::initSymbolAnchors() {
SmallVector<InputSection *, 0> storage;
for (OutputSection *osec : outputSections) {
if (!(osec->flags & SHF_EXECINSTR))
continue;
for (InputSection *sec : getInputSections(*osec, storage)) {
- sec->relaxAux = make<RISCVRelaxAux>();
+ sec->relaxAux = make<RelaxAux>();
if (sec->relocs().size()) {
sec->relaxAux->relocDeltas =
std::make_unique<uint32_t[]>(sec->relocs().size());
@@ -819,7 +800,7 @@ bool RISCV::relaxOnce(int pass) const {
return changed;
}
-void elf::riscvFinalizeRelax(int passes) {
+void RISCV::finalizeRelax(int passes) const {
llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation");
log("relaxation passes: " + Twine(passes));
SmallVector<InputSection *, 0> storage;
@@ -827,7 +808,7 @@ void elf::riscvFinalizeRelax(int passes) {
if (!(osec->flags & SHF_EXECINSTR))
continue;
for (InputSection *sec : getInputSections(*osec, storage)) {
- RISCVRelaxAux &aux = *sec->relaxAux;
+ RelaxAux &aux = *sec->relaxAux;
if (!aux.relocDeltas)
continue;
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 586404643cc1fd..80421f124ecba7 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -352,9 +352,10 @@ InputSectionBase *InputSection::getRelocatedSection() const {
template <class ELFT, class RelTy>
void InputSection::copyRelocations(uint8_t *buf) {
- if (config->relax && !config->relocatable && config->emachine == EM_RISCV) {
- // On RISC-V, relaxation might change relocations: copy from internal ones
- // that are updated by relaxation.
+ if (config->relax && !config->relocatable &&
+ (config->emachine == EM_RISCV || config->emachine == EM_LOONGARCH)) {
+ // On LoongArch and RISC-V, relaxation might change relocations: copy
+ // from internal ones that are updated by relaxation.
InputSectionBase *sec = getRelocatedSection();
copyRelocations<ELFT, RelTy>(buf, llvm::make_range(sec->relocations.begin(),
sec->relocations.end()));
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index fbaea57bd586b0..9fbe85fce1f24c 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -101,7 +101,23 @@ class SectionBase {
link(link), info(info) {}
};
-struct RISCVRelaxAux;
+struct SymbolAnchor {
+ uint64_t offset;
+ Defined *d;
+ bool end; // true for the anchor of st_value+st_size
+};
+
+struct RelaxAux {
+ // This records symbol start and end offsets which will be adjusted according
+ // to the nearest relocDeltas element.
+ SmallVector<SymbolAnchor, 0> anchors;
+ // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] :
+ // 0).
+ std::unique_ptr<uint32_t[]> relocDeltas;
+ // For relocations[i], the actual type is relocTypes[i].
+ std::unique_ptr<RelType[]> relocTypes;
+ SmallVector<uint32_t, 0> writes;
+};
// This corresponds to a section of an input file.
class InputSectionBase : public SectionBase {
@@ -225,9 +241,9 @@ class InputSectionBase : public SectionBase {
// basic blocks.
JumpInstrMod *jumpInstrMod = nullptr;
- // Auxiliary information for RISC-V linker relaxation. RISC-V does not use
- // jumpInstrMod.
- RISCVRelaxAux *relaxAux;
+ // Auxiliary information for RISC-V and LoongArch linker relaxation.
+ // RISC-V and LoongArch does not use jumpInstrMod.
+ RelaxAux *relaxAux;
// The compressed content size when `compressed` is true.
size_t compressedSize;
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index ab6b6b9c013ba3..ed00e81c0e6c81 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -95,6 +95,8 @@ class TargetInfo {
// Do a linker relaxation pass and return true if we changed something.
virtual bool relaxOnce(int pass) const { return false; }
+ // Do finalize relaxation after collecting relaxation infos.
+ virtual void finalizeRelax(int passes) const {}
virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
JumpModType val) const {}
@@ -236,6 +238,7 @@ void addArmSyntheticSectionMappingSymbol(Defined *);
void sortArmMappingSymbols();
void convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf);
void createTaggedSymbols(const SmallVector<ELFFileBase *, 0> &files);
+void initSymbolAnchors();
LLVM_LIBRARY_VISIBILITY extern const TargetInfo *target;
TargetInfo *getTarget();
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index dfec5e07301a74..e2d83f0122b0b8 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1750,8 +1750,8 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
}
}
}
- if (!config->relocatable && config->emachine == EM_RISCV)
- riscvFinalizeRelax(pass);
+ if (!config->relocatable)
+ target->finalizeRelax(pass);
if (config->relocatable)
for (OutputSection *sec : outputSections)
diff --git a/lld/test/ELF/loongarch-relax-align.s b/lld/test/ELF/loongarch-relax-align.s
new file mode 100644
index 00000000000000..19588b5b21cb98
--- /dev/null
+++ b/lld/test/ELF/loongarch-relax-align.s
@@ -0,0 +1,137 @@
+# REQUIRES: loongarch
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax %s -o %t.32.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o -o %t.32
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o -o %t.64
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o --no-relax -o %t.32n
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o --no-relax -o %t.64n
+# RUN: llvm-objdump -td --no-show-raw-insn %t.32 | FileCheck %s --check-prefixes=CHECK,RELAX
+# RUN: llvm-objdump -td --no-show-raw-insn %t.64 | FileCheck %s --check-prefixes=CHECK,RELAX
+# RUN: llvm-objdump -td --no-show-raw-insn %t.32n | FileCheck %s --check-prefixes=CHECK,NOREL
+# RUN: llvm-objdump -td --no-show-raw-insn %t.64n | FileCheck %s --check-prefixes=CHECK,NOREL
+
+## Test the R_LARCH_ALIGN without symbol index.
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.o64.o --defsym=old=1
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o -o %t.o64
+# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o --no-relax -o %t.o64n
+# RUN: llvm-objdump -td --no-show-raw-insn %t.o64 | FileCheck %s --check-prefixes=CHECK,RELAX
+# RUN: llvm-objdump -td --no-show-raw-insn %t.o64n | FileCheck %s --check-prefixes=CHECK,NOREL
+
+## -r keeps section contents unchanged.
+# RUN: ld.lld -r %t.64.o -o %t.64.r
+# RUN: llvm-objdump -dr --no-show-raw-insn %t.64.r | FileCheck %s --check-prefix=CHECKR
+
+# RELAX-DAG: {{0*}}10000 l .text {{0*}}34 .Ltext_start
+# RELAX-DAG: {{0*}}1002c l .text {{0*}}08 .L1
+# RELAX-DAG: {{0*}}10030 l .text {{0*}}04 .L2
+# RELAX-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start
+# NOREL-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start
+# NOREL-DAG: {{0*}}10038 l .text {{0*}}0c .L1
+# NOREL-DAG: {{0*}}10040 l .text {{0*}}04 .L2
+# NOREL-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start
+
+# CHECK: <.Ltext_start>:
+# CHECK-NEXT: 10000: break 1
+# CHECK-NEXT: 10004: break 2
+# CHECK-NEXT: 10008: nop
+# CHECK-NEXT: 1000c: nop
+# CHECK-NEXT: 10010: break 3
+# CHECK-NEXT: 10014: break 4
+# CHECK-NEXT: 10018: nop
+# CHECK-NEXT: 1001c: nop
+# RELAX-NEXT: 10020: pcaddi $zero, -8
+# RELAX-NEXT: 10024: pcaddi $zero, 2
+# RELAX-NEXT: 10028: pcaddi $zero, 2
+# NOREL-NEXT: 10020: pcalau12i $zero, 0
+# NOREL-NEXT: 10024: addi.{{[dw]}} $zero, $zero, 0
+# NOREL-NEXT: 10028: pcalau12i $zero, 0
+# NOREL-NEXT: 1002c: addi.{{[dw]}} $zero, $zero, 56
+# NOREL-NEXT: 10030: pcalau12i $zero, 0
+# NOREL-NEXT: 10034: addi.{{[dw]}} $zero, $zero, 64
+# CHECK-EMPTY:
+# CHECK-NEXT: <.L1>:
+# RELAX-NEXT: 1002c: nop
+# NOREL-NEXT: 10038: nop
+# NOREL-NEXT: 1003c: nop
+# CHECK-EMPTY:
+# CHECK-NEXT: <.L2>:
+# CHECK-NEXT: break 5
+
+# CHECK: <.Ltext2_start>:
+# RELAX-NEXT: 20000: pcaddi $zero, 0
+# RELAX-NEXT: 20004: nop
+# NOREL-NEXT: 20000: pcalau12i $zero, 0
+# NOREL-NEXT: 20004: addi.{{[dw]}} $zero, $zero, 0
+# CHECK-NEXT: 20008: nop
+...
[truncated]
|
Add: @SixWeining @MaskRay @xen0n |
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Basically looks good to me. Thanks.
@MaskRay What do you think about this change? If looks OK, do you mind merging this before 18.x is created or cherry-picking later? As gcc/binutils have began generating relax relocations for about 1 year, lld needs this PR so that it can link fundamental system objects (such as Scrt1.o
).
IMO at least supporting R_LARCH_RELAX should be considered a bug fix and it should be in 18.x, or we are basically shipping "unusable" things. |
Sorry for the late reply, but I agree with cherry-picking this change into LLVM 18, at least the |
lld/ELF/InputSection.h
Outdated
// jumpInstrMod. | ||
RISCVRelaxAux *relaxAux; | ||
// Auxiliary information for RISC-V and LoongArch linker relaxation. | ||
// RISC-V and LoongArch does not use jumpInstrMod. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
They do not use jumpInstrMod
lld/ELF/Arch/LoongArch.cpp
Outdated
if (config->relocatable) | ||
return false; | ||
|
||
// TODO Relax multiple times. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When I did some edge tests, it triggers "relaxation not converged". In the following example, <1> and <2> only one can be relaxed and cause pass
increasing. So I disable multiple times relax for stability.
.global sym1
.global sym2
.type sym1, @function
.type sym2, @function
.section ".sec1", "ax"
sym1:
la.pcrel $t0, .L2 - 8 # <1>
.L1:
break 0
.size sym1, . - sym1
.section ".sec2", "ax"
sym2:
.p2align 3
.p2align 3
la.pcrel $t0, .L1 - 8 # <2>
.L2:
break 0
.size sym2, . - sym2
$ ./bin/llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax tmp.s -o tmp.o
$ ./bin/ld.lld tmp.o -e 0 --section-start=.sec1=0x000000 --section-start=.sec2=0x200000
$ ld.lld: error: relaxation not converged
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interestingly, ld.bfd relaxes neither.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ld.bfd relax R_LARCH_ALIGN at the final pass. So it not conflict in this test. But it also has some shortages. Some symbol values will change due to final relax R_LARCH_ALIGN, so that the previous relax may fail.
.text
.p2align 4
.irp t,0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f
.p2align 10
.endr
.p2align 6
.p2align 4
la.pcrel $s9, .L1
.section .text2, "ax"
.p2align 4
la.pcrel $r0, .L1
.data
.L1: .word 0
$ ./gas/as-new 1.s -o 1.o
$ ./ld/ld-new 1.o -e 0 -Ttext=0x100000 -Tdata=0x300000
./ld/ld-new: 1.o: relocation R_LARCH_PCREL20_S2 overflow 0x200000
In my idea, I think the relax method MaskRay supported for RISCV is better. I keep here just one time relaxation because I'm also afraid that I don't know enough this method. In this relaxation all R_LARCH_ALIGN will be relaxed, which is expected.
lld/ELF/Arch/LoongArch.cpp
Outdated
const uint64_t dest = sym.getVA() + r.addend; | ||
const int64_t displace = dest - loc; | ||
|
||
if (!sym.isLocal()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems strange. `isPreemptible decides whether a GOT can be optimized.
lld/test/ELF/loongarch-relax-align.s
Outdated
# NOREL-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start | ||
|
||
# CHECK: <.Ltext_start>: | ||
# CHECK-NEXT: 10000: break 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For newer tests, omit addresses then they are insignificant
For release/18.x, I think relaxing just |
Refer to commit 6611d58 ("Relax R_RISCV_ALIGN"), we can relax R_LARCH_ALIGN by same way. Reuse `SymbolAnchor`, `RISCVRelaxAux` and `initSymbolAnchors` to simplify codes. As `riscvFinalizeRelax` is an arch-specific function, put it override on `TargetInfo::finalizeRelax`, so that LoongArch can override it, too. The flow of relax R_LARCH_ALIGN is almost consistent with RISCV. The difference is that LoongArch only has 4-bytes NOP and all executable insn is 4-bytes aligned. So LoongArch not need rewrite NOP sequence. Alignment maxBytesEmit parameter is supported in psABI v2.30.
I agree about the spliting. In addition, I think lld-18 needs to process |
Well, if it can't, it's largely due to GCC/binutils moving too fast on relaxation :( (which I warned long time ago). |
a926137
to
6460625
Compare
Split it and support relax R_LARCH_ALIGN only. I've force pushed it and updated the title and commit info. Please review. Thanks! |
Patch for uleb128 is relative easy (already done locally) and will submitted after this PR is merged. |
lld/ELF/Arch/LoongArch.cpp
Outdated
|
||
// The rel.type may be fixed to R_LARCH_RELAX and the instruction | ||
// will be deleted, then cause size<0 | ||
if (remove == 0 && (int64_t)size < 0) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This whole if
is unneeded? No test fail.
continue; | ||
} | ||
|
||
memcpy(p, old.data() + offset, size); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RISCV.cpp has just one memcpy and it is immediately after uint64_t size = r.offset - offset;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The first memcpy is copy from the last offset to current reloc offset. The second memcpy is copy from final offset the to the section end.
nop; nop | ||
.endif | ||
|
||
## +0x20: Test symbol value and symbol size can be handled. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The symbol value test may be weak. riscv-relax-align.s
carefully covers many cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is enough for check symbol value. .Ltext_start
checks value is not changed but size is changed. .L1
checks value is changed and size is changed. .L2
checks value is changed but size is not changed.
I'd suggest make some tests to see if |
With lld's structure, a |
But |
Just test the current behavior, so that a future change supporting |
I have created a backport issue #80789 for this PR. |
Refer to commit 6611d58 ("Relax R_RISCV_ALIGN"), we can relax R_LARCH_ALIGN by same way. Reuse `SymbolAnchor`, `RISCVRelaxAux` and `initSymbolAnchors` to simplify codes. As `riscvFinalizeRelax` is an arch-specific function, put it override on `TargetInfo::finalizeRelax`, so that LoongArch can override it, too. The flow of relax R_LARCH_ALIGN is almost consistent with RISCV. The difference is that LoongArch only has 4-bytes NOP and all executable insn is 4-bytes aligned. So LoongArch not need rewrite NOP sequence. Alignment maxBytesEmit parameter is supported in psABI v2.30. (cherry picked from commit 06a728f)
Refer to commit 6611d58 ("Relax R_RISCV_ALIGN"), we can relax R_LARCH_ALIGN by same way. Reuse `SymbolAnchor`, `RISCVRelaxAux` and `initSymbolAnchors` to simplify codes. As `riscvFinalizeRelax` is an arch-specific function, put it override on `TargetInfo::finalizeRelax`, so that LoongArch can override it, too. The flow of relax R_LARCH_ALIGN is almost consistent with RISCV. The difference is that LoongArch only has 4-bytes NOP and all executable insn is 4-bytes aligned. So LoongArch not need rewrite NOP sequence. Alignment maxBytesEmit parameter is supported in psABI v2.30. (cherry picked from commit 06a728f)
Refer to commit 6611d58 ("Relax R_RISCV_ALIGN"), we can relax R_LARCH_ALIGN by same way. Reuse
SymbolAnchor
,RISCVRelaxAux
andinitSymbolAnchors
to simplify codes. AsriscvFinalizeRelax
is an arch-specific function, put it override onTargetInfo::finalizeRelax
, so that LoongArch can override it, too.The flow of relax R_LARCH_ALIGN is almost consistent with RISCV. The difference is that LoongArch only has 4-bytes NOP and all executable insn is 4-bytes aligned. So LoongArch not need rewrite NOP sequence. Alignment maxBytesEmit parameter is supported in psABI v2.30.