Skip to content

Commit

Permalink
Address comments from Ellis
Browse files Browse the repository at this point in the history
  • Loading branch information
kyulee-com committed May 5, 2024
1 parent a8f5e1a commit 1dbd111
Show file tree
Hide file tree
Showing 11 changed files with 201 additions and 148 deletions.
6 changes: 3 additions & 3 deletions llvm/include/llvm/CodeGenData/CodeGenData.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ class CodeGenData {
/// Global outlined hash tree that has outlined hash sequences across modules.
std::unique_ptr<OutlinedHashTree> PublishedHashTree;

/// This flag is set when -fcgdata-generate is passed.
/// Or, it can be mutated with -ftwo-codegen-rounds during two codegen runs.
/// This flag is set when -fcodegen-data-generate is passed.
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
bool EmitCGData;

/// This is a singleton instance which is thread-safe. Unlike profile data
Expand Down Expand Up @@ -174,7 +174,7 @@ namespace IndexedCGData {
const uint64_t Magic = 0x81617461646763ff; // "\xffcgdata\x81"

enum CGDataVersion {
// Version 1 is the first version. This version support the outlined
// Version 1 is the first version. This version supports the outlined
// hash tree.
Version1 = 1,
CurrentVersion = CG_DATA_INDEX_VERSION
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/CodeGenData/CodeGenDataReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,23 +144,23 @@ Error TextCodeGenDataReader::read() {

// Parse the custom header line by line.
while (Line->starts_with(":")) {
StringRef Str = Line->substr(1);
StringRef Str = Line->drop_front().rtrim();
if (Str.equals_insensitive("outlined_hash_tree"))
DataKind |= CGDataKind::FunctionOutlinedHashTree;
else
return error(cgdata_error::bad_header);
++Line;
}

// We treat an empty header (that as a comment # only) as a valid header.
// We treat an empty header (that is a comment # only) as a valid header.
if (Line.is_at_eof()) {
if (DataKind != CGDataKind::Unknown)
return error(cgdata_error::bad_header);
return Error::success();
}

// The YAML docs follow after the header.
const char *Pos = (*Line).data();
const char *Pos = Line->data();
size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
reinterpret_cast<size_t>(Pos);
yaml::Input YOS(StringRef(Pos, Size));
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/CodeGenData/CodeGenDataWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {

Header.OutlinedHashTreeOffset = 0;

// Only write out up to the CGDataKind. We need to remember the offest of the
// remaing fields to allow back patching later.
// Only write up to the CGDataKind. We need to remember the offset of the
// remaining fields to allow back-patching later.
COS.write(Header.Magic);
COS.write32(Header.Version);
COS.write32(Header.DataKind);
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/tools/llvm-cgdata/dump.test
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ RUN: split-file %s %t
RUN: llvm-cgdata dump -binary %t/dump.cgtext -o %t/dump.cgdata
RUN: llvm-cgdata dump -text %t/dump.cgdata -o %t/dump-round.cgtext
RUN: llvm-cgdata dump -binary %t/dump-round.cgtext -o %t/dump-round.cgdata
RUN: llvm-cgdata dump -text %t/dump-round.cgtext -o %t/dump-round-round.cgtext
RUN: diff %t/dump.cgdata %t/dump-round.cgdata
RUN: diff %t/dump-round.cgtext %t/dump-round-round.cgtext

;--- dump.cgtext
# Outlined stable hash tree
Expand Down
17 changes: 10 additions & 7 deletions llvm/test/tools/llvm-cgdata/empty.test
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
# Test no input file
RUN: not llvm-cgdata dump -o - 2>&1 | FileCheck %s --check-prefix=NOFILE --ignore-case
NOFILE: error: No such file or directory

# Test for empty cgdata file, which is invalid.
RUN: touch %t_emptyfile.cgtext
RUN: not llvm-cgdata dump %t_emptyfile.cgtext -text -o - 2>&1 | FileCheck %s --check-prefix ERROR
ERROR: {{.}}emptyfile.cgtext: empty codegen data
RUN: not llvm-cgdata dump %t_emptyfile.cgtext -text 2>&1 | FileCheck %s --check-prefix=EMPTY
EMPTY: {{.}}emptyfile.cgtext: empty codegen data

# Test for empty header in the text format. It can be converted to a valid binary file.
RUN: printf '#' > %t_emptyheader.cgtext
RUN: llvm-cgdata dump %t_emptyheader.cgtext -binary -o %t_emptyheader.cgdata

# Without any cgdata other than the header, no data shows by default.
RUN: llvm-cgdata show %t_emptyheader.cgdata | FileCheck %s --allow-empty --check-prefix EMPTY
EMPTY-NOT: any
RUN: llvm-cgdata show %t_emptyheader.cgdata | count 0

# The version number appears when asked, as it's in the header
RUN: llvm-cgdata show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix VERSION
VERSION: Version: {{.}}
RUN: llvm-cgdata show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
VERSION: Version: 1

# When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
RUN: llvm-cgdata dump %t_emptyheader.cgdata -text -o - | FileCheck %s --allow-empty --check-prefix EMPTY
RUN: llvm-cgdata dump %t_emptyheader.cgdata -text | count 0

# Synthesize a header only cgdata.
# struct Header {
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/tools/llvm-cgdata/error.test
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,31 @@
# uint64_t OutlinedHashTreeOffset;
# }
RUN: touch %t_empty.cgdata
RUN: not llvm-cgdata show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix EMPTY
RUN: not llvm-cgdata show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix=EMPTY
EMPTY: {{.}}cgdata: empty codegen data

# Not a magic.
RUN: printf '\xff' > %t_malformed.cgdata
RUN: not llvm-cgdata show %t_malformed.cgdata 2>&1 | FileCheck %s --check-prefix MALFORMED
RUN: not llvm-cgdata show %t_malformed.cgdata 2>&1 | FileCheck %s --check-prefix=MALFORMED
MALFORMED: {{.}}cgdata: malformed codegen data

# The minimum header size is 24.
RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
RUN: not llvm-cgdata show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix CORRUPT
RUN: not llvm-cgdata show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix=CORRUPT
CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)

# The current version is 1 while the header says 2.
RUN: printf '\xffcgdata\x81' > %t_version.cgdata
RUN: printf '\x02\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
RUN: not llvm-cgdata show %t_version.cgdata 2>&1 | FileCheck %s --check-prefix BAD_VERSION
RUN: not llvm-cgdata show %t_version.cgdata 2>&1 | FileCheck %s --check-prefix=BAD_VERSION
BAD_VERSION: {{.}}cgdata: unsupported codegen data version

# Header says an outlined hash tree, but the file ends after the header.
RUN: printf '\xffcgdata\x81' > %t_eof.cgdata
RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
RUN: not llvm-cgdata show %t_eof.cgdata 2>&1 | FileCheck %s --check-prefix EOF
RUN: not llvm-cgdata show %t_eof.cgdata 2>&1 | FileCheck %s --check-prefix=EOF
EOF: {{.}}cgdata: end of File
85 changes: 50 additions & 35 deletions llvm/test/tools/llvm-cgdata/merge-archive.test
Original file line number Diff line number Diff line change
@@ -1,18 +1,34 @@
# REQUIRES: shell
# UNSUPPORTED: system-windows

# Merge an archive that has two object files having cgdata (__llvm_outline)

RUN: split-file %s %t

# Synthesize raw cgdata without the header (24 bytes) from the indexed cgdata.
RUN: llvm-cgdata dump -binary %t/raw-1.cgtext -o %t/raw-1.cgdata
RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-1-bytes.txt
RUN: sed -i "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1.ll
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o

# Synthesize raw cgdata without the header (24 bytes) from the indexed cgdata.
RUN: llvm-cgdata dump -binary %t/raw-2.cgtext -o %t/raw-2.cgdata
RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-2-bytes.txt
RUN: sed -i "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2.ll
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o

# Make an archive from two object files
RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o

# Merge the archive into the codegen data file.
RUN: llvm-cgdata merge %t/merge-archive.a -o %t/merge-archive.cgdata
RUN: llvm-cgdata show %t/merge-archive.cgdata | FileCheck %s
CHECK: Outlined hash tree:
CHECK-NEXT: Total Node Count: 4
CHECK-NEXT: Terminal Node Count: 2
CHECK-NEXT: Depth: 2

RUN: llvm-cgdata dump %t/merge-archive.cgdata | FileCheck %s --check-prefix TREE
RUN: llvm-cgdata dump %t/merge-archive.cgdata | FileCheck %s --check-prefix=TREE
TREE: # Outlined stable hash tree
TREE-NEXT: :outlined_hash_tree
TREE-NEXT: ---
Expand All @@ -34,42 +50,41 @@ TREE-NEXT: Terminals: 4
TREE-NEXT: SuccessorIds: [ ]
TREE-NEXT: ...

;--- raw-1.cgtext
:outlined_hash_tree
0:
Hash: 0x0
Terminals: 0
SuccessorIds: [ 1 ]
1:
Hash: 0x1
Terminals: 0
SuccessorIds: [ 2 ]
2:
Hash: 0x2
Terminals: 4
SuccessorIds: [ ]
...

;--- merge-1.ll
@.data = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"

; The .data is encoded in a binary form based on the following yaml form. See serialize() in OutlinedHashTreeRecord.cpp
;---
;0:
; Hash: 0x0
; Terminals: 0
; SuccessorIds: [ 1 ]
;1:
; Hash: 0x1
; Terminals: 0
; SuccessorIds: [ 2 ]
;2:
; Hash: 0x2
; Terminals: 4
; SuccessorIds: [ ]
;...

@.data = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\02\00\00\00\00\00\00\00\04\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
;--- raw-2.cgtext
:outlined_hash_tree
0:
Hash: 0x0
Terminals: 0
SuccessorIds: [ 1 ]
1:
Hash: 0x1
Terminals: 0
SuccessorIds: [ 2 ]
2:
Hash: 0x3
Terminals: 5
SuccessorIds: [ ]
...

;--- merge-2.ll

; The .data is encoded in a binary form based on the following yaml form. See serialize() in OutlinedHashTreeRecord.cpp
;---
;0:
; Hash: 0x0
; Terminals: 0
; SuccessorIds: [ 1 ]
;1:
; Hash: 0x1
; Terminals: 0
; SuccessorIds: [ 2 ]
;2:
; Hash: 0x3
; Terminals: 5
; SuccessorIds: [ ]
;...

@.data = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\03\00\00\00\00\00\00\00\05\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
@.data = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"
81 changes: 48 additions & 33 deletions llvm/test/tools/llvm-cgdata/merge-concat.test
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
# REQUIRES: shell
# UNSUPPORTED: system-windows

# Merge a binary file (e.g., a linked executable) having concatenated cgdata (__llvm_outline)

RUN: split-file %s %t

# Synthesize two sets of raw cgdata without the header (24 bytes) from the indexed cgdata.
# Concatenate them in merge-concat.ll
RUN: llvm-cgdata dump -binary %t/raw-1.cgtext -o %t/raw-1.cgdata
RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-1-bytes.txt
RUN: sed -i "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat.ll
RUN: llvm-cgdata dump -binary %t/raw-2.cgtext -o %t/raw-2.cgdata
RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-2-bytes.txt
RUN: sed -i "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat.ll

RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
RUN: llvm-cgdata merge %t/merge-concat.o -o %t/merge-concat.cgdata
RUN: llvm-cgdata show %t/merge-concat.cgdata | FileCheck %s
Expand All @@ -10,7 +22,7 @@ CHECK-NEXT: Total Node Count: 4
CHECK-NEXT: Terminal Node Count: 2
CHECK-NEXT: Depth: 2

RUN: llvm-cgdata dump %t/merge-concat.cgdata | FileCheck %s --check-prefix TREE
RUN: llvm-cgdata dump %t/merge-concat.cgdata | FileCheck %s --check-prefix=TREE
TREE: # Outlined stable hash tree
TREE-NEXT: :outlined_hash_tree
TREE-NEXT: ---
Expand All @@ -32,37 +44,40 @@ TREE-NEXT: Terminals: 4
TREE-NEXT: SuccessorIds: [ ]
TREE-NEXT: ...

;--- merge-concat.ll
;--- raw-1.cgtext
:outlined_hash_tree
0:
Hash: 0x0
Terminals: 0
SuccessorIds: [ 1 ]
1:
Hash: 0x1
Terminals: 0
SuccessorIds: [ 2 ]
2:
Hash: 0x2
Terminals: 4
SuccessorIds: [ ]
...

; In an linked executable (as opposed to an object file), cgdata in __llvm_outline might be concatenated. Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. In other word, the following two trees are encoded back-to-back in a binary format.
;---
;0:
; Hash: 0x0
; Terminals: 0
; SuccessorIds: [ 1 ]
;1:
; Hash: 0x1
; Terminals: 0
; SuccessorIds: [ 2 ]
;2:
; Hash: 0x2
; Terminals: 4
; SuccessorIds: [ ]
;...
;---
;0:
; Hash: 0x0
; Terminals: 0
; SuccessorIds: [ 1 ]
;1:
; Hash: 0x1
; Terminals: 0
; SuccessorIds: [ 2 ]
;2:
; Hash: 0x3
; Terminals: 5
; SuccessorIds: [ ]
;...
;--- raw-2.cgtext
:outlined_hash_tree
0:
Hash: 0x0
Terminals: 0
SuccessorIds: [ 1 ]
1:
Hash: 0x1
Terminals: 0
SuccessorIds: [ 2 ]
2:
Hash: 0x3
Terminals: 5
SuccessorIds: [ ]
...

;--- merge-concat.ll

@.data1 = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\02\00\00\00\00\00\00\00\04\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
@.data2 = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\03\00\00\00\00\00\00\00\05\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
; In a linked executable (as opposed to an object file), cgdata in __llvm_outline might be concatenated. Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. In other words, the following two trees are encoded back-to-back in a binary format.
@.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
@.data2 = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"
Loading

0 comments on commit 1dbd111

Please sign in to comment.