Skip to content

Commit

Permalink
[CGData] llvm-cgdata
Browse files Browse the repository at this point in the history
The llvm-cgdata tool has been introduced to handle reading and writing of codegen data. This data includes an optimistic codegen summary that can be utilized to enhance subsequent codegen. Currently, the tool supports saving and restoring the outlined hash tree, facilitating machine function outlining across modules. Additional codegen summaries can be incorporated into separate sections as required. This patch primarily establishes basic support for the reader and writer, similar to llvm-profdata.

The high-level operations of llvm-cgdata are as follows:
1. It reads local raw codegen data from a custom section (for example, __llvm_outline) embedded in native binary files.
2. It merges the local raw codegen data into indexed codegen data, complete with a suitable header.
3. It handles reading and writing of the indexed codegen data from and to a standalone file.
  • Loading branch information
kyulee-com committed Apr 26, 2024
1 parent e405e02 commit d22358a
Show file tree
Hide file tree
Showing 20 changed files with 1,681 additions and 0 deletions.
202 changes: 202 additions & 0 deletions llvm/include/llvm/CodeGenData/CodeGenData.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
//===- CodeGenData.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for codegen data, which has a stable summary that
// can be used to optimize the code in the subsequent codegen.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGENDATA_CODEGENDATA_H
#define LLVM_CODEGENDATA_CODEGENDATA_H

#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CodeGenData/OutlinedHashTree.h"
#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"
#include <mutex>

namespace llvm {

/// Kinds of codegen data sections. The enumerators are not spelled out here:
/// each CG_DATA_SECT_ENTRY listed in CodeGenData.inc expands to its `Kind`
/// argument, so the .inc file is the single source of truth for the list.
enum CGDataSectKind {
// Keep only the first macro argument; the section names and the prefix are
// ignored for the purpose of building the enumerator list.
#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/CodeGenData/CodeGenData.inc"
};

/// Returns the name of the section that holds codegen data of kind \p CGSK for
/// the object file format \p OF. The function is defined out of line.
/// NOTE(review): \p AddSegmentInfo presumably controls whether a segment
/// prefix (such as the "__DATA," entry prefix in CodeGenData.inc) is prepended
/// to the name -- confirm against the definition.
std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
Triple::ObjectFormatType OF,
bool AddSegmentInfo = true);

/// Kinds of codegen data. The enum is marked as a bitmask enum, so values are
/// distinct bits that can be combined with bitwise operators.
enum class CGDataKind {
// No codegen data kind.
Unknown = 0x0,
// A function outlining info.
FunctionOutlinedHashTree = 0x1,
// Must name the largest enumerator; update it when a new kind is added.
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
};

/// Returns the error category that make_error_code() pairs with cgdata_error
/// values. The function is defined out of line.
const std::error_category &cgdata_category();

/// Error codes reported for codegen data. `success` is 0 and denotes the
/// absence of an error; CGDataError asserts that it is never constructed with
/// it. The remaining enumerators take consecutive values after `success`.
enum class cgdata_error {
success = 0,
eof,
bad_magic,
bad_header,
empty_cgdata,
malformed,
unsupported_version,
};

/// Converts a cgdata_error value \p E into a std::error_code that belongs to
/// the codegen data error category.
inline std::error_code make_error_code(cgdata_error E) {
  const int RawCode = static_cast<int>(E);
  return {RawCode, cgdata_category()};
}

/// An llvm::Error payload that carries a cgdata_error code together with an
/// optional free-form message.
class CGDataError : public ErrorInfo<CGDataError> {
public:
  /// Creates an error for \p Err with the optional description \p ErrStr.
  /// \p Err must not be cgdata_error::success.
  CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
      : Err(Err), Msg(ErrStr.str()) {
    assert(Err != cgdata_error::success && "Not an error");
  }

  /// Returns the text describing this error (defined out of line).
  std::string message() const override;

  /// Writes message() to \p OS.
  void log(raw_ostream &OS) const override { OS << message(); }

  /// Maps the stored code to a std::error_code in the cgdata category.
  std::error_code convertToErrorCode() const override {
    return make_error_code(Err);
  }

  /// Returns the stored error code.
  cgdata_error get() const { return Err; }

  /// Returns the message supplied at construction (possibly empty).
  const std::string &getMessage() const { return Msg; }

  /// Consume an Error and return the raw enum value contained within it, and
  /// the optional error message. The Error must either be a success value, or
  /// contain a single CGDataError.
  static std::pair<cgdata_error, std::string> take(Error E) {
    cgdata_error Code = cgdata_error::success;
    std::string Text;
    // The handler runs once per contained CGDataError; the assert enforces
    // the "at most one" part of the contract.
    handleAllErrors(std::move(E), [&](const CGDataError &Info) {
      assert(Code == cgdata_error::success && "Multiple errors encountered");
      Code = Info.get();
      Text = Info.getMessage();
    });
    return std::make_pair(Code, std::move(Text));
  }

  static char ID;

private:
  cgdata_error Err;
  std::string Msg;
};

/// Mode in which codegen data is used.
/// NOTE(review): presumably None means codegen data is not used, Read means
/// previously produced data is consumed, and Write means data is produced --
/// confirm against the users of this enum.
enum CGDataMode {
None,
Read,
Write,
};

/// Holder of the codegen data that is shared across modules: the published
/// outlined hash tree and the flag that tells whether codegen data should be
/// emitted. Instances are only reachable through getInstance().
class CodeGenData {
  /// Global outlined hash tree that has outlined hash sequences across modules.
  std::unique_ptr<OutlinedHashTree> PublishedHashTree;

  /// This flag is set when -fcgdata-generate is passed.
  /// Or, it can be mutated with -ftwo-codegen-rounds during two codegen runs.
  /// It is initialized in-class so that it never holds an indeterminate value,
  /// regardless of how the instance happens to be constructed.
  bool EmitCGData = false;

  /// This is a singleton instance which is thread-safe. Unlike profile data
  /// which is largely function-based, codegen data describes the whole module.
  /// Therefore, this can be initialized once, and can be used across modules
  /// instead of constructing the same one for each codegen backend.
  static std::unique_ptr<CodeGenData> Instance;
  static std::once_flag OnceFlag;

  /// Private: instances are created only on behalf of getInstance().
  CodeGenData() = default;

public:
  ~CodeGenData() = default;

  /// Returns the singleton instance (defined out of line).
  static CodeGenData &getInstance();

  /// Returns true if we have a valid outlined hash tree, i.e. a tree has been
  /// published and it is not empty.
  bool hasOutlinedHashTree() const {
    return PublishedHashTree && !PublishedHashTree->empty();
  }

  /// Returns the outlined hash tree. This can be globally used in a read-only
  /// manner. The result is null when no tree has been published.
  const OutlinedHashTree *getOutlinedHashTree() const {
    return PublishedHashTree.get();
  }

  /// Returns true if we should write codegen data.
  bool emitCGData() const { return EmitCGData; }

  /// Publish the (globally) merged or read outlined hash tree. Takes ownership
  /// of \p HashTree and replaces any previously published tree.
  void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
    PublishedHashTree = std::move(HashTree);
    // Ensure we disable emitCGData as we do not want to read and write both.
    EmitCGData = false;
  }
};

namespace cgdata {

inline bool hasOutlinedHashTree() {
return CodeGenData::getInstance().hasOutlinedHashTree();
}

inline const OutlinedHashTree *getOutlinedHashTree() {
return CodeGenData::getInstance().getOutlinedHashTree();
}

inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }

inline void
publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
}

void warn(Error E, StringRef Whence = "");
void warn(Twine Message, std::string Whence = "", std::string Hint = "");

} // end namespace cgdata

/// Constants and the header layout of the indexed codegen data format.
namespace IndexedCGData {

// Magic value of indexed codegen data; Header below has a field of the same
// name. The trailing comment shows the value as a byte string, least
// significant byte first.
const uint64_t Magic = 0x81617461646763ff; // "\xffcgdata\x81"

enum CGDataVersion {
// Version 1 is the first version. This version supports the outlined
// hash tree.
Version1 = 1,
// Tracks CG_DATA_INDEX_VERSION, which is defined in CodeGenData.inc.
CurrentVersion = CG_DATA_INDEX_VERSION
};
// The current format version as a plain integer.
const uint64_t Version = CGDataVersion::CurrentVersion;

// Header of indexed codegen data.
// NOTE(review): DataKind presumably holds CGDataKind bits and
// OutlinedHashTreeOffset presumably locates the outlined hash tree payload --
// confirm against the reader and writer.
struct Header {
uint64_t Magic;
uint32_t Version;
uint32_t DataKind;
uint64_t OutlinedHashTreeOffset;

// New fields should only be added at the end to ensure that the size
// computation is correct. The methods below need to be updated to ensure that
// the new field is read correctly.

// Reads a header struct from the buffer.
static Expected<Header> readFromBuffer(const unsigned char *Curr);
};

} // end namespace IndexedCGData

} // end namespace llvm

#endif // LLVM_CODEGENDATA_CODEGENDATA_H
46 changes: 46 additions & 0 deletions llvm/include/llvm/CodeGenData/CodeGenData.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*===-- CodeGenData.inc ----------------------------------------*- C++ -*-=== *\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/
/*
* This is the main file that defines all the data structures, signatures, and
* constant literals that are shared across the compiler and host tools
* (reader/writer) to support codegen data.
*
\*===----------------------------------------------------------------------===*/

/* Section entry table. A client defines
 * CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) before
 * including this file; the macro is invoked once per codegen data section and
 * is undefined again at the end of the table, so it has to be redefined for
 * every inclusion that needs the table. */
#ifdef CG_DATA_SECT_ENTRY
#define CG_DATA_DEFINED
CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
CG_DATA_OUTLINE_COFF, "__DATA,")

#undef CG_DATA_SECT_ENTRY
#endif

/* Section name common to all targets other than WIN32. It is a bare token,
 * not a string literal; CG_DATA_QUOTE turns it into a string where one is
 * needed. */
#define CG_DATA_OUTLINE_COMMON __llvm_outline
/* Section name used for COFF, already a string literal.
 * Since cg data sections are not allocated, we don't need to
 * access them at runtime.
 */
#define CG_DATA_OUTLINE_COFF ".loutline"

/* CG_DATA_SECT_NAME is the section name as a string literal: the COFF name on
 * WIN32, and the stringified common name elsewhere. The common name is quoted
 * with this file's own CG_DATA_QUOTE helper. That helper is defined further
 * down, which is fine because a macro body is only expanded where
 * CG_DATA_SECT_NAME is used. */
#ifdef _WIN32
/* Runtime section names and name strings. */
#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF

#else
/* Runtime section names and name strings. */
#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)

#endif

/* Indexed codegen data format version (starts from 1). CodeGenData.h uses this
 * value for IndexedCGData::CurrentVersion. */
#define CG_DATA_INDEX_VERSION 1

/* Helper macros. CG_DATA_SIMPLE_QUOTE stringifies its argument as written;
 * CG_DATA_QUOTE adds a level of indirection so that a macro argument is
 * expanded first, e.g. CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON) yields
 * "__llvm_outline". */
#define CG_DATA_SIMPLE_QUOTE(x) #x
#define CG_DATA_QUOTE(x) CG_DATA_SIMPLE_QUOTE(x)
Loading

0 comments on commit d22358a

Please sign in to comment.