Skip to content

Commit

Permalink
tools: API boosting support for rewriting elaborated types. (#9375)
Browse files Browse the repository at this point in the history
This PR introduces API boosting support for ElaboratedTypeLoc, one of a
number of Clang AST nodes that we need to analyze and rewrite to move to
the latest type according to the type database.

The approach taken is to generate a replacements YAML file suitable for
consumption by clang-apply-replacements; this is how clang-tidy works
internally. Replacements are a series of patch like file transformation
operations, Clang has nice support for building replacement sets and
serializing to YAML.

This PR also starts to split out api_booster into more modules (just
some utils to start with), introduces some more unit tests and a golden
C++ source file test framework.

Risk level: Low
Testing: New unit and e2e golden tests added.

Part of #8082

Signed-off-by: Harvey Tuch <[email protected]>
  • Loading branch information
htuch authored Dec 19, 2019
1 parent 7114dcd commit cdad62e
Show file tree
Hide file tree
Showing 14 changed files with 409 additions and 100 deletions.
26 changes: 20 additions & 6 deletions test/tools/type_whisperer/api_type_db_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,28 @@ namespace Tools {
namespace TypeWhisperer {
namespace {

TEST(ApiTypeDb, GetProtoPathForTypeUnknown) {
const auto unknown_type_path = ApiTypeDb::getProtoPathForType("foo");
EXPECT_EQ(absl::nullopt, unknown_type_path);
// Validate that ApiTypeDb::getLatestTypeInformation returns nullopt when no
// type information exists.
TEST(ApiTypeDb, GetLatestTypeInformationForTypeUnknown) {
const auto unknown_type_information = ApiTypeDb::getLatestTypeInformation("foo");
EXPECT_EQ(absl::nullopt, unknown_type_information);
}

TEST(ApiTypeDb, GetProtoPathForTypeKnown) {
const auto known_type_path = ApiTypeDb::getProtoPathForType("envoy.type.Int64Range");
EXPECT_EQ("envoy/type/range.proto", *known_type_path);
// Validate that ApiTypeDb::getLatestTypeInformation fetches the latest type
// information when an upgrade occurs.
TEST(ApiTypeDb, GetLatestTypeInformationForTypeKnownUpgraded) {
const auto known_type_information = ApiTypeDb::getLatestTypeInformation("envoy.type.Int64Range");
EXPECT_EQ("envoy.type.v3alpha.Int64Range", known_type_information->type_name_);
EXPECT_EQ("envoy/type/v3alpha/range.proto", known_type_information->proto_path_);
}

// Validate that ApiTypeDb::getLatestTypeInformation is idempotent when no
// upgrade occurs.
TEST(ApiTypeDb, GetLatestTypeInformationForTypeKnownNoUpgrade) {
const auto known_type_information =
ApiTypeDb::getLatestTypeInformation("envoy.type.v3alpha.Int64Range");
EXPECT_EQ("envoy.type.v3alpha.Int64Range", known_type_information->type_name_);
EXPECT_EQ("envoy/type/v3alpha/range.proto", known_type_information->proto_path_);
}

} // namespace
Expand Down
97 changes: 58 additions & 39 deletions tools/api_boost/api_boost.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,16 @@
import re
import subprocess as sp

# Temporary location of modified files.
TMP_SWP_SUFFIX = '.tmp.swp'

# Detect API #includes.
API_INCLUDE_REGEX = re.compile('#include "(envoy/.*)/[^/]+\.pb\.(validate\.)?h"')


# Obtain the directory containing a path prefix, e.g. ./foo/bar.txt is ./foo,
# ./foo/ba is ./foo, ./foo/bar/ is ./foo/bar.
def PrefixDirectory(path_prefix):
return path_prefix if os.path.isdir(path_prefix) else os.path.dirname(path_prefix)


# Update a C++ file to the latest API.
def ApiBoostFile(llvm_include_path, debug_log, path):
print('Processing %s' % path)
Expand All @@ -44,17 +47,22 @@ def ApiBoostFile(llvm_include_path, debug_log, path):
if debug_log:
print(result.stderr.decode('utf-8'))

# Consume stdout containing the list of inferred API headers. We don't have
# rewrite capabilities yet in the API booster, so we rewrite here in Python
# below.
inferred_api_includes = sorted(set(result.stdout.decode('utf-8').splitlines()))
# Consume stdout containing the list of inferred API headers.
return sorted(set(result.stdout.decode('utf-8').splitlines()))


# Rewrite API includes to the inferred headers. Currently this is handled
# outside of the clang-ast-replacements. In theory we could either integrate
# with this or with clang-include-fixer, but it's pretty simply to handle as done
# below, we have more control over special casing as well, so ¯\_(ツ)_/¯.
def RewriteIncludes(args):
path, api_includes = args
# We just dump the inferred API header includes at the start of the #includes
# in the file and remove all the present API header includes. This does not
# match Envoy style; we rely on later invocations of fix_format.sh to take
# care of this alignment.
output_lines = []
include_lines = ['#include "%s"' % f for f in inferred_api_includes]
include_lines = ['#include "%s"' % f for f in api_includes]
input_text = pathlib.Path(path).read_text()
for line in input_text.splitlines():
if include_lines and line.startswith('#include'):
Expand All @@ -66,35 +74,29 @@ def ApiBoostFile(llvm_include_path, debug_log, path):
if re.match(API_INCLUDE_REGEX, line) and 'envoy/service/auth/v2alpha' not in line:
continue
output_lines.append(line)

# Write to temporary file. We can't overwrite in place as we're executing
# concurrently with other ApiBoostFile() invocations that might need the file
# we're writing to.
pathlib.Path(path + TMP_SWP_SUFFIX).write_text('\n'.join(output_lines) + '\n')


# Replace the original file with the temporary file created by ApiBoostFile()
# for a given path.
def SwapTmpFile(path):
pathlib.Path(path + TMP_SWP_SUFFIX).rename(path)
# Rewrite file.
pathlib.Path(path).write_text('\n'.join(output_lines) + '\n')


# Update the Envoy source tree the latest API.
def ApiBoostTree(args):
def ApiBoostTree(target_paths,
generate_compilation_database=False,
build_api_booster=False,
debug_log=False):
dep_build_targets = ['//%s/...' % PrefixDirectory(prefix) for prefix in target_paths]

# Optional setup of state. We need the compilation database and api_booster
# tool in place before we can start boosting.
if args.generate_compilation_database:
sp.run(['./tools/gen_compilation_database.py', '--run_bazel_build', '--include_headers'],
if generate_compilation_database:
sp.run(['./tools/gen_compilation_database.py', '--run_bazel_build', '--include_headers'] +
dep_build_targets,
check=True)

if args.build_api_booster:
if build_api_booster:
# Similar to gen_compilation_database.py, we only need the cc_library for
# setup. The long term fix for this is in
# https://github.com/bazelbuild/bazel/issues/9578.
dep_build_targets = [
'//source/...',
'//test/...',
]
#
# Figure out some cc_libraries that cover most of our external deps. This is
# the same logic as in gen_compilation_database.py.
query = 'kind(cc_library, {})'.format(' union '.join(dep_build_targets))
Expand All @@ -104,7 +106,7 @@ def ApiBoostTree(args):
query = 'attr("tags", "compilation_db_dep", {})'.format(' union '.join(dep_build_targets))
dep_lib_build_targets.extend(sp.check_output(['bazel', 'query', query]).decode().splitlines())
extra_api_booster_args = []
if args.debug_log:
if debug_log:
extra_api_booster_args.append('--copt=-DENABLE_DEBUG_LOG')

# Slightly easier to debug when we build api_booster on its own.
Expand Down Expand Up @@ -132,22 +134,38 @@ def ApiBoostTree(args):

# Determine the files in the target dirs eligible for API boosting, based on
# known files in the compilation database.
paths = set([])
file_paths = set([])
for entry in json.loads(pathlib.Path('compile_commands.json').read_text()):
file_path = entry['file']
if any(file_path.startswith(prefix) for prefix in args.paths):
paths.add(file_path)
if any(file_path.startswith(prefix) for prefix in target_paths):
file_paths.add(file_path)

# The API boosting is file local, so this is trivially parallelizable, use
# multiprocessing pool with default worker pool sized to cpu_count(), since
# this is CPU bound.
with mp.Pool() as p:
# We need two phases, to ensure that any dependency on files being modified
# in one thread on consumed transitive headers on the other thread isn't an
# issue. This also ensures that we complete all analysis error free before
# any mutation takes place.
p.map(functools.partial(ApiBoostFile, llvm_include_path, args.debug_log), paths)
p.map(SwapTmpFile, paths)
try:
with mp.Pool() as p:
# We need multiple phases, to ensure that any dependency on files being modified
# in one thread on consumed transitive headers on the other thread isn't an
# issue. This also ensures that we complete all analysis error free before
# any mutation takes place.
# TODO(htuch): we should move to run-clang-tidy.py once the headers fixups
# are Clang-based.
api_includes = p.map(functools.partial(ApiBoostFile, llvm_include_path, debug_log),
file_paths)
# Apply Clang replacements before header fixups, since the replacements
# are all relative to the original file.
for prefix in target_paths:
sp.run(['clang-apply-replacements', PrefixDirectory(prefix)], check=True)
# Fixup headers.
p.map(RewriteIncludes, zip(file_paths, api_includes))
finally:
# Cleanup any stray **/*.clang-replacements.yaml.
for prefix in target_paths:
clang_replacements = pathlib.Path(
PrefixDirectory(prefix)).glob('**/*.clang-replacements.yaml')
for path in clang_replacements:
path.unlink()


if __name__ == '__main__':
Expand All @@ -157,4 +175,5 @@ def ApiBoostTree(args):
parser.add_argument('--debug_log', action='store_true')
parser.add_argument('paths', nargs='*', default=['source', 'test', 'include'])
args = parser.parse_args()
ApiBoostTree(args)
ApiBoostTree(args.paths, args.generate_compilation_database, args.build_api_booster,
args.debug_log)
61 changes: 61 additions & 0 deletions tools/api_boost/api_boost_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python3

# Golden C++ source tests for API boosting. This is effectively a test for the
# combination of api_boost.py, the Clang libtooling-based
# tools/clang_tools/api_booster, as well as the type whisperer and API type
# database.

import logging
import os
import pathlib
import shutil
import subprocess
import sys
import tempfile

import api_boost

# List of test in the form [(file_name, explanation)]
TESTS = [
('elaborated_type.cc', 'ElaboratedTypeLoc type upgrades'),
]

TESTDATA_PATH = 'tools/api_boost/testdata'


def Diff(some_path, other_path):
result = subprocess.run(['diff', '-u', some_path, other_path], capture_output=True)
if result.returncode == 0:
return None
return result.stdout.decode('utf-8') + result.stderr.decode('utf-8')


if __name__ == '__main__':
# Accumulated error messages.
logging.basicConfig(format='%(message)s')
messages = []

# Run API booster against test artifacts in a directory relative to workspace.
# We use a temporary copy as the API booster does in-place rewriting.
with tempfile.TemporaryDirectory(dir=pathlib.Path.cwd()) as path:
# Setup temporary tree.
shutil.copy(os.path.join(TESTDATA_PATH, 'BUILD'), path)
for filename, _ in TESTS:
shutil.copy(os.path.join(TESTDATA_PATH, filename), path)

# Run API booster.
api_boost.ApiBoostTree([str(pathlib.Path(path).relative_to(pathlib.Path.cwd()))],
generate_compilation_database=True,
build_api_booster=True,
debug_log=True)

# Validate output against golden files.
for filename, description in TESTS:
delta = Diff(os.path.join(TESTDATA_PATH, filename + '.gold'), os.path.join(path, filename))
if delta is not None:
messages.append('Non-empty diff for %s (%s):\n%s\n' % (filename, description, delta))

if len(messages) > 0:
logging.error('FAILED:\n{}'.format('\n'.join(messages)))
sys.exit(1)
logging.warning('PASS')
15 changes: 15 additions & 0 deletions tools/api_boost/testdata/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
licenses(["notice"]) # Apache 2

load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_library",
"envoy_package",
)

envoy_package()

envoy_cc_library(
name = "elaborated_type",
srcs = ["elaborated_type.cc"],
deps = ["@envoy_api//envoy/config/overload/v2alpha:pkg_cc_proto"],
)
6 changes: 6 additions & 0 deletions tools/api_boost/testdata/elaborated_type.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#include "envoy/config/overload/v2alpha/overload.pb.h"

class ThresholdTriggerImpl {
public:
ThresholdTriggerImpl(const envoy::config::overload::v2alpha::ThresholdTrigger& /*config*/) {}
};
6 changes: 6 additions & 0 deletions tools/api_boost/testdata/elaborated_type.cc.gold
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#include "envoy/config/overload/v3alpha/overload.pb.h"

class ThresholdTriggerImpl {
public:
ThresholdTriggerImpl(const envoy::config::overload::v3alpha::ThresholdTrigger& /*config*/) {}
};
8 changes: 6 additions & 2 deletions tools/check_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@
"./source/extensions/filters/http/squash/squash_filter.h",
"./source/extensions/filters/http/squash/squash_filter.cc",
"./source/server/http/admin.h", "./source/server/http/admin.cc",
"./tools/clang_tools/api_booster/main.cc")
"./tools/clang_tools/api_booster/main.cc",
"./tools/clang_tools/api_booster/proto_cxx_utils.h")

# Only one C++ file should instantiate grpc_init
GRPC_INIT_WHITELIST = ("./source/common/grpc/google_grpc_context.cc")
Expand Down Expand Up @@ -860,7 +861,10 @@ def checkErrorMessages(error_messages):
target_path = args.target_path
envoy_build_rule_check = not args.skip_envoy_build_rule_check
namespace_check = args.namespace_check
namespace_check_excluded_paths = args.namespace_check_excluded_paths
namespace_check_excluded_paths = args.namespace_check_excluded_paths + [
"./tools/api_boost/testdata/",
"./tools/clang_tools/",
]
build_fixer_check_excluded_paths = args.build_fixer_check_excluded_paths + [
"./bazel/external/",
"./bazel/toolchains/",
Expand Down
22 changes: 20 additions & 2 deletions tools/clang_tools/api_booster/BUILD
Original file line number Diff line number Diff line change
@@ -1,14 +1,32 @@
load("//clang_tools/support:clang_tools.bzl", "envoy_clang_tools_cc_binary")
load(
"//clang_tools/support:clang_tools.bzl",
"clang_tools_cc_binary",
"clang_tools_cc_library",
"clang_tools_cc_test",
)

licenses(["notice"]) # Apache 2

envoy_clang_tools_cc_binary(
clang_tools_cc_binary(
name = "api_booster",
srcs = ["main.cc"],
deps = [
":proto_cxx_utils_lib",
"@clang_tools//:clang_astmatchers",
"@clang_tools//:clang_basic",
"@clang_tools//:clang_tooling",
"@envoy//tools/type_whisperer:api_type_db_lib",
],
)

clang_tools_cc_library(
name = "proto_cxx_utils_lib",
hdrs = ["proto_cxx_utils.h"],
deps = ["@com_google_absl//absl/strings"],
)

clang_tools_cc_test(
name = "proto_cxx_utils_test",
srcs = ["proto_cxx_utils_test.cc"],
deps = [":proto_cxx_utils_lib"],
)
Loading

0 comments on commit cdad62e

Please sign in to comment.