Skip to content

Commit

Permalink
colexec,bazel: re-work code-gen through bazel
Browse files Browse the repository at this point in the history
Now that we've added a way to source a specific template file in
`execgen` (instead of relying on hard-coded paths, see cockroachdb#56982), we can
simplify how we generate eg.go files. This lets us parallelize the
generation of these files, as the more fine-grained dependency tracking
lets bazel generate each eg.go file concurrently (previously we had to
specify the superset of all template files as dependencies for the
generation of each individual eg.go file).

There's one exception for the generation of like_ops.eg.go, the
generation of which appears to want read from a second template file.
We've special-cased the generation of this file into it's own thing.

Release note: None
  • Loading branch information
irfansharif committed Nov 25, 2020
1 parent 86b9b43 commit 4800cf2
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 93 deletions.
67 changes: 14 additions & 53 deletions pkg/sql/colexec/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
load(":COLEXEC.bzl", "generate")
load(":COLEXEC.bzl", "eg_go_filegroup", "gen_eg_go_rules", "gen_like_ops_rule")

go_library(
name = "colexec",
Expand Down Expand Up @@ -52,6 +52,7 @@ go_library(
"unordered_distinct.go",
"utils.go",
":gen-exec", # keep
":gen-like-ops", # keep
],
importpath = "github.com/cockroachdb/cockroach/pkg/sql/colexec",
visibility = ["//visibility:public"],
Expand Down Expand Up @@ -200,57 +201,17 @@ go_test(
)

# Define a file group for all the .eg.go targets.
targets = [
"and_or_projection.eg.go",
"cast.eg.go",
"const.eg.go",
"default_cmp_expr.eg.go",
"default_cmp_proj_ops.eg.go",
"default_cmp_sel_ops.eg.go",
"distinct.eg.go",
"hashjoiner.eg.go",
"hashtable_distinct.eg.go",
"hashtable_full_default.eg.go",
"hashtable_full_deleting.eg.go",
"hash_aggregator.eg.go",
"hash_utils.eg.go",
"is_null_ops.eg.go",
"like_ops.eg.go",
"mergejoinbase.eg.go",
"mergejoiner_exceptall.eg.go",
"mergejoiner_fullouter.eg.go",
"mergejoiner_inner.eg.go",
"mergejoiner_intersectall.eg.go",
"mergejoiner_leftanti.eg.go",
"mergejoiner_leftouter.eg.go",
"mergejoiner_leftsemi.eg.go",
"mergejoiner_rightanti.eg.go",
"mergejoiner_rightouter.eg.go",
"mergejoiner_rightsemi.eg.go",
"ordered_synchronizer.eg.go",
"proj_const_left_ops.eg.go",
"proj_const_right_ops.eg.go",
"proj_non_const_ops.eg.go",
"quicksort.eg.go",
"rank.eg.go",
"relative_rank.eg.go",
"row_number.eg.go",
"rowstovec.eg.go",
"selection_ops.eg.go",
"select_in.eg.go",
"sort.eg.go",
"substring.eg.go",
"values_differ.eg.go",
"vec_comparators.eg.go",
"window_peer_grouper.eg.go",
]
eg_go_filegroup(name = "gen-exec")

# Define a generator for all execgen code. Look towards COLEXEC.bzl for the
# implementation.
generate(
name = "gen-exec",
execgen = "//pkg/sql/colexec/execgen/cmd/execgen",
goimports = "//vendor/github.com/cockroachdb/gostdlib/x/tools/cmd/goimports",
targets = targets,
templates = glob(["*_tmpl.go"]),
# Define gen rules for individual eg.go files.
gen_eg_go_rules()

# Special-case the gen rule for like_ops.eg.go.
gen_like_ops_rule(
name = "gen-like-ops",
target = "like_ops.eg.go",
templates = [
"selection_ops_tmpl.go",
"proj_const_ops_tmpl.go",
],
)
155 changes: 115 additions & 40 deletions pkg/sql/colexec/COLEXEC.bzl
Original file line number Diff line number Diff line change
@@ -1,44 +1,119 @@
def _generate_impl(ctx):
output_files, input_files = [], []
# Gather the full list of all input template files (deconstructing it from
# the filegroup input). See [1] for a reference of all types/members within
# skylark.
#
# [1]: https://docs.bazel.build/versions/master/skylark/lib/skylark-overview.html
for tmpl in ctx.attr.templates:
input_files.extend(tmpl.files.to_list())
# Map between target name and relevant template.
targets = [
('and_or_projection.eg.go', 'and_or_projection_tmpl.go'),
('cast.eg.go', 'cast_tmpl.go'),
('const.eg.go', 'const_tmpl.go'),
('default_cmp_expr.eg.go', 'default_cmp_expr_tmpl.go'),
('default_cmp_proj_ops.eg.go', 'default_cmp_proj_ops_tmpl.go'),
('default_cmp_sel_ops.eg.go', 'default_cmp_sel_ops_tmpl.go'),
('distinct.eg.go', 'distinct_tmpl.go'),
('hash_aggregator.eg.go', 'hash_aggregator_tmpl.go'),
('hash_utils.eg.go', 'hash_utils_tmpl.go'),
('hashjoiner.eg.go', 'hashjoiner_tmpl.go'),
('hashtable_distinct.eg.go', 'hashtable_tmpl.go'),
('hashtable_full_default.eg.go', 'hashtable_tmpl.go'),
('hashtable_full_deleting.eg.go', 'hashtable_tmpl.go'),
('is_null_ops.eg.go', 'is_null_ops_tmpl.go'),
# ('like_ops.eg.go', ...); See `gen_like_ops_rule` below.
('mergejoinbase.eg.go', 'mergejoinbase_tmpl.go'),
('mergejoiner_exceptall.eg.go', 'mergejoiner_tmpl.go'),
('mergejoiner_fullouter.eg.go', 'mergejoiner_tmpl.go'),
('mergejoiner_inner.eg.go', 'mergejoiner_tmpl.go'),
('mergejoiner_intersectall.eg.go', 'mergejoiner_tmpl.go'),
('mergejoiner_leftanti.eg.go', 'mergejoiner_tmpl.go'),
('mergejoiner_leftouter.eg.go', 'mergejoiner_tmpl.go'),
('mergejoiner_leftsemi.eg.go', 'mergejoiner_tmpl.go'),
('mergejoiner_rightanti.eg.go', 'mergejoiner_tmpl.go'),
('mergejoiner_rightouter.eg.go', 'mergejoiner_tmpl.go'),
('mergejoiner_rightsemi.eg.go', 'mergejoiner_tmpl.go'),
('ordered_synchronizer.eg.go', 'ordered_synchronizer_tmpl.go'),
('proj_const_left_ops.eg.go', 'proj_const_ops_tmpl.go'),
('proj_const_right_ops.eg.go', 'proj_const_ops_tmpl.go'),
('proj_non_const_ops.eg.go', 'proj_non_const_ops_tmpl.go'),
('quicksort.eg.go', 'quicksort_tmpl.go'),
('rank.eg.go', 'rank_tmpl.go'),
('relative_rank.eg.go', 'relative_rank_tmpl.go'),
('row_number.eg.go', 'row_number_tmpl.go'),
('rowstovec.eg.go', 'rowstovec_tmpl.go'),
('select_in.eg.go', 'select_in_tmpl.go'),
('selection_ops.eg.go', 'selection_ops_tmpl.go'),
('sort.eg.go', 'sort_tmpl.go'),
('substring.eg.go', 'substring_tmpl.go'),
('values_differ.eg.go', 'values_differ_tmpl.go'),
('vec_comparators.eg.go', 'vec_comparators_tmpl.go'),
('window_peer_grouper.eg.go', 'window_peer_grouper_tmpl.go'),
]

execgen_binary = ctx.executable.execgen
goimports_binary = ctx.executable.goimports
input_binaries = [execgen_binary, goimports_binary]
def rule_name_for(target):
# e.g. 'vec_comparators.eg.go' -> 'gen-vec-comparators'
return 'gen-{}'.format(target.replace('.eg.go', '').replace('_', '-'))

# For each target, run execgen and goimports.
for generated_file_name in ctx.attr.targets:
generated_file = ctx.actions.declare_file(generated_file_name)
ctx.actions.run_shell(
tools = input_binaries,
inputs = input_files,
outputs = [generated_file],
progress_message = "Generating pkg/cmd/colexec/%s" % generated_file_name,
command = """
ln -s external/cockroach/pkg pkg
%s -fmt=false pkg/sql/colexec/%s > %s
%s -w %s
""" % (execgen_binary.path, generated_file_name, generated_file.path, goimports_binary.path, generated_file.path),
)
output_files.append(generated_file)
# Define a file group for all the .eg.go targets.
def eg_go_filegroup(name):
native.filegroup(
name = name,
srcs = [':{}'.format(rule_name_for(target)) for target, _ in targets],
)

# Define gen rules for individual eg.go files.
def gen_eg_go_rules():
# Define some aliases for ease of use.
native.alias(
name = "execgen",
actual = "//pkg/sql/colexec/execgen/cmd/execgen",
)
native.alias(
name = "goimports",
actual = "//vendor/github.com/cockroachdb/gostdlib/x/tools/cmd/goimports",
)

# Construct the full set of output files for bazel to be able to analyse.
# See https://docs.bazel.build/versions/master/skylark/lib/DefaultInfo.html.
return [DefaultInfo(files = depset(output_files))]
for target, template in targets:
name = rule_name_for(target)

native.genrule(
name = name,
srcs = [template],
outs = [target],
# `$@` lets us substitute in the output path[1]. The symlink below
# is frowned upon for genrules[2]. That said, when testing
# pkg/sql/colexec through bazel it expects to find the template
# files in a path other than what SRCS would suggest. We haven't
# really investigated why. For now lets just symlink the relevant
# files into the "right" path within the bazel sandbox[3].
#
# [1]: https://docs.bazel.build/versions/3.7.0/be/general.html#genrule_args
# [2]: https://docs.bazel.build/versions/3.7.0/be/general.html#general-advice
# [3]: https://github.com/cockroachdb/cockroach/pull/57027
cmd = """
ln -s external/cockroach/pkg pkg
$(location :execgen) -template $(SRCS) \
-fmt=false pkg/sql/colexec/$@ > $@
$(location :goimports) -w $@
""",
tools = [":execgen", ":goimports"],
)

generate = rule(
implementation = _generate_impl,
# Source all the necessary template files, output targets, the execgen and goimports binaries.
attrs = {
"templates": attr.label_list(mandatory = True, allow_files = ["tmpl.go"]),
"targets": attr.string_list(mandatory = True),
"execgen": attr.label(mandatory = True, executable = True, allow_files = True, cfg = "exec"),
"goimports": attr.label(mandatory = True, executable = True, allow_files = True, cfg = "exec"),
},
)
# TODO(irfansharif): We should be able to use `gen_eg_go_rules` to generate
# like_ops.eg.go. It's special-cased here because execgen reads two separate
# template files[1] when generating like_ops.eg.go.
#
# [1]: https://github.com/cockroachdb/cockroach/blob/1f23ef2b4e/pkg/sql/colexec/execgen/cmd/execgen/like_ops_gen.go#L48
def gen_like_ops_rule(name, templates, target):
native.genrule(
name = name,
srcs = templates,
outs = [target],
# See TODO above. We should ideally be using $(SRCS) to point to the
# template file, but like_ops.eg.go needs access to two template files.
# We point to the first, which is the template the generator is
# registered with, but we also need to include the other in our srcs so
# it's included in the sandbox when generating the file (-template only
# expects one argument).
cmd = """
ln -s external/cockroach/pkg pkg
$(location :execgen) -template $(location %s) \
-fmt=false pkg/sql/colexec/$@ > $@
$(location :goimports) -w $@
""" % (templates[0]),
tools = [":execgen", ":goimports"], # Make use of the same aliases defined above.
)

0 comments on commit 4800cf2

Please sign in to comment.