Skip to content

Commit

Permalink
Add Crystal::Compiler#mt_codegen
Browse files Browse the repository at this point in the history
When compiled with -Dpreview_mt, the compiler takes advantage of the
MT environment to codegen the compilation units in parallel, avoiding
fork (which is not supported with MT) and allowing parallel codegen on
Windows.
  • Loading branch information
ysbaddaden committed Jul 2, 2024
1 parent 95a604a commit ac91f7c
Showing 1 changed file with 105 additions and 45 deletions.
150 changes: 105 additions & 45 deletions src/compiler/crystal/compiler.cr
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ require "crystal/digest/md5"
{% if flag?(:msvc) %}
require "./loader"
{% end %}
{% if flag?(:preview_mt) %}
require "wait_group"
{% end %}

module Crystal
@[Flags]
Expand Down Expand Up @@ -80,7 +83,13 @@ module Crystal
property? no_codegen = false

# Maximum number of LLVM modules that are compiled in parallel
property n_threads : Int32 = {% if flag?(:preview_mt) || flag?(:win32) %} 1 {% else %} 8 {% end %}
# The default depends on compile-time flags:
# - with `preview_mt`: the value of the `CRYSTAL_WORKERS` environment
#   variable when it is set and parses as an integer, otherwise 4;
# - on `win32` (without `preview_mt`): 1;
# - everywhere else: 8.
property n_threads : Int32 = {% if flag?(:preview_mt) %}
ENV["CRYSTAL_WORKERS"]?.try(&.to_i?) || 4
{% elsif flag?(:win32) %}
1
{% else %}
8
{% end %}

# Default prelude file to use. This ends up adding a
# `require "prelude"` (or whatever name is set here) to
Expand Down Expand Up @@ -391,7 +400,7 @@ module Crystal
llvm_mod = unit.llvm_mod

@progress_tracker.stage("Codegen (bc+obj)") do
optimize llvm_mod unless @optimization_mode.o0?
optimize llvm_mod, target_machine unless @optimization_mode.o0?

unit.emit(@emit_targets, emit_base_filename || output_filename)

Expand Down Expand Up @@ -512,14 +521,51 @@ module Crystal

# Compiles `units` in parallel using the strategy selected at compile time:
# `mt_codegen` when built with the `preview_mt` flag, `fork_codegen` when
# LibC provides `fork`, and otherwise raises because neither is available.
private def parallel_codegen(units, n_threads)
{% if flag?(:preview_mt) %}
# NOTE(review): the two `raise` lines below look like the removed and the
# added version of the same line in a diff view. As written, the first one
# raises unconditionally and makes the rest of this branch unreachable —
# confirm that only the conditional `raise ... unless LLVM.multithreaded?`
# is meant to remain.
raise "Cannot fork compiler in multithread mode."
raise "LLVM isn't multithreaded and cannot fork compiler in multithread mode." unless LLVM.multithreaded?
mt_codegen(units, n_threads)
{% elsif LibC.has_method?("fork") %}
fork_codegen(units, n_threads)
{% else %}
raise "Cannot fork compiler. `Crystal::System::Process.fork` is not implemented on this system."
{% end %}
end

# Compiles `units` to object files in parallel using `n_threads` worker
# fibers.
#
# Bitcode is generated sequentially on the calling fiber; each unit is then
# handed to a worker through a bounded channel and compiled there with
# `isolate_context: true`.
#
# If compiling a unit raises, the first exception is recorded, the remaining
# units are skipped, and the exception is re-raised from this method once
# every worker has finished. Without this, an exception raised inside a
# spawned fiber never reaches the caller: codegen would appear to succeed
# with object files missing, and if every worker died the producer would
# block forever on `channel.send` once the channel buffer filled up.
private def mt_codegen(units, n_threads)
  channel = Channel(CompilationUnit).new(n_threads * 2)
  wg = WaitGroup.new(n_threads)
  mutex = Mutex.new
  failure = nil

  n_threads.times do
    spawn do
      while unit = channel.receive?
        # After a failure, keep draining the channel so the producer can
        # never block on a full buffer, but don't start any new work.
        next if mutex.synchronize { failure }

        begin
          unit.compile(isolate_context: true)
        rescue ex
          # Only the first failure is kept; it is re-raised after `wg.wait`.
          mutex.synchronize { failure ||= ex }
          next
        end

        next unless wants_stats_or_progress?
        mutex.synchronize { @progress_tracker.stage_progress += 1 }
      end
    ensure
      wg.done
    end
  end

  units.each do |unit|
    # A worker already failed: no point in generating more bitcode.
    break if mutex.synchronize { failure }

    # We generate the bitcode on the main thread because the LLVM modules
    # may share an LLVM context (or rely on the global one), and an LLVM
    # context can't be used from several threads at once; trying to
    # codegen in parallel would segfault!
    #
    # Luckily generating the bitcode is quick, and once the bitcode is
    # generated we don't need the shared LLVM contexts anymore: each worker
    # parses the bitcode into an isolated context, so we can parallelize
    # the slowest part: the optimization pass & compiling the object file.
    unit.generate_bitcode

    channel.send(unit)
  end
  channel.close

  wg.wait

  # Every worker is done, so `failure` can no longer change.
  if error = failure
    raise error
  end
end

private def fork_codegen(units, n_threads)
workers = fork_workers(n_threads) do |input, output|
while i = input.gets(chomp: true).presence
Expand Down Expand Up @@ -743,7 +789,7 @@ module Crystal
end
{% end %}

protected def optimize(llvm_mod)
protected def optimize(llvm_mod, target_machine)
{% if LibLLVM::IS_LT_130 %}
optimize_with_pass_manager(llvm_mod)
{% else %}
Expand Down Expand Up @@ -819,6 +865,9 @@ module Crystal
getter llvm_mod
property? reused_previous_compilation = false
getter object_extension : String
@memory_buffer : LLVM::MemoryBuffer?
@object_name : String?
@bc_name : String?

def initialize(@compiler : Compiler, program : Program, @name : String,
@llvm_mod : LLVM::Module, @output_dir : String, @bc_flags_changed : Bool)
Expand Down Expand Up @@ -848,73 +897,84 @@ module Crystal
@object_extension = compiler.codegen_target.object_extension
end

def compile
compile_to_object
# Serializes `llvm_mod` to an in-memory bitcode buffer and memoizes it in
# `@memory_buffer`; subsequent calls return the already stored buffer
# instead of serializing the module again.
def generate_bitcode
@memory_buffer ||= llvm_mod.write_bitcode_to_memory_buffer
end

private def compile_to_object
bc_name = self.bc_name
object_name = self.object_name
temporary_object_name = self.temporary_object_name
# To compile a file we first generate a `.bc` file and then create an
# object file from it. These `.bc` files are stored in the cache
# directory.
#
# On a next compilation of the same project, and if the compile flags
# didn't change (a combination of the target triple, mcpu and link flags,
# amongst others), we check if the new `.bc` file is exactly the same as
# the old one. In that case the `.o` file will also be the same, so we
# simply reuse the old one. Generating an `.o` file is what takes most
# time.
#
# However, instead of directly generating the final `.o` file from the
# `.bc` file, we generate it to a temporary name (`.o.tmp`) and then we
# rename that file to `.o`. We do this because the compiler could be
# interrupted while the `.o` file is being generated, leading to a
# corrupted file that later would cause compilation issues. Moving a file
# is an atomic operation so no corrupted `.o` file should be generated.
# Compiles this unit to an object file, or flags it as reused when
# `must_compile?` reports that nothing has to be rebuilt.
#
# When a build is needed the steps run in this order:
# 1. `isolate_module_context` (only if `isolate_context` is true) re-parses
#    the bitcode into a dedicated LLVM context;
# 2. `update_bitcode_cache` refreshes the cached `.bc` file;
# 3. `compile_to_object` produces the object file.
#
# `dump_llvm_ir` is called in both cases.
def compile(isolate_context = false)
if must_compile?
isolate_module_context if isolate_context
update_bitcode_cache
compile_to_object
else
@reused_previous_compilation = true
end
dump_llvm_ir
end

private def must_compile?
memory_buffer = generate_bitcode

# To compile a file we first generate a `.bc` file and then
# create an object file from it. These `.bc` files are stored
# in the cache directory.
#
# On a next compilation of the same project, and if the compile
# flags didn't change (a combination of the target triple, mcpu
# and link flags, amongst others), we check if the new
# `.bc` file is exactly the same as the old one. In that case
# the `.o` file will also be the same, so we simply reuse the
# old one. Generating an `.o` file is what takes most time.
#
# However, instead of directly generating the final `.o` file
# from the `.bc` file, we generate it to a temporary name (`.o.tmp`)
# and then we rename that file to `.o`. We do this because the compiler
# could be interrupted while the `.o` file is being generated, leading
# to a corrupted file that later would cause compilation issues.
# Moving a file is an atomic operation so no corrupted `.o` file should
# be generated.

must_compile = true
can_reuse_previous_compilation =
compiler.emit_targets.none? && !@bc_flags_changed && File.exists?(bc_name) && File.exists?(object_name)

memory_buffer = llvm_mod.write_bitcode_to_memory_buffer

if can_reuse_previous_compilation
memory_io = IO::Memory.new(memory_buffer.to_slice)
changed = File.open(bc_name) { |bc_file| !IO.same_content?(bc_file, memory_io) }

# If the user cancelled a previous compilation
# it might be that the .o file is empty
if !changed && File.size(object_name) > 0
must_compile = false
memory_buffer.dispose
memory_buffer = nil
return false
else
# We need to compile, so we'll write the memory buffer to file
end
end

# If there's a memory buffer, it means we must create a .o from it
if memory_buffer
true
end

# Parse the previously generated bitcode into the LLVM module using a
# dedicated context, so we can safely optimize & compile the module in
# multiple threads (llvm contexts can't be shared across threads).
#
# Replaces `@llvm_mod` with the freshly parsed module. Requires
# `@memory_buffer` to be set (see `generate_bitcode`); `not_nil!` raises
# otherwise.
private def isolate_module_context
@llvm_mod = LLVM::Module.parse(@memory_buffer.not_nil!, LLVM::Context.new)
end

# Refreshes the cached `.bc` file from the in-memory bitcode buffer.
#
# Does nothing when there is no buffer. Otherwise the existing object file
# is removed first (it no longer matches the bitcode), the buffer is written
# to `bc_name` for the next compilations, and the buffer is then disposed.
private def update_bitcode_cache
  buffer = @memory_buffer
  return unless buffer

  # The existing .o file cannot be used anymore.
  File.delete?(object_name)

  # Keep the .bc file around so the next compilation can compare against it.
  File.write(bc_name, buffer.to_slice)

  buffer.dispose
end

if must_compile
compiler.optimize llvm_mod unless compiler.optimization_mode.o0?
compiler.target_machine.emit_obj_to_file llvm_mod, temporary_object_name
File.rename(temporary_object_name, object_name)
else
@reused_previous_compilation = true
end

dump_llvm_ir
# Optimizes the module (unless the optimization mode is O0) and emits it as
# an object file.
#
# The object is first written under a temporary name and then renamed to
# `object_name`, so an interrupted compilation doesn't leave a corrupted
# object file behind.
private def compile_to_object
  tmp_path = temporary_object_name

  # A target machine is created for this call rather than taken from the
  # compiler (presumably so parallel workers don't share one — confirm).
  machine = compiler.create_target_machine

  unless compiler.optimization_mode.o0?
    compiler.optimize(llvm_mod, machine)
  end

  machine.emit_obj_to_file(llvm_mod, tmp_path)
  File.rename(tmp_path, object_name)
end

private def dump_llvm_ir
Expand Down

0 comments on commit ac91f7c

Please sign in to comment.