Skip to content

Commit

Permalink
Precompilation: support modern CPU features on x86_64
Browse files Browse the repository at this point in the history
This also adds variants for supporting legacy CPUs on those targets.
The auto-discovery feature only works for Linux targets today, but
you can set a compile env to enable the legacy variants.
  • Loading branch information
philss committed Sep 28, 2023
1 parent 63ae771 commit 52ff5e8
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 12 deletions.
27 changes: 19 additions & 8 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ on:

jobs:
build_release:
name: NIF ${{ matrix.nif }} - ${{ matrix.job.target }} (${{ matrix.job.os }})
name: NIF ${{ matrix.nif }} - ${{ matrix.job.target }} (${{ matrix.job.os }} | ${{ matrix.job.variant || 'default' }})
runs-on: ${{ matrix.job.os }}
permissions:
contents: write
Expand All @@ -29,16 +29,20 @@ jobs:
matrix:
nif: ["2.15"]
job:
- { target: aarch64-apple-darwin, os: macos-11 }
- { target: aarch64-unknown-linux-gnu, os: ubuntu-20.04, use-cross: true }
- { target: aarch64-unknown-linux-musl, os: ubuntu-20.04, use-cross: true }
- { target: aarch64-apple-darwin, os: macos-11 }
- { target: riscv64gc-unknown-linux-gnu, os: ubuntu-20.04, use-cross: true, cargo-args: "--no-default-features" }
- { target: x86_64-apple-darwin, os: macos-11 }
- { target: x86_64-unknown-linux-gnu, os: ubuntu-20.04 }
- { target: x86_64-pc-windows-gnu, os: windows-2022, rustflags: "-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" }
- { target: x86_64-pc-windows-gnu, os: windows-2022, variant: "legacy_cpu" }
- { target: x86_64-pc-windows-msvc, os: windows-2019, rustflags: "-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" }
- { target: x86_64-pc-windows-msvc, os: windows-2019, variant: "legacy_cpu" }
- { target: x86_64-unknown-freebsd, os: ubuntu-22.04, use-cross: true, cross-version: v0.2.5, rustflags: "-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" }
- { target: x86_64-unknown-freebsd, os: ubuntu-22.04, use-cross: true, cross-version: v0.2.5, variant: "legacy_cpu" }
- { target: x86_64-unknown-linux-gnu, os: ubuntu-20.04, rustflags: "-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" }
- { target: x86_64-unknown-linux-gnu, os: ubuntu-20.04, variant: "legacy_cpu" }
- { target: x86_64-unknown-linux-musl, os: ubuntu-20.04, use-cross: true }
- { target: riscv64gc-unknown-linux-gnu, os: ubuntu-20.04, use-cross: true, cargo-args: "--no-default-features"}
- { target: x86_64-pc-windows-gnu, os: windows-2022 }
- { target: x86_64-pc-windows-msvc, os: windows-2019 }
- { target: x86_64-unknown-freebsd, os: ubuntu-22.04, use-cross: true, cross-version: v0.2.5 }

steps:
- name: Checkout source code
Expand All @@ -50,6 +54,12 @@ jobs:
# Get the project version from mix.exs
echo "PROJECT_VERSION=$(sed -n 's/^ @version "\(.*\)"/\1/p' mix.exs | head -n1)" >> $GITHUB_ENV
- name: Maybe add Rust flags for compilation
shell: bash
run: |
echo "RUSTFLAGS=${{ matrix.job.rustflags }}" >> $GITHUB_ENV
if: ${{ matrix.job.rustflags }}

- name: Add target
shell: bash
run: |
Expand All @@ -64,7 +74,7 @@ jobs:
- name: Build the project
id: build-crate
uses: philss/rustler-precompiled-action@52c1b8dd1ed8c7fcd90ca71b177aabbd3b29b95b
uses: philss/rustler-precompiled-action@v1.1.0
with:
project-name: explorer
project-version: ${{ env.PROJECT_VERSION }}
Expand All @@ -74,6 +84,7 @@ jobs:
cross-version: ${{ matrix.job.cross-version || 'v0.2.4' }}
project-dir: "native/explorer"
cargo-args: ${{ matrix.job.cargo-args }}
variant: ${{ matrix.job.variant }}

- name: Artifact upload
uses: actions/upload-artifact@v3
Expand Down
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -264,10 +264,21 @@ We support the following:
- `x86_64-unknown-linux-musl` - Linux running on Intel/AMD 64 bits CPUs, compiled with Musl.
- `x86_64-unknown-freebsd` - FreeBSD running on Intel/AMD 64 bits.

This means that the problem is going to work without the need to compile it from source.
This means that Explorer is going to work without the need to compile it from source.

This currently **only works for Hex releases**. For more information on how it works, please
check the [RustlerPrecompiled project](https://hexdocs.pm/rustler_precompiled).

### Legacy CPUs

We ship some of the precompiled artifacts with modern CPU features enabled by default. If your
computer's CPU does not support them, you can set an application environment option, which is
read at compile time, to enable the legacy variants of the artifacts.

```elixir
config :explorer, use_legacy_artifacts: true
```

### Features disabled

Some of the features cannot be compiled to some targets, because one of the dependencies
Expand Down
32 changes: 32 additions & 0 deletions lib/explorer/comptime_utils.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
defmodule Explorer.ComptimeUtils do
  @moduledoc false
  # This module is useful to control some aspects of compilation.
  # It is mostly used in the `Explorer.PolarsBackend.Native` compilation.

  # Tells whether every flag in `needed_flags` is listed in the "flags" lines
  # of a cpuinfo-style file.
  #
  # Options:
  #
  #   * `:cpu_info_file_path` - the file to read. Defaults to "/proc/cpuinfo".
  #   * `:target` - accepted for backward compatibility; it is not used here.
  #
  # Returns `true` only when the file could be read and all the needed flags
  # were found. Returns `false` when a flag is missing or the file is unreadable.
  #
  # Only works for Linux targets today, but we don't need more.
  @doc false
  def cpu_with_all_caps?(needed_flags, opts \\ []) do
    opts = Keyword.validate!(opts, cpu_info_file_path: "/proc/cpuinfo", target: nil)

    case File.read(opts[:cpu_info_file_path]) do
      {:ok, contents} ->
        available = parse_cpu_flags(contents)
        Enum.all?(needed_flags, &MapSet.member?(available, &1))

      {:error, _} ->
        # There is no way to say, so we default to false.
        false
    end
  end

  # Collects the flags of every line starting with "flags" into a set.
  defp parse_cpu_flags(contents) do
    contents
    |> String.split("\n")
    |> Enum.filter(&String.starts_with?(&1, "flags"))
    |> Enum.flat_map(fn line ->
      # Split only on the first ":" so that a malformed line (no separator, or
      # nothing after it) yields no flags instead of raising a MatchError.
      case String.split(line, ":", parts: 2) do
        [_key, values] -> String.split(values)
        [_no_separator] -> []
      end
    end)
    |> MapSet.new()
  end
end
27 changes: 27 additions & 0 deletions lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,27 @@ defmodule Explorer.PolarsBackend.Native do
# We want "debug" in dev and test because it's faster to compile.
mode = if Mix.env() in [:dev, :test], do: :debug, else: :release

# An unset environment variable must resolve to `nil` ("no explicit choice"),
# not `false`: otherwise the CPU auto-discovery below could never run.
# When the variable is set, "true" or "1" enable the legacy artifacts and any
# other value disables them.
legacy_from_env =
  case System.get_env("EXPLORER_USE_LEGACY_ARTIFACTS") do
    nil -> nil
    value -> value in ["true", "1"]
  end

# The application env, when configured, takes precedence over the env var.
use_legacy = Application.compile_env(:explorer, :use_legacy_artifacts, legacy_from_env)

variants_for_linux = [
  legacy_cpu: fn ->
    # These are the same from the release workflow.
    # See the meaning in: https://unix.stackexchange.com/a/43540
    # NOTE(review): the workflow also enables `sse3`, which is not listed here;
    # the sample cpuinfo in the tests has no `sse3` token (it has `pni`) - confirm
    # whether that flag needs to be checked as well.
    needed_caps = ~w[fxsr sse sse2 ssse3 sse4_1 sse4_2 popcnt avx fma]

    case use_legacy do
      # No explicit choice: use the legacy variant if the CPU misses any capability.
      nil -> not Explorer.ComptimeUtils.cpu_with_all_caps?(needed_caps)
      # Explicit choice: only a literal `true` enables the legacy variant.
      choice -> choice == true
    end
  end
]

# There is no auto-discovery for the other targets, so only an explicit `true` counts.
other_variants = [legacy_cpu: fn -> use_legacy == true end]

use RustlerPrecompiled,
otp_app: :explorer,
version: version,
Expand All @@ -24,6 +45,12 @@ defmodule Explorer.PolarsBackend.Native do
x86_64-unknown-linux-musl
x86_64-unknown-freebsd
),
variants: %{
"x86_64-unknown-linux-gnu" => variants_for_linux,
"x86_64-pc-windows-msvc" => other_variants,
"x86_64-pc-windows-gnu" => other_variants,
"x86_64-unknown-freebsd" => other_variants
},
# We don't use any features of newer NIF versions, so 2.15 is enough.
nif_versions: ["2.15"],
mode: mode,
Expand Down
2 changes: 1 addition & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ defmodule Explorer.MixProject do
{:aws_signature, "~> 0.3"},
{:castore, "~> 1.0"},
{:fss, "~> 0.1"},
{:rustler_precompiled, "~> 0.6"},
{:rustler_precompiled, "~> 0.7"},
{:table, "~> 0.1.2"},
{:table_rex, "~> 3.1.1 or ~> 4.0.0"},
{:adbc, "~> 0.1", optional: true},
Expand Down
2 changes: 1 addition & 1 deletion mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"plug_crypto": {:hex, :plug_crypto, "1.2.5", "918772575e48e81e455818229bf719d4ab4181fcbf7f85b68a35620f78d89ced", [:mix], [], "hexpm", "26549a1d6345e2172eb1c233866756ae44a9609bd33ee6f99147ab3fd87fd842"},
"ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"},
"rustler": {:hex, :rustler, "0.29.1", "880f20ae3027bd7945def6cea767f5257bc926f33ff50c0d5d5a5315883c084d", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "109497d701861bfcd26eb8f5801fe327a8eef304f56a5b63ef61151ff44ac9b6"},
"rustler_precompiled": {:hex, :rustler_precompiled, "0.6.3", "f838d94bc35e1844973ee7266127b156fdc962e9e8b7ff666c8fb4fed7964d23", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "e18ecca3669a7454b3a2be75ae6c3ef01d550bc9a8cf5fbddcfff843b881d7c6"},
"rustler_precompiled": {:hex, :rustler_precompiled, "0.7.0", "5d0834fc06dbc76dd1034482f17b1797df0dba9b491cef8bb045fcaca94bcade", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "fdf43a6835f4e4de5bfbc4c019bfb8c46d124bd4635fefa3e20d9a2bbbec1512"},
"statistex": {:hex, :statistex, "1.0.0", "f3dc93f3c0c6c92e5f291704cf62b99b553253d7969e9a5fa713e5481cd858a5", [:mix], [], "hexpm", "ff9d8bee7035028ab4742ff52fc80a2aa35cece833cf5319009b52f1b5a86c27"},
"table": {:hex, :table, "0.1.2", "87ad1125f5b70c5dea0307aa633194083eb5182ec537efc94e96af08937e14a8", [:mix], [], "hexpm", "7e99bc7efef806315c7e65640724bf165c3061cdc5d854060f74468367065029"},
"table_rex": {:hex, :table_rex, "4.0.0", "3c613a68ebdc6d4d1e731bc973c233500974ec3993c99fcdabb210407b90959b", [:mix], [], "hexpm", "c35c4d5612ca49ebb0344ea10387da4d2afe278387d4019e4d8111e815df8f55"},
Expand Down
3 changes: 2 additions & 1 deletion native/explorer/Cross.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[build.env]
passthrough = [
"RUSTLER_NIF_VERSION"
"RUSTLER_NIF_VERSION",
"RUSTFLAGS"
]

[target.riscv64gc-unknown-linux-gnu]
Expand Down
50 changes: 50 additions & 0 deletions test/explorer/comptime_utils_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
defmodule Explorer.ComptimeUtilsTest do
  use ExUnit.Case, async: true
  alias Explorer.ComptimeUtils

  describe "cpu_with_all_caps?/2" do
    # Sample cpuinfo contents with two processor entries that list the same flags.
    @contents """
    processor : 31
    model : 33
    model name : AMD Ryzen 9 5950X 16-Core Processor
    cpuid level : 16
    wp : yes
    flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
    bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass srso
    bogomips : 6786.91
    processor : 31
    model : 33
    model name : AMD Ryzen 9 5950X 16-Core Processor
    cpuid level : 16
    wp : yes
    flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
    bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass srso
    bogomips : 6786.91
    """

    @tag :tmp_dir
    test "detects capabilities when CPU supports all of them", %{tmp_dir: tmp_dir} do
      cpu_info = write_cpu_info!(tmp_dir)

      assert ComptimeUtils.cpu_with_all_caps?(~w[sse avx avx2], cpu_info_file_path: cpu_info)
    end

    @tag :tmp_dir
    test "does not detect capabilities when CPU is missing one of them", %{tmp_dir: tmp_dir} do
      cpu_info = write_cpu_info!(tmp_dir)

      # "avx26" is not among the flags of the sample contents.
      refute ComptimeUtils.cpu_with_all_caps?(~w[sse avx avx2 avx26],
               cpu_info_file_path: cpu_info
             )
    end

    @tag :tmp_dir
    test "does not detect capabilities when file is missing", %{tmp_dir: tmp_dir} do
      # The path is built, but nothing is written to it.
      missing = cpu_info_path(tmp_dir)

      refute ComptimeUtils.cpu_with_all_caps?(~w[avx], cpu_info_file_path: missing)
    end
  end

  # Location of the fake cpuinfo file inside the per-test temporary directory.
  defp cpu_info_path(dir), do: Path.join([dir, "with_avx"])

  # Writes the sample contents to the fake cpuinfo file and returns its path.
  defp write_cpu_info!(dir) do
    target = cpu_info_path(dir)
    File.write!(target, @contents)
    target
  end
end

0 comments on commit 52ff5e8

Please sign in to comment.