From 52ff5e88c4b6411ee929d4aae1e1a0a195963e33 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Tue, 19 Sep 2023 11:55:39 -0300 Subject: [PATCH] Precompilation: support modern CPU features on x86_64 This also add variants for supporting legacy CPUs on those targets. The auto-discovery feature only works for Linux targets today, but you can set a compile env to enable the legacy variants. --- .github/workflows/release.yml | 27 ++++++++++----- README.md | 13 ++++++- lib/explorer/comptime_utils.ex | 32 +++++++++++++++++ lib/explorer/polars_backend/native.ex | 27 +++++++++++++++ mix.exs | 2 +- mix.lock | 2 +- native/explorer/Cross.toml | 3 +- test/explorer/comptime_utils_test.exs | 50 +++++++++++++++++++++++++++ 8 files changed, 144 insertions(+), 12 deletions(-) create mode 100644 lib/explorer/comptime_utils.ex create mode 100644 test/explorer/comptime_utils_test.exs diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f4c9d1ab6..b389134ac 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,7 +20,7 @@ on: jobs: build_release: - name: NIF ${{ matrix.nif }} - ${{ matrix.job.target }} (${{ matrix.job.os }}) + name: NIF ${{ matrix.nif }} - ${{ matrix.job.target }} (${{ matrix.job.os }} | ${{ matrix.job.variant || 'default' }}) runs-on: ${{ matrix.job.os }} permissions: contents: write @@ -29,16 +29,20 @@ jobs: matrix: nif: ["2.15"] job: + - { target: aarch64-apple-darwin, os: macos-11 } - { target: aarch64-unknown-linux-gnu, os: ubuntu-20.04, use-cross: true } - { target: aarch64-unknown-linux-musl, os: ubuntu-20.04, use-cross: true } - - { target: aarch64-apple-darwin, os: macos-11 } + - { target: riscv64gc-unknown-linux-gnu, os: ubuntu-20.04, use-cross: true, cargo-args: "--no-default-features" } - { target: x86_64-apple-darwin, os: macos-11 } - - { target: x86_64-unknown-linux-gnu, os: ubuntu-20.04 } + - { target: x86_64-pc-windows-gnu, os: windows-2022, rustflags: "-C 
target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" } + - { target: x86_64-pc-windows-gnu, os: windows-2022, variant: "legacy_cpu" } + - { target: x86_64-pc-windows-msvc, os: windows-2019, rustflags: "-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" } + - { target: x86_64-pc-windows-msvc, os: windows-2019, variant: "legacy_cpu" } + - { target: x86_64-unknown-freebsd, os: ubuntu-22.04, use-cross: true, cross-version: v0.2.5, rustflags: "-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" } + - { target: x86_64-unknown-freebsd, os: ubuntu-22.04, use-cross: true, cross-version: v0.2.5, variant: "legacy_cpu" } + - { target: x86_64-unknown-linux-gnu, os: ubuntu-20.04, rustflags: "-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" } + - { target: x86_64-unknown-linux-gnu, os: ubuntu-20.04, variant: "legacy_cpu" } - { target: x86_64-unknown-linux-musl, os: ubuntu-20.04, use-cross: true } - - { target: riscv64gc-unknown-linux-gnu, os: ubuntu-20.04, use-cross: true, cargo-args: "--no-default-features"} - - { target: x86_64-pc-windows-gnu, os: windows-2022 } - - { target: x86_64-pc-windows-msvc, os: windows-2019 } - - { target: x86_64-unknown-freebsd, os: ubuntu-22.04, use-cross: true, cross-version: v0.2.5 } steps: - name: Checkout source code @@ -50,6 +54,12 @@ jobs: # Get the project version from mix.exs echo "PROJECT_VERSION=$(sed -n 's/^ @version "\(.*\)"/\1/p' mix.exs | head -n1)" >> $GITHUB_ENV + - name: Maybe add Rust flags for compilation + shell: bash + run: | + echo "RUSTFLAGS=${{ matrix.job.rustflags }}" >> $GITHUB_ENV + if: ${{ matrix.job.rustflags }} + - name: Add target shell: bash run: | @@ -64,7 +74,7 @@ jobs: - name: Build the project id: build-crate - uses: philss/rustler-precompiled-action@52c1b8dd1ed8c7fcd90ca71b177aabbd3b29b95b + uses: philss/rustler-precompiled-action@v1.1.0 with: project-name: explorer project-version: ${{ 
defmodule Explorer.ComptimeUtils do
  @moduledoc false
  # Compile-time helpers.
  # It is mostly used in the `Explorer.PolarsBackend.Native` compilation, to
  # decide which variant of the precompiled artifact should be used.

  # Tells whether the CPU described by a `/proc/cpuinfo`-style file advertises
  # every flag in `needed_flags`.
  #
  # Only works for Linux targets today, but we don't need more.
  #
  # Flags are compared literally against the names found in the "flags" lines
  # of the file, so callers must use the kernel spelling (for example
  # `sse4_1`, not `sse4.1`).
  #
  # Options:
  #
  #   * `:cpu_info_file_path` - file to read. Defaults to "/proc/cpuinfo".
  #   * `:target` - accepted for compatibility; currently not used.
  #
  # Returns `false` when the file cannot be read, because in that case there
  # is no way to tell what the CPU supports.
  @doc false
  @spec cpu_with_all_caps?([String.t()], keyword()) :: boolean()
  def cpu_with_all_caps?(needed_flags, opts \\ []) do
    opts = Keyword.validate!(opts, cpu_info_file_path: "/proc/cpuinfo", target: nil)

    case File.read(opts[:cpu_info_file_path]) do
      {:ok, contents} ->
        available = parse_cpu_flags(contents)
        Enum.all?(needed_flags, &MapSet.member?(available, &1))

      {:error, _reason} ->
        # There is no way to say, so we default to false.
        false
    end
  end

  # Collects the flags from every "flags" line of the file into one set.
  defp parse_cpu_flags(contents) do
    contents
    |> String.split("\n")
    |> Stream.filter(&String.starts_with?(&1, "flags"))
    |> Stream.flat_map(&flags_from_line/1)
    |> MapSet.new()
  end

  # Splits only on the first colon, so that an empty value or a value that
  # itself contains a colon does not raise while the project is compiling.
  defp flags_from_line(line) do
    case String.split(line, ":", parts: 2) do
      [_key, values] -> String.split(values)
      [_key] -> []
    end
  end
end
+ # See the meaning in: https://unix.stackexchange.com/a/43540 + needed_caps = ~w[fxsr sse sse2 ssse3 sse4_1 sse4_2 popcnt avx fma] + + use_legacy or + (is_nil(use_legacy) and + not Explorer.ComptimeUtils.cpu_with_all_caps?(needed_caps)) + end + ] + + other_variants = [legacy_cpu: fn -> use_legacy end] + use RustlerPrecompiled, otp_app: :explorer, version: version, @@ -24,6 +45,12 @@ defmodule Explorer.PolarsBackend.Native do x86_64-unknown-linux-musl x86_64-unknown-freebsd ), + variants: %{ + "x86_64-unknown-linux-gnu" => variants_for_linux, + "x86_64-pc-windows-msvc" => other_variants, + "x86_64-pc-windows-gnu" => other_variants, + "x86_64-unknown-freebsd" => other_variants + }, # We don't use any features of newer NIF versions, so 2.15 is enough. nif_versions: ["2.15"], mode: mode, diff --git a/mix.exs b/mix.exs index 1209aacd4..eccc170af 100644 --- a/mix.exs +++ b/mix.exs @@ -39,7 +39,7 @@ defmodule Explorer.MixProject do {:aws_signature, "~> 0.3"}, {:castore, "~> 1.0"}, {:fss, "~> 0.1"}, - {:rustler_precompiled, "~> 0.6"}, + {:rustler_precompiled, "~> 0.7"}, {:table, "~> 0.1.2"}, {:table_rex, "~> 3.1.1 or ~> 4.0.0"}, {:adbc, "~> 0.1", optional: true}, diff --git a/mix.lock b/mix.lock index 14fe9691e..0634085c9 100644 --- a/mix.lock +++ b/mix.lock @@ -27,7 +27,7 @@ "plug_crypto": {:hex, :plug_crypto, "1.2.5", "918772575e48e81e455818229bf719d4ab4181fcbf7f85b68a35620f78d89ced", [:mix], [], "hexpm", "26549a1d6345e2172eb1c233866756ae44a9609bd33ee6f99147ab3fd87fd842"}, "ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"}, "rustler": {:hex, :rustler, "0.29.1", "880f20ae3027bd7945def6cea767f5257bc926f33ff50c0d5d5a5315883c084d", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", 
defmodule Explorer.ComptimeUtilsTest do
  use ExUnit.Case, async: true

  alias Explorer.ComptimeUtils

  # Every test below works inside its own temporary directory.
  @moduletag :tmp_dir

  describe "cpu_with_all_caps?/2" do
    # Two processor entries with the same flags, in the `/proc/cpuinfo` layout.
    @contents """
    processor : 31
    model : 33
    model name : AMD Ryzen 9 5950X 16-Core Processor
    cpuid level : 16
    wp : yes
    flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
    bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass srso
    bogomips : 6786.91

    processor : 31
    model : 33
    model name : AMD Ryzen 9 5950X 16-Core Processor
    cpuid level : 16
    wp : yes
    flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
    bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass srso
    bogomips : 6786.91

    """

    test "detects capabilities when CPU supports all of them", %{tmp_dir: tmp_dir} do
      cpu_info = write_cpu_info(tmp_dir)

      assert ComptimeUtils.cpu_with_all_caps?(["sse", "avx", "avx2"],
               cpu_info_file_path: cpu_info
             )
    end

    test "does not detect capabilities when CPU is missing one of them", %{tmp_dir: tmp_dir} do
      cpu_info = write_cpu_info(tmp_dir)

      refute ComptimeUtils.cpu_with_all_caps?(["sse", "avx", "avx2", "avx26"],
               cpu_info_file_path: cpu_info
             )
    end

    test "does not detect capabilities when file is missing", %{tmp_dir: tmp_dir} do
      # The file is intentionally never written.
      absent = Path.join(tmp_dir, "with_avx")

      refute ComptimeUtils.cpu_with_all_caps?(["avx"], cpu_info_file_path: absent)
    end
  end

  # Writes the fixture into `dir` and returns the path of the new file.
  defp write_cpu_info(dir) do
    file = Path.join(dir, "with_avx")
    File.write!(file, @contents)
    file
  end
end