feat(lib): add forbid_unsafe feature to disable unsafe code (#413)

* add default 'unsafe' feature, remove 'read_byte_unchecked' * feature flag 'slice_unchecked' * feature flag remaining unsafety * fix misnamed feature flags * fix a few more feature flags * lints and formatting * correct the auto-correct * Rename feature to forbid_unsafe so unsafe code is the default * reintroduce 'read_byte_unchecked' * don't use --all-features as the default, as that removes unsafe code paths * add safe path to codegen and fix regression on unsafe path * enable 'forbid_unsafe' on logos-derive/codegen if enabled on 'logos' and macro is re-exported * fmt * additionally run benchmarks against forbid_unsafe * add --features forbid_unsafe to the new benchmarks * handle case of base branch not supporting forbid_unsafe feature * include comment if comparing against defaults instead of before * troubleshoot why the two benchmark runs seem to be the same * try renaming the baselines * seems baseline names may only include underscores * and display the correct results for forbid_unsafe * test default features and forbid unsafe in test matrix * add forbid_unsafe to integration test * add safety note to source trait * add safety note to source trait * wordsmithing * include note on feature unification * fmt
maciejhirsz · Sep 13, 2024 · 028bd89 · 028bd89
1 parent c51d4d3
commit 028bd89
Show file tree

Hide file tree

Showing 20 changed files with 242 additions and 26 deletions.
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
@@ -14,7 +14,7 @@ jobs:
 
     - name: Generate code coverage
       run: |
-        cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out Xml
+        cargo +nightly tarpaulin --verbose --features debug --workspace --timeout 120 --out Xml
 
     - name: Upload to codecov.io
       uses: codecov/codecov-action@v4

diff --git a/.github/workflows/rustbench.yml b/.github/workflows/rustbench.yml
@@ -29,32 +29,59 @@ jobs:
         version: latest
 
     - name: Run Benchmarks on changes
-      run: cargo bench --workspace --bench bench -- --save-baseline changes
+      run: |
+        cargo bench --workspace --bench bench -- --save-baseline default_changes
+        cargo bench --workspace --bench bench --features forbid_unsafe  -- --save-baseline forbid_unsafe_changes
 
     - uses: actions/checkout@v4
       with:
         ref: ${{ github.event.pull_request.base.sha }}
         clean: false
 
     - name: Run Benchmarks before changes
-      run: cargo bench --workspace --bench bench -- --save-baseline before
+      run: |
+        cargo bench --workspace --bench bench -- --save-baseline default_before
+
+        # Skip benchmarking for forbid_unsafe feature if the PR base commit doesn't have the feature
+        if [[ "$(grep forbid_unsafe Cargo.toml)" ]]; then
+          cargo bench --workspace --bench bench --features forbid_unsafe -- --save-baseline forbid_unsafe_before
+        fi
 
     - name: Compare benchmarks
       run: |
         echo 'results<<EOF' >> $GITHUB_OUTPUT
-        critcmp before changes >> $GITHUB_OUTPUT
+        critcmp default_before default_changes >> $GITHUB_OUTPUT
         echo 'EOF' >> $GITHUB_OUTPUT
       id: compare
 
+    - name: Compare benchmarks for forbid-unsafe
+      run: |
+        echo 'results<<EOF' >> $GITHUB_OUTPUT
+        if [[ "$(grep forbid_unsafe Cargo.toml)" ]]; then
+          critcmp forbid_unsafe_before forbid_unsafe_changes >> $GITHUB_OUTPUT
+        else 
+          echo 'NOTE: PR base commit does not support the "forbid_unsafe" feature;' >> $GITHUB_OUTPUT
+          echo 'comparing forbid_unsafe against the default features on base instead.' >> $GITHUB_OUTPUT
+          critcmp default_before forbid_unsafe_changes >> $GITHUB_OUTPUT
+        fi
+        echo 'EOF' >> $GITHUB_OUTPUT
+      id: compare-forbid-unsafe
+
     - name: Comment PR with benchmarks
       uses: thollander/actions-comment-pull-request@v2
       continue-on-error: true
       with:
         message: |
-          Benchmark results:
+          Benchmark results with default features:
           ```
           ${{ steps.compare.outputs.results }}
           ```
+
+          Benchmark results with feature "forbid_unsafe":
+          ```
+          ${{ steps.compare-forbid-unsafe.outputs.results }}
+          ```
+
         comment_tag: benchmarks
 
       id: comment
@@ -66,3 +93,8 @@ jobs:
         echo '```' >> $GITHUB_STEP_SUMMARY
         echo '${{ steps.compare.outputs.results }}' >> $GITHUB_STEP_SUMMARY
         echo '```' >> $GITHUB_STEP_SUMMARY
+
+        echo '### Benchmark results with forbid-unsafe' >> $GITHUB_STEP_SUMMARY
+        echo '```' >> $GITHUB_STEP_SUMMARY
+        echo '${{ steps.compare-forbid-unsafe.outputs.results }}' >> $GITHUB_STEP_SUMMARY
+        echo '```' >> $GITHUB_STEP_SUMMARY
diff --git a/.github/workflows/rustdoc.yml b/.github/workflows/rustdoc.yml
@@ -22,4 +22,4 @@ jobs:
       uses: Swatinem/rust-cache@v2
 
     - name: Check rustdoc build
-      run: RUSTDOCFLAGS='--cfg docsrs' cargo +nightly doc --all-features -Zunstable-options -Zrustdoc-scrape-examples
+      run: RUSTDOCFLAGS='--cfg docsrs' cargo +nightly doc --features debug -Zunstable-options -Zrustdoc-scrape-examples
diff --git a/.github/workflows/rustlib.yml b/.github/workflows/rustlib.yml
@@ -22,7 +22,7 @@ jobs:
       uses: Swatinem/rust-cache@v2
 
     - name: Check rustdoc build
-      run: RUSTDOCFLAGS='--cfg docsrs' cargo +nightly doc --all-features -Zunstable-options -Zrustdoc-scrape-examples
+      run: RUSTDOCFLAGS='--cfg docsrs' cargo +nightly doc --features debug -Zunstable-options -Zrustdoc-scrape-examples
 
   tests:
     name: Tests
@@ -36,6 +36,9 @@ jobs:
         - macos-latest
         - ubuntu-latest
         - windows-latest
+        features:
+        - ""  # default features
+        - "--features forbid_unsafe"
 
     runs-on: ${{ matrix.os }}
     steps:
@@ -52,4 +55,4 @@ jobs:
       uses: actions-rs/cargo@v1
       with:
         command: test
-        args: --workspace --verbose
+        args: --workspace --verbose ${{ matrix.features }}
diff --git a/.github/workflows/rustlints.yml b/.github/workflows/rustlints.yml
@@ -21,7 +21,7 @@ jobs:
         components: clippy
 
     - name: Check clippy
-      run: cargo clippy --all-features -- -D warnings
+      run: cargo clippy --features debug -- -D warnings
 
   rustfmt:
     name: Rustfmt

diff --git a/.github/workflows/rustmsrv.yml b/.github/workflows/rustmsrv.yml
@@ -24,4 +24,4 @@ jobs:
         version: ^0.15.1
 
     - name: Check MSRV
-      run: cargo msrv verify -- cargo check --workspace --all-features
+      run: cargo msrv verify -- cargo check --workspace --features debug
diff --git a/Cargo.toml b/Cargo.toml
@@ -51,9 +51,11 @@ default = ["export_derive", "std"]
 export_derive = ["logos-derive"]
 # Should the crate use the standard library?
 std = []
+# Use safe alternatives for unsafe code (may impact performance)?
+forbid_unsafe = ["logos-derive?/forbid_unsafe"]
 
 [package.metadata.docs.rs]
-all-features = true
+features = ["debug"]
 cargo-args = ["-Zunstable-options", "-Zrustdoc-scrape-examples"]
 rustdoc-args = ["--cfg", "docsrs"]
 

diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md
@@ -11,6 +11,7 @@
 + [Using callbacks](./callbacks.md)
 + [Common regular expressions](./common-regex.md)
 + [Debugging](./debugging.md)
++ [Unsafe Code](./unsafe.md)
 + [Examples](./examples.md)
   + [Brainfuck interpreter](./examples/brainfuck.md)
   + [JSON parser](./examples/json.md)

diff --git a/book/src/contributing/setup.md b/book/src/contributing/setup.md
@@ -81,7 +81,7 @@ configuration to the one used by [docs.rs](https://docs.rs/logos/latest/logos/):
 
 ```bash
 RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc \
-    --all-features \
+    --features debug \
     -Zunstable-options \
     -Zrustdoc-scrape-examples \
     --no-deps \

diff --git a/book/src/unsafe.md b/book/src/unsafe.md
@@ -0,0 +1,36 @@
+# Unsafe Code
+
+By default, **Logos** uses unsafe code to avoid unnecessary bounds checks while
+accessing slices of the input `Source`.
+
+This unsafe code also exists in the code generated by the `Logos` derive macro,
+which generates a deterministic finite automata (DFA). Reasoning about the correctness
+of this generated code can be difficult - if the derivation of the DFA in `Logos`
+is correct, then this generated code will be correct and any mistakes in implementation
+would be caught given sufficient fuzz testing.
+
+Use of unsafe code is the default as this typically provides the fastest parser.
+
+## Disabling Unsafe Code
+
+However, for applications accepting untrusted input in a trusted context, this
+may not be a sufficient correctness justification.
+
+For those applications which cannot tolerate unsafe code, the feature `forbid-unsafe`
+may be enabled. This replaces unchecked accesses in the `Logos` crate with safe,
+checked alternatives which will panic on out-of-bounds access rather than cause
+undefined behavior. Additionally, code generated by the macro will not use the
+unsafe keyword, so generated code may be used in a crates using the 
+`#![forbid(unsafe_code)]` attribute.
+
+When the `forbid-unsafe` feature is added to a direct dependency on the `Logos` crate,
+[Feature Unification](https://doc.rust-lang.org/cargo/reference/features.html#feature-unification)
+ensures any transitive inclusion of `Logos` via other dependencies also have unsafe
+code disabled.
+
+Generally, disabling unsafe code will result in a slower parser.
+
+However making definitive statements around performance of safe-only code is difficult,
+as there are too many variables to consider between compiler optimizations,
+the specific grammar being parsed, and the target processor. The automated benchmarks
+of this crate show around a 10% slowdown in safe-only code at the time of this writing.
diff --git a/logos-codegen/Cargo.toml b/logos-codegen/Cargo.toml
@@ -16,6 +16,8 @@ rstest = "0.18.2"
 debug = []
 # Exports out internal methods for fuzzing
 fuzzing = []
+# Don't use or generate unsafe code
+forbid_unsafe = []
 
 [lib]
 bench = false

diff --git a/logos-codegen/src/generator/context.rs b/logos-codegen/src/generator/context.rs
@@ -59,12 +59,20 @@ impl Context {
         self.available.saturating_sub(self.at)
     }
 
-    pub fn read_byte_unchecked(&mut self) -> TokenStream {
+    pub fn read_byte(&mut self) -> TokenStream {
         let at = self.at;
 
         self.advance(1);
 
-        quote!(lex.read_byte_unchecked(#at))
+        #[cfg(not(feature = "forbid_unsafe"))]
+        {
+            quote!(unsafe { lex.read_byte_unchecked(#at) })
+        }
+
+        #[cfg(feature = "forbid_unsafe")]
+        {
+            quote!(lex.read_byte(#at))
+        }
     }
 
     pub fn read(&mut self, len: usize) -> TokenStream {

diff --git a/logos-codegen/src/generator/fork.rs b/logos-codegen/src/generator/fork.rs
@@ -136,9 +136,9 @@ impl<'a> Generator<'a> {
         let min_read = self.meta[this].min_read;
 
         if ctx.remainder() >= max(min_read, 1) {
-            let read = ctx.read_byte_unchecked();
+            let read = ctx.read_byte();
 
-            return (quote!(byte), quote!(let byte = unsafe { #read };));
+            return (quote!(byte), quote!(let byte = #read;));
         }
 
         match min_read {

diff --git a/logos-derive/Cargo.toml b/logos-derive/Cargo.toml
@@ -4,6 +4,8 @@ logos-codegen = {version = "0.14.1", path = "../logos-codegen"}
 [features]
 # Enables debug messages
 debug = ["logos-codegen/debug"]
+# Don't use or generate unsafe code
+forbid_unsafe = ["logos-codegen/forbid_unsafe"]
 
 [lib]
 bench = false

diff --git a/src/internal.rs b/src/internal.rs
@@ -17,8 +17,13 @@ pub trait LexerInternal<'source> {
     fn read_at<T: Chunk<'source>>(&self, n: usize) -> Option<T>;
 
     /// Unchecked read a byte at current position, offset by `n`.
+    #[cfg(not(feature = "forbid_unsafe"))]
     unsafe fn read_byte_unchecked(&self, n: usize) -> u8;
 
+    /// Checked read a byte at current position, offset by `n`.
+    #[cfg(feature = "forbid_unsafe")]
+    fn read_byte(&self, n: usize) -> u8;
+
     /// Test a chunk at current position with a closure.
     fn test<T: Chunk<'source>, F: FnOnce(T) -> bool>(&self, test: F) -> bool;