From b1d061926376965805ef3ece3e32d94df81462a6 Mon Sep 17 00:00:00 2001 From: jfecher Date: Tue, 24 Sep 2024 13:45:23 -0500 Subject: [PATCH] fix: Handle multi-byte utf8 characters in formatter (#6118) # Description ## Problem\* Resolves https://github.com/noir-lang/noir/issues/6108 ## Summary\* ## Additional Context ## Documentation\* Check one: - [x] No documentation needed. - [ ] Documentation included in this PR. - [ ] **[For Experimental Features]** Documentation to be submitted in a separate PR. # PR Checklist\* - [x] I have tested the changes locally. - [x] I have formatted the changes with [Prettier](https://prettier.io/) and/or `cargo fmt` on default settings. --------- Co-authored-by: Michael Klein Co-authored-by: Michael J Klein Co-authored-by: TomAFrench --- .github/workflows/test-js-packages.yml | 69 ++++++++++++++++++--- compiler/noirc_frontend/src/lexer/errors.rs | 6 ++ compiler/noirc_frontend/src/lexer/lexer.rs | 24 +++++++ noir_stdlib/src/collections/map.nr | 4 +- noir_stdlib/src/ec/mod.nr | 14 ++--- noir_stdlib/src/ec/montcurve.nr | 12 ++-- noir_stdlib/src/ec/swcurve.nr | 8 +-- noir_stdlib/src/ec/tecurve.nr | 16 ++--- noir_stdlib/src/field/mod.nr | 2 +- noir_stdlib/src/hash/poseidon/bn254.nr | 2 +- tooling/nargo_fmt/src/visitor.rs | 17 ++++- 11 files changed, 135 insertions(+), 39 deletions(-) diff --git a/.github/workflows/test-js-packages.yml b/.github/workflows/test-js-packages.yml index 9f46e6f98e8..e45a482cc59 100644 --- a/.github/workflows/test-js-packages.yml +++ b/.github/workflows/test-js-packages.yml @@ -183,7 +183,7 @@ jobs: with: name: acvm-js path: ./acvm-repo/acvm_js - + - name: Set up test environment uses: ./.github/actions/setup @@ -230,13 +230,13 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - + - name: Download nargo binary uses: actions/download-artifact@v4 with: name: nargo path: ./nargo - + - name: Download artifact uses: actions/download-artifact@v4 with: @@ -248,7 +248,7 @@ jobs: with: name: noirc_abi_wasm path: ./tooling/noirc_abi_wasm - + - name: Set nargo on PATH run: | nargo_binary="${{ github.workspace }}/nargo/nargo" @@ -336,13 +336,13 @@ jobs: with: name: acvm-js path: ./acvm-repo/acvm_js - + - name: Download noirc_abi package artifact uses: actions/download-artifact@v4 with: name: noirc_abi_wasm path: ./tooling/noirc_abi_wasm - + - name: Set nargo on PATH run: | nargo_binary="${{ github.workspace }}/nargo/nargo" @@ -468,7 +468,7 @@ jobs: working-directory: ./compiler/integration-tests run: | yarn test:browser - + test-examples: name: Example scripts runs-on: ubuntu-latest @@ -509,6 +509,59 @@ jobs: working-directory: ./examples/codegen_verifier run: ./test.sh + external-repo-checks: + needs: [build-nargo] + runs-on: ubuntu-latest + # Only run when 'run-external-checks' label is present + if: contains(github.event.pull_request.labels.*.name, 'run-external-checks') + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + project: + # Disabled as these are currently failing with many visibility errors + # - { repo: AztecProtocol/aztec-nr, path: ./ } + # - { repo: AztecProtocol/aztec-packages, path: ./noir-projects/noir-contracts } + # Disabled as aztec-packages requires a setup-step in order to generate a `Nargo.toml` + #- { repo: AztecProtocol/aztec-packages, path: ./noir-projects/noir-protocol-circuits } + - { repo: zac-williamson/noir-edwards, path: ./, ref: 037e44b2ee8557c51f6aef9bb9d63ea9e32722d1 } + # TODO: Enable these once they're passing against master again. + # - { repo: zac-williamson/noir-bignum, path: ./, ref: 030c2acce1e6b97c44a3bbbf3429ed96f20d72d3 } + # - { repo: vlayer-xyz/monorepo, path: ./, ref: ee46af88c025863872234eb05d890e1e447907cb } + # - { repo: hashcloak/noir-bigint, path: ./, ref: 940ddba3a5201b508e7b37a2ef643551afcf5ed8 } + name: Check external repo - ${{ matrix.project.repo }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + repository: ${{ matrix.project.repo }} + path: test-repo + ref: ${{ matrix.project.ref }} + + - name: Download nargo binary + uses: actions/download-artifact@v4 + with: + name: nargo + path: ./nargo + + - name: Set nargo on PATH + run: | + nargo_binary="${{ github.workspace }}/nargo/nargo" + chmod +x $nargo_binary + echo "$(dirname $nargo_binary)" >> $GITHUB_PATH + export PATH="$PATH:$(dirname $nargo_binary)" + nargo -V + + - name: Remove requirements on compiler version + working-directory: ./test-repo + run: | + # Github actions seems to not expand "**" in globs by default. + shopt -s globstar + sed -i '/^compiler_version/d' ./**/Nargo.toml + - name: Run nargo check + working-directory: ./test-repo/${{ matrix.project.path }} + run: nargo check + # This is a job which depends on all test jobs and reports the overall status. # This allows us to add/remove test jobs without having to update the required workflows. tests-end: @@ -526,7 +579,7 @@ jobs: - test-integration-node - test-integration-browser - test-examples - + steps: - name: Report overall success run: | diff --git a/compiler/noirc_frontend/src/lexer/errors.rs b/compiler/noirc_frontend/src/lexer/errors.rs index 2440109af15..6e64c509195 100644 --- a/compiler/noirc_frontend/src/lexer/errors.rs +++ b/compiler/noirc_frontend/src/lexer/errors.rs @@ -34,6 +34,8 @@ pub enum LexerErrorKind { InvalidEscape { escaped: char, span: Span }, #[error("Invalid quote delimiter `{delimiter}`, valid delimiters are `{{`, `[`, and `(`")] InvalidQuoteDelimiter { delimiter: SpannedToken }, + #[error("Non-ASCII characters are invalid in comments")] + NonAsciiComment { span: Span }, #[error("Expected `{end_delim}` to close this {start_delim}")] UnclosedQuote { start_delim: SpannedToken, end_delim: Token }, } @@ -65,6 +67,7 @@ impl LexerErrorKind { LexerErrorKind::UnterminatedStringLiteral { span } => *span, LexerErrorKind::InvalidEscape { span, .. } => *span, LexerErrorKind::InvalidQuoteDelimiter { delimiter } => delimiter.to_span(), + LexerErrorKind::NonAsciiComment { span, .. } => *span, LexerErrorKind::UnclosedQuote { start_delim, .. } => start_delim.to_span(), } } @@ -124,6 +127,9 @@ impl LexerErrorKind { LexerErrorKind::InvalidQuoteDelimiter { delimiter } => { (format!("Invalid quote delimiter `{delimiter}`"), "Valid delimiters are `{`, `[`, and `(`".to_string(), delimiter.to_span()) }, + LexerErrorKind::NonAsciiComment { span } => { + ("Non-ASCII character in comment".to_string(), "Invalid comment character: only ASCII is currently supported.".to_string(), *span) + } LexerErrorKind::UnclosedQuote { start_delim, end_delim } => { ("Unclosed `quote` expression".to_string(), format!("Expected a `{end_delim}` to close this `{start_delim}`"), start_delim.to_span()) } diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index a4ec19ba363..95eb41fd6d0 100644 --- a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -606,6 +606,11 @@ impl<'a> Lexer<'a> { }; let comment = self.eat_while(None, |ch| ch != '\n'); + if !comment.is_ascii() { + let span = Span::from(start..self.position); + return Err(LexerErrorKind::NonAsciiComment { span }); + } + if doc_style.is_none() && self.skip_comments { return self.next_token(); } @@ -651,6 +656,11 @@ impl<'a> Lexer<'a> { } if depth == 0 { + if !content.is_ascii() { + let span = Span::from(start..self.position); + return Err(LexerErrorKind::NonAsciiComment { span }); + } + if doc_style.is_none() && self.skip_comments { return self.next_token(); } @@ -1331,6 +1341,7 @@ mod tests { Err(LexerErrorKind::InvalidIntegerLiteral { .. }) | Err(LexerErrorKind::UnexpectedCharacter { .. }) + | Err(LexerErrorKind::NonAsciiComment { .. }) | Err(LexerErrorKind::UnterminatedBlockComment { .. }) => { expected_token_found = true; } @@ -1389,4 +1400,17 @@ mod tests { } } } + + #[test] + fn test_non_ascii_comments() { + let cases = vec!["// 🙂", "// schön", "/* in the middle 🙂 of a comment */"]; + + for source in cases { + let mut lexer = Lexer::new(source); + assert!( + lexer.any(|token| matches!(token, Err(LexerErrorKind::NonAsciiComment { .. }))), + "Expected NonAsciiComment error" + ); + } + } } diff --git a/noir_stdlib/src/collections/map.nr b/noir_stdlib/src/collections/map.nr index d3c4d3d99b4..a336a01d101 100644 --- a/noir_stdlib/src/collections/map.nr +++ b/noir_stdlib/src/collections/map.nr @@ -4,7 +4,7 @@ use crate::default::Default; use crate::hash::{Hash, Hasher, BuildHasher}; use crate::collections::bounded_vec::BoundedVec; -// We use load factor α_max = 0.75. +// We use load factor alpha_max = 0.75. // Upon exceeding it, assert will fail in order to inform the user // about performance degradation, so that he can adjust the capacity. global MAX_LOAD_FACTOR_NUMERATOR = 3; @@ -624,7 +624,7 @@ impl HashMap { (hash + (attempt + attempt * attempt) / 2) % N } - // Amount of elements in the table in relation to available slots exceeds α_max. + // Amount of elements in the table in relation to available slots exceeds alpha_max. // To avoid a comparatively more expensive division operation // we conduct cross-multiplication instead. // n / m >= MAX_LOAD_FACTOR_NUMERATOR / MAX_LOAD_FACTOR_DEN0MINATOR diff --git a/noir_stdlib/src/ec/mod.nr b/noir_stdlib/src/ec/mod.nr index 093852acc79..3c1ba87eb9f 100644 --- a/noir_stdlib/src/ec/mod.nr +++ b/noir_stdlib/src/ec/mod.nr @@ -3,15 +3,15 @@ // ======== // The following three elliptic curve representations are admissible: mod tecurve; // Twisted Edwards curves -mod swcurve; // Elliptic curves in Short Weierstraß form +mod swcurve; // Elliptic curves in Short Weierstrass form mod montcurve; // Montgomery curves mod consts; // Commonly used curve presets // // Note that Twisted Edwards and Montgomery curves are (birationally) equivalent, so that -// they may be freely converted between one another, whereas Short Weierstraß curves are +// they may be freely converted between one another, whereas Short Weierstrass curves are // more general. Diagramatically: // -// tecurve == montcurve ⊂ swcurve +// tecurve == montcurve `subset` swcurve // // Each module is further divided into two submodules, 'affine' and 'curvegroup', depending // on the preferred coordinate representation. Affine coordinates are none other than the usual @@ -47,7 +47,7 @@ mod consts; // Commonly used curve presets // coordinates by calling the `into_group` (resp. `into_affine`) method on them. Finally, // Points may be freely mapped between their respective Twisted Edwards and Montgomery // representations by calling the `into_montcurve` or `into_tecurve` methods. For mappings -// between Twisted Edwards/Montgomery curves and Short Weierstraß curves, see the Curve section +// between Twisted Edwards/Montgomery curves and Short Weierstrass curves, see the Curve section // below, as the underlying mappings are those of curves rather than ambient spaces. // As a rule, Points in affine (or CurveGroup) coordinates are mapped to Points in affine // (resp. CurveGroup) coordinates. @@ -91,21 +91,21 @@ mod consts; // Commonly used curve presets // Curve configurations may also be converted between different curve representations by calling the `into_swcurve`, // `into_montcurve` and `into_tecurve` methods subject to the relation between the curve representations mentioned // above. Note that it is possible to map Points from a Twisted Edwards/Montgomery curve to the corresponding -// Short Weierstraß representation and back, and the methods to do so are exposed as `map_into_swcurve` and +// Short Weierstrass representation and back, and the methods to do so are exposed as `map_into_swcurve` and // `map_from_swcurve`, which each take one argument, the point to be mapped. // // Curve maps // ========== // There are a few different ways of mapping Field elements to elliptic curves. Here we provide the simplified // Shallue-van de Woestijne-Ulas and Elligator 2 methods, the former being applicable to all curve types -// provided above subject to the constraint that the coefficients of the corresponding Short Weierstraß curve satisfies +// provided above subject to the constraint that the coefficients of the corresponding Short Weierstrass curve satisfies // a*b != 0 and the latter being applicable to Montgomery and Twisted Edwards curves subject to the constraint that // the coefficients of the corresponding Montgomery curve satisfy j*k != 0 and (j^2 - 4)/k^2 is non-square. // // The simplified Shallue-van de Woestijne-Ulas method is exposed as the method `swu_map` on the Curve configuration and // depends on two parameters, a Field element z != -1 for which g(x) - z is irreducible over Field and g(b/(z*a)) is // square, where g(x) = x^3 + a*x + b is the right-hand side of the defining equation of the corresponding Short -// Weierstraß curve, and a Field element u to be mapped onto the curve. For example, in the case of bjj_affine above, +// Weierstrass curve, and a Field element u to be mapped onto the curve. For example, in the case of bjj_affine above, // it may be determined using the scripts provided at that z = 5. // // The Elligator 2 method is exposed as the method `elligator2_map` on the Curve configurations of Montgomery and diff --git a/noir_stdlib/src/ec/montcurve.nr b/noir_stdlib/src/ec/montcurve.nr index 68b5c67dcba..395e8528b45 100644 --- a/noir_stdlib/src/ec/montcurve.nr +++ b/noir_stdlib/src/ec/montcurve.nr @@ -145,7 +145,7 @@ mod affine { TECurve::new((j + 2) / k, (j - 2) / k, gen.into_tecurve()) } - // Conversion to equivalent Short Weierstraß curve + // Conversion to equivalent Short Weierstrass curve pub fn into_swcurve(self) -> SWCurve { let j = self.j; let k = self.k; @@ -155,7 +155,7 @@ mod affine { SWCurve::new(a0, b0, self.map_into_swcurve(self.gen)) } - // Point mapping into equivalent Short Weierstraß curve + // Point mapping into equivalent Short Weierstrass curve pub fn map_into_swcurve(self, p: Point) -> SWPoint { if p.is_zero() { SWPoint::zero() @@ -164,7 +164,7 @@ mod affine { } } - // Point mapping from equivalent Short Weierstraß curve + // Point mapping from equivalent Short Weierstrass curve fn map_from_swcurve(self, p: SWPoint) -> Point { let SWPoint {x, y, infty} = p; let j = self.j; @@ -347,7 +347,7 @@ mod curvegroup { TECurve::new((j + 2) / k, (j - 2) / k, gen.into_tecurve()) } - // Conversion to equivalent Short Weierstraß curve + // Conversion to equivalent Short Weierstrass curve fn into_swcurve(self) -> SWCurve { let j = self.j; let k = self.k; @@ -357,12 +357,12 @@ mod curvegroup { SWCurve::new(a0, b0, self.map_into_swcurve(self.gen)) } - // Point mapping into equivalent Short Weierstraß curve + // Point mapping into equivalent Short Weierstrass curve pub fn map_into_swcurve(self, p: Point) -> SWPoint { self.into_affine().map_into_swcurve(p.into_affine()).into_group() } - // Point mapping from equivalent Short Weierstraß curve + // Point mapping from equivalent Short Weierstrass curve fn map_from_swcurve(self, p: SWPoint) -> Point { self.into_affine().map_from_swcurve(p.into_affine()).into_group() } diff --git a/noir_stdlib/src/ec/swcurve.nr b/noir_stdlib/src/ec/swcurve.nr index 238b0ce3c91..839069e1fd8 100644 --- a/noir_stdlib/src/ec/swcurve.nr +++ b/noir_stdlib/src/ec/swcurve.nr @@ -1,5 +1,5 @@ mod affine { - // Affine representation of Short Weierstraß curves + // Affine representation of Short Weierstrass curves // Points are represented by two-dimensional Cartesian coordinates. // Group operations are implemented in terms of those in CurveGroup (in this case, extended Twisted Edwards) coordinates // for reasons of efficiency, cf. . @@ -10,7 +10,7 @@ mod affine { use crate::cmp::Eq; // Curve specification - pub struct Curve { // Short Weierstraß curve + pub struct Curve { // Short Weierstrass curve // Coefficients in defining equation y^2 = x^3 + ax + b a: Field, b: Field, @@ -187,14 +187,14 @@ mod affine { } mod curvegroup { - // CurveGroup representation of Weierstraß curves + // CurveGroup representation of Weierstrass curves // Points are represented by three-dimensional Jacobian coordinates. // See for details. use crate::ec::swcurve::affine; use crate::cmp::Eq; // Curve specification - pub struct Curve { // Short Weierstraß curve + pub struct Curve { // Short Weierstrass curve // Coefficients in defining equation y^2 = x^3 + axz^4 + bz^6 a: Field, b: Field, diff --git a/noir_stdlib/src/ec/tecurve.nr b/noir_stdlib/src/ec/tecurve.nr index 760d9dc2b82..b306873806d 100644 --- a/noir_stdlib/src/ec/tecurve.nr +++ b/noir_stdlib/src/ec/tecurve.nr @@ -166,17 +166,17 @@ mod affine { MCurve::new(j, k, gen_montcurve) } - // Conversion to equivalent Short Weierstraß curve + // Conversion to equivalent Short Weierstrass curve pub fn into_swcurve(self) -> SWCurve { self.into_montcurve().into_swcurve() } - // Point mapping into equivalent Short Weierstraß curve + // Point mapping into equivalent Short Weierstrass curve pub fn map_into_swcurve(self, p: Point) -> SWPoint { self.into_montcurve().map_into_swcurve(p.into_montcurve()) } - // Point mapping from equivalent Short Weierstraß curve + // Point mapping from equivalent Short Weierstrass curve fn map_from_swcurve(self, p: SWPoint) -> Point { self.into_montcurve().map_from_swcurve(p).into_tecurve() } @@ -195,7 +195,7 @@ mod affine { mod curvegroup { // CurveGroup coordinate representation of Twisted Edwards curves // Points are represented by four-dimensional projective coordinates, viz. extended Twisted Edwards coordinates. - // See §3 of for details. + // See section 3 of for details. use crate::ec::tecurve::affine; use crate::ec::montcurve::curvegroup::Curve as MCurve; use crate::ec::montcurve::curvegroup::Point as MPoint; @@ -317,7 +317,7 @@ mod curvegroup { Point::new(x, y, t, z) } - // Point doubling, cf. §3.3 + // Point doubling, cf. section 3.3 pub fn double(self, p: Point) -> Point { let Point{x, y, t: _t, z} = p; @@ -385,17 +385,17 @@ mod curvegroup { self.into_affine().into_montcurve().into_group() } - // Conversion to equivalent Short Weierstraß curve + // Conversion to equivalent Short Weierstrass curve fn into_swcurve(self) -> SWCurve { self.into_montcurve().into_swcurve() } - // Point mapping into equivalent short Weierstraß curve + // Point mapping into equivalent short Weierstrass curve pub fn map_into_swcurve(self, p: Point) -> SWPoint { self.into_montcurve().map_into_swcurve(p.into_montcurve()) } - // Point mapping from equivalent short Weierstraß curve + // Point mapping from equivalent short Weierstrass curve fn map_from_swcurve(self, p: SWPoint) -> Point { self.into_montcurve().map_from_swcurve(p).into_tecurve() } diff --git a/noir_stdlib/src/field/mod.nr b/noir_stdlib/src/field/mod.nr index 176f102321d..e1d08c6c230 100644 --- a/noir_stdlib/src/field/mod.nr +++ b/noir_stdlib/src/field/mod.nr @@ -118,7 +118,7 @@ impl Field { r } - // Parity of (prime) Field element, i.e. sgn0(x mod p) = 0 if x ∈ {0, ..., p-1} is even, otherwise sgn0(x mod p) = 1. + // Parity of (prime) Field element, i.e. sgn0(x mod p) = 0 if x `elem` {0, ..., p-1} is even, otherwise sgn0(x mod p) = 1. pub fn sgn0(self) -> u1 { self as u1 } diff --git a/noir_stdlib/src/hash/poseidon/bn254.nr b/noir_stdlib/src/hash/poseidon/bn254.nr index 848d561f755..a1c78a9b31c 100644 --- a/noir_stdlib/src/hash/poseidon/bn254.nr +++ b/noir_stdlib/src/hash/poseidon/bn254.nr @@ -4,7 +4,7 @@ mod consts; use crate::hash::poseidon::absorb; -// Variable-length Poseidon-128 sponge as suggested in second bullet point of §3 of https://eprint.iacr.org/2019/458.pdf +// Variable-length Poseidon-128 sponge as suggested in second bullet point of section 3 of https://eprint.iacr.org/2019/458.pdf #[field(bn254)] pub fn sponge(msg: [Field; N]) -> Field { absorb(consts::x5_5_config(), [0; 5], 4, 1, msg)[1] diff --git a/tooling/nargo_fmt/src/visitor.rs b/tooling/nargo_fmt/src/visitor.rs index db084e5a49d..50f1ca69fcd 100644 --- a/tooling/nargo_fmt/src/visitor.rs +++ b/tooling/nargo_fmt/src/visitor.rs @@ -36,7 +36,7 @@ impl<'me> FmtVisitor<'me> { pub(crate) fn slice(&self, span: impl Into) -> &'me str { let span = span.into(); - &self.source[span.start() as usize..span.end() as usize] + str_slice(self.source, span.start() as usize, span.end() as usize) } pub(crate) fn span_after(&self, span: impl Into, token: Token) -> Span { @@ -188,7 +188,7 @@ impl<'me> FmtVisitor<'me> { match comment.token() { Token::LineComment(_, _) | Token::BlockComment(_, _) => { - let comment = &slice[span.start() as usize..span.end() as usize]; + let comment = str_slice(slice, span.start() as usize, span.end() as usize); if result.ends_with('\n') { result.push_str(&indent); } else if !self.at_start() { @@ -247,6 +247,19 @@ impl<'me> FmtVisitor<'me> { } } +pub(crate) fn str_slice(s: &str, start: usize, end: usize) -> &str { + &s[start..ceil_char_boundary(s, end)] +} + +pub(crate) fn ceil_char_boundary(s: &str, byte_index: usize) -> usize { + for i in byte_index..s.len() { + if s.is_char_boundary(i) { + return i; + } + } + s.len() +} + #[derive(Clone, Copy, Debug, Default)] pub(crate) struct Indent { block_indent: usize,