Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Handle multi-byte utf8 characters in formatter #6118

Merged
merged 9 commits into from
Sep 24, 2024
6 changes: 6 additions & 0 deletions compiler/noirc_frontend/src/lexer/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ pub enum LexerErrorKind {
InvalidEscape { escaped: char, span: Span },
#[error("Invalid quote delimiter `{delimiter}`, valid delimiters are `{{`, `[`, and `(`")]
InvalidQuoteDelimiter { delimiter: SpannedToken },
#[error("Non-ASCII characters are invalid in comments")]
NonAsciiComment { span: Span },
#[error("Expected `{end_delim}` to close this {start_delim}")]
UnclosedQuote { start_delim: SpannedToken, end_delim: Token },
}
Expand Down Expand Up @@ -65,6 +67,7 @@ impl LexerErrorKind {
LexerErrorKind::UnterminatedStringLiteral { span } => *span,
LexerErrorKind::InvalidEscape { span, .. } => *span,
LexerErrorKind::InvalidQuoteDelimiter { delimiter } => delimiter.to_span(),
LexerErrorKind::NonAsciiComment { span, .. } => *span,
LexerErrorKind::UnclosedQuote { start_delim, .. } => start_delim.to_span(),
}
}
Expand Down Expand Up @@ -124,6 +127,9 @@ impl LexerErrorKind {
LexerErrorKind::InvalidQuoteDelimiter { delimiter } => {
(format!("Invalid quote delimiter `{delimiter}`"), "Valid delimiters are `{`, `[`, and `(`".to_string(), delimiter.to_span())
},
LexerErrorKind::NonAsciiComment { span } => {
("Non-ASCII character in comment".to_string(), "Invalid comment character: only ASCII is currently supported.".to_string(), *span)
}
LexerErrorKind::UnclosedQuote { start_delim, end_delim } => {
("Unclosed `quote` expression".to_string(), format!("Expected a `{end_delim}` to close this `{start_delim}`"), start_delim.to_span())
}
Expand Down
24 changes: 24 additions & 0 deletions compiler/noirc_frontend/src/lexer/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
position: Position,
done: bool,
skip_comments: bool,
skip_whitespaces: bool,

Check warning on line 21 in compiler/noirc_frontend/src/lexer/lexer.rs

View workflow job for this annotation

GitHub Actions / Code

Unknown word (whitespaces)
max_integer: BigInt,
}

Expand Down Expand Up @@ -46,8 +46,8 @@
position: 0,
done: false,
skip_comments: true,
skip_whitespaces: true,

Check warning on line 49 in compiler/noirc_frontend/src/lexer/lexer.rs

View workflow job for this annotation

GitHub Actions / Code

Unknown word (whitespaces)
max_integer: BigInt::from_biguint(num_bigint::Sign::Plus, FieldElement::modulus())

Check warning on line 50 in compiler/noirc_frontend/src/lexer/lexer.rs

View workflow job for this annotation

GitHub Actions / Code

Unknown word (biguint)
- BigInt::one(),
}
}
Expand All @@ -57,8 +57,8 @@
self
}

pub fn skip_whitespaces(mut self, flag: bool) -> Self {

Check warning on line 60 in compiler/noirc_frontend/src/lexer/lexer.rs

View workflow job for this annotation

GitHub Actions / Code

Unknown word (whitespaces)
self.skip_whitespaces = flag;

Check warning on line 61 in compiler/noirc_frontend/src/lexer/lexer.rs

View workflow job for this annotation

GitHub Actions / Code

Unknown word (whitespaces)
self
}

Expand Down Expand Up @@ -606,6 +606,11 @@
};
let comment = self.eat_while(None, |ch| ch != '\n');

if !comment.is_ascii() {
let span = Span::from(start..self.position);
return Err(LexerErrorKind::NonAsciiComment { span });
}

if doc_style.is_none() && self.skip_comments {
return self.next_token();
}
Expand Down Expand Up @@ -651,6 +656,11 @@
}

if depth == 0 {
if !content.is_ascii() {
let span = Span::from(start..self.position);
return Err(LexerErrorKind::NonAsciiComment { span });
}

if doc_style.is_none() && self.skip_comments {
return self.next_token();
}
Expand Down Expand Up @@ -1331,6 +1341,7 @@

Err(LexerErrorKind::InvalidIntegerLiteral { .. })
| Err(LexerErrorKind::UnexpectedCharacter { .. })
| Err(LexerErrorKind::NonAsciiComment { .. })
| Err(LexerErrorKind::UnterminatedBlockComment { .. }) => {
expected_token_found = true;
}
Expand Down Expand Up @@ -1389,4 +1400,17 @@
}
}
}

#[test]
fn test_non_ascii_comments() {
let cases = vec!["// 🙂", "// schön", "/* in the middle 🙂 of a comment */"];

for source in cases {
let mut lexer = Lexer::new(source);
assert!(
lexer.any(|token| matches!(token, Err(LexerErrorKind::NonAsciiComment { .. }))),
"Expected NonAsciiComment error"
);
}
}
}
4 changes: 2 additions & 2 deletions noir_stdlib/src/collections/map.nr
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::default::Default;
use crate::hash::{Hash, Hasher, BuildHasher};
use crate::collections::bounded_vec::BoundedVec;

// We use load factor α_max = 0.75.
// We use load factor alpha_max = 0.75.
// Upon exceeding it, assert will fail in order to inform the user
// about performance degradation, so that he can adjust the capacity.
global MAX_LOAD_FACTOR_NUMERATOR = 3;
Expand Down Expand Up @@ -624,7 +624,7 @@ impl<K, V, let N: u32, B> HashMap<K, V, N, B> {
(hash + (attempt + attempt * attempt) / 2) % N
}

// Amount of elements in the table in relation to available slots exceeds α_max.
// Amount of elements in the table in relation to available slots exceeds alpha_max.
// To avoid a comparatively more expensive division operation
// we conduct cross-multiplication instead.
// n / m >= MAX_LOAD_FACTOR_NUMERATOR / MAX_LOAD_FACTOR_DEN0MINATOR
Expand Down
14 changes: 7 additions & 7 deletions noir_stdlib/src/ec/mod.nr
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
// ========
// The following three elliptic curve representations are admissible:
mod tecurve; // Twisted Edwards curves
mod swcurve; // Elliptic curves in Short Weierstraß form
mod swcurve; // Elliptic curves in Short Weierstrass form
mod montcurve; // Montgomery curves
mod consts; // Commonly used curve presets
//
// Note that Twisted Edwards and Montgomery curves are (birationally) equivalent, so that
// they may be freely converted between one another, whereas Short Weierstraß curves are
// they may be freely converted between one another, whereas Short Weierstrass curves are
// more general. Diagramatically:
//
// tecurve == montcurve swcurve
// tecurve == montcurve `subset` swcurve
//
// Each module is further divided into two submodules, 'affine' and 'curvegroup', depending
// on the preferred coordinate representation. Affine coordinates are none other than the usual
Expand Down Expand Up @@ -47,7 +47,7 @@ mod consts; // Commonly used curve presets
// coordinates by calling the `into_group` (resp. `into_affine`) method on them. Finally,
// Points may be freely mapped between their respective Twisted Edwards and Montgomery
// representations by calling the `into_montcurve` or `into_tecurve` methods. For mappings
// between Twisted Edwards/Montgomery curves and Short Weierstraß curves, see the Curve section
// between Twisted Edwards/Montgomery curves and Short Weierstrass curves, see the Curve section
// below, as the underlying mappings are those of curves rather than ambient spaces.
// As a rule, Points in affine (or CurveGroup) coordinates are mapped to Points in affine
// (resp. CurveGroup) coordinates.
Expand Down Expand Up @@ -91,21 +91,21 @@ mod consts; // Commonly used curve presets
// Curve configurations may also be converted between different curve representations by calling the `into_swcurve`,
// `into_montcurve` and `into_tecurve` methods subject to the relation between the curve representations mentioned
// above. Note that it is possible to map Points from a Twisted Edwards/Montgomery curve to the corresponding
// Short Weierstraß representation and back, and the methods to do so are exposed as `map_into_swcurve` and
// Short Weierstrass representation and back, and the methods to do so are exposed as `map_into_swcurve` and
// `map_from_swcurve`, which each take one argument, the point to be mapped.
//
// Curve maps
// ==========
// There are a few different ways of mapping Field elements to elliptic curves. Here we provide the simplified
// Shallue-van de Woestijne-Ulas and Elligator 2 methods, the former being applicable to all curve types
// provided above subject to the constraint that the coefficients of the corresponding Short Weierstraß curve satisfies
// provided above subject to the constraint that the coefficients of the corresponding Short Weierstrass curve satisfies
// a*b != 0 and the latter being applicable to Montgomery and Twisted Edwards curves subject to the constraint that
// the coefficients of the corresponding Montgomery curve satisfy j*k != 0 and (j^2 - 4)/k^2 is non-square.
//
// The simplified Shallue-van de Woestijne-Ulas method is exposed as the method `swu_map` on the Curve configuration and
// depends on two parameters, a Field element z != -1 for which g(x) - z is irreducible over Field and g(b/(z*a)) is
// square, where g(x) = x^3 + a*x + b is the right-hand side of the defining equation of the corresponding Short
// Weierstraß curve, and a Field element u to be mapped onto the curve. For example, in the case of bjj_affine above,
// Weierstrass curve, and a Field element u to be mapped onto the curve. For example, in the case of bjj_affine above,
// it may be determined using the scripts provided at <https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve> that z = 5.
//
// The Elligator 2 method is exposed as the method `elligator2_map` on the Curve configurations of Montgomery and
Expand Down
12 changes: 6 additions & 6 deletions noir_stdlib/src/ec/montcurve.nr
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ mod affine {
TECurve::new((j + 2) / k, (j - 2) / k, gen.into_tecurve())
}

// Conversion to equivalent Short Weierstraß curve
// Conversion to equivalent Short Weierstrass curve
pub fn into_swcurve(self) -> SWCurve {
let j = self.j;
let k = self.k;
Expand All @@ -155,7 +155,7 @@ mod affine {
SWCurve::new(a0, b0, self.map_into_swcurve(self.gen))
}

// Point mapping into equivalent Short Weierstraß curve
// Point mapping into equivalent Short Weierstrass curve
pub fn map_into_swcurve(self, p: Point) -> SWPoint {
if p.is_zero() {
SWPoint::zero()
Expand All @@ -164,7 +164,7 @@ mod affine {
}
}

// Point mapping from equivalent Short Weierstraß curve
// Point mapping from equivalent Short Weierstrass curve
fn map_from_swcurve(self, p: SWPoint) -> Point {
let SWPoint {x, y, infty} = p;
let j = self.j;
Expand Down Expand Up @@ -347,7 +347,7 @@ mod curvegroup {
TECurve::new((j + 2) / k, (j - 2) / k, gen.into_tecurve())
}

// Conversion to equivalent Short Weierstraß curve
// Conversion to equivalent Short Weierstrass curve
fn into_swcurve(self) -> SWCurve {
let j = self.j;
let k = self.k;
Expand All @@ -357,12 +357,12 @@ mod curvegroup {
SWCurve::new(a0, b0, self.map_into_swcurve(self.gen))
}

// Point mapping into equivalent Short Weierstraß curve
// Point mapping into equivalent Short Weierstrass curve
pub fn map_into_swcurve(self, p: Point) -> SWPoint {
self.into_affine().map_into_swcurve(p.into_affine()).into_group()
}

// Point mapping from equivalent Short Weierstraß curve
// Point mapping from equivalent Short Weierstrass curve
fn map_from_swcurve(self, p: SWPoint) -> Point {
self.into_affine().map_from_swcurve(p.into_affine()).into_group()
}
Expand Down
8 changes: 4 additions & 4 deletions noir_stdlib/src/ec/swcurve.nr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
mod affine {
// Affine representation of Short Weierstraß curves
// Affine representation of Short Weierstrass curves
// Points are represented by two-dimensional Cartesian coordinates.
// Group operations are implemented in terms of those in CurveGroup (in this case, extended Twisted Edwards) coordinates
// for reasons of efficiency, cf. <https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates>.
Expand All @@ -10,7 +10,7 @@ mod affine {
use crate::cmp::Eq;

// Curve specification
pub struct Curve { // Short Weierstraß curve
pub struct Curve { // Short Weierstrass curve
// Coefficients in defining equation y^2 = x^3 + ax + b
a: Field,
b: Field,
Expand Down Expand Up @@ -187,14 +187,14 @@ mod affine {
}

mod curvegroup {
// CurveGroup representation of Weierstraß curves
// CurveGroup representation of Weierstrass curves
// Points are represented by three-dimensional Jacobian coordinates.
// See <https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates> for details.
use crate::ec::swcurve::affine;
use crate::cmp::Eq;

// Curve specification
pub struct Curve { // Short Weierstraß curve
pub struct Curve { // Short Weierstrass curve
// Coefficients in defining equation y^2 = x^3 + axz^4 + bz^6
a: Field,
b: Field,
Expand Down
16 changes: 8 additions & 8 deletions noir_stdlib/src/ec/tecurve.nr
Original file line number Diff line number Diff line change
Expand Up @@ -166,17 +166,17 @@ mod affine {
MCurve::new(j, k, gen_montcurve)
}

// Conversion to equivalent Short Weierstraß curve
// Conversion to equivalent Short Weierstrass curve
pub fn into_swcurve(self) -> SWCurve {
self.into_montcurve().into_swcurve()
}

// Point mapping into equivalent Short Weierstraß curve
// Point mapping into equivalent Short Weierstrass curve
pub fn map_into_swcurve(self, p: Point) -> SWPoint {
self.into_montcurve().map_into_swcurve(p.into_montcurve())
}

// Point mapping from equivalent Short Weierstraß curve
// Point mapping from equivalent Short Weierstrass curve
fn map_from_swcurve(self, p: SWPoint) -> Point {
self.into_montcurve().map_from_swcurve(p).into_tecurve()
}
Expand All @@ -195,7 +195,7 @@ mod affine {
mod curvegroup {
// CurveGroup coordinate representation of Twisted Edwards curves
// Points are represented by four-dimensional projective coordinates, viz. extended Twisted Edwards coordinates.
// See §3 of <https://eprint.iacr.org/2008/522.pdf> for details.
// See section 3 of <https://eprint.iacr.org/2008/522.pdf> for details.
use crate::ec::tecurve::affine;
use crate::ec::montcurve::curvegroup::Curve as MCurve;
use crate::ec::montcurve::curvegroup::Point as MPoint;
Expand Down Expand Up @@ -317,7 +317,7 @@ mod curvegroup {
Point::new(x, y, t, z)
}

// Point doubling, cf. §3.3
// Point doubling, cf. section 3.3
pub fn double(self, p: Point) -> Point {
let Point{x, y, t: _t, z} = p;

Expand Down Expand Up @@ -385,17 +385,17 @@ mod curvegroup {
self.into_affine().into_montcurve().into_group()
}

// Conversion to equivalent Short Weierstraß curve
// Conversion to equivalent Short Weierstrass curve
fn into_swcurve(self) -> SWCurve {
self.into_montcurve().into_swcurve()
}

// Point mapping into equivalent short Weierstraß curve
// Point mapping into equivalent short Weierstrass curve
pub fn map_into_swcurve(self, p: Point) -> SWPoint {
self.into_montcurve().map_into_swcurve(p.into_montcurve())
}

// Point mapping from equivalent short Weierstraß curve
// Point mapping from equivalent short Weierstrass curve
fn map_from_swcurve(self, p: SWPoint) -> Point {
self.into_montcurve().map_from_swcurve(p).into_tecurve()
}
Expand Down
2 changes: 1 addition & 1 deletion noir_stdlib/src/field/mod.nr
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ impl Field {
r
}

// Parity of (prime) Field element, i.e. sgn0(x mod p) = 0 if x {0, ..., p-1} is even, otherwise sgn0(x mod p) = 1.
// Parity of (prime) Field element, i.e. sgn0(x mod p) = 0 if x `elem` {0, ..., p-1} is even, otherwise sgn0(x mod p) = 1.
pub fn sgn0(self) -> u1 {
self as u1
}
Expand Down
2 changes: 1 addition & 1 deletion noir_stdlib/src/hash/poseidon/bn254.nr
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod consts;

use crate::hash::poseidon::absorb;

// Variable-length Poseidon-128 sponge as suggested in second bullet point of §3 of https://eprint.iacr.org/2019/458.pdf
// Variable-length Poseidon-128 sponge as suggested in second bullet point of section 3 of https://eprint.iacr.org/2019/458.pdf
#[field(bn254)]
pub fn sponge<let N: u32>(msg: [Field; N]) -> Field {
absorb(consts::x5_5_config(), [0; 5], 4, 1, msg)[1]
Expand Down
17 changes: 15 additions & 2 deletions tooling/nargo_fmt/src/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ impl<'me> FmtVisitor<'me> {

pub(crate) fn slice(&self, span: impl Into<Span>) -> &'me str {
let span = span.into();
&self.source[span.start() as usize..span.end() as usize]
str_slice(self.source, span.start() as usize, span.end() as usize)
}

pub(crate) fn span_after(&self, span: impl Into<Span>, token: Token) -> Span {
Expand Down Expand Up @@ -188,7 +188,7 @@ impl<'me> FmtVisitor<'me> {

match comment.token() {
Token::LineComment(_, _) | Token::BlockComment(_, _) => {
let comment = &slice[span.start() as usize..span.end() as usize];
let comment = str_slice(slice, span.start() as usize, span.end() as usize);
if result.ends_with('\n') {
result.push_str(&indent);
} else if !self.at_start() {
Expand Down Expand Up @@ -247,6 +247,19 @@ impl<'me> FmtVisitor<'me> {
}
}

pub(crate) fn str_slice(s: &str, start: usize, end: usize) -> &str {
&s[start..ceil_char_boundary(s, end)]
}

pub(crate) fn ceil_char_boundary(s: &str, byte_index: usize) -> usize {
for i in byte_index..s.len() {
if s.is_char_boundary(i) {
return i;
}
}
s.len()
}

#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct Indent {
block_indent: usize,
Expand Down
Loading