Skip to content

Commit

Permalink
refactor(parser): add Lexer::consume_2_chars
Browse files Browse the repository at this point in the history
  • Loading branch information
overlookmotel committed Jul 31, 2024
1 parent ddd54af commit 8db8095
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 6 deletions.
94 changes: 94 additions & 0 deletions crates/oxc_codegen/tests/integration/snapshots/ts.snap.new
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
---
source: crates/oxc_codegen/tests/integration/ts.rs
assertion_line: 47
---
let x: string = `\x01`;
let x: string = `\x01`;

function foo<T extends string>(x: T, y: string, ...restOfParams: Omit<T, 'x'>): T {
return x;
}

let x: string[] = ['abc', 'def', 'ghi'];
let x: string[] = ['abc', 'def', 'ghi'];

let x: Array<string> = ['abc', 'def', 'ghi',];
let x: Array<string> = ['abc', 'def', 'ghi',];

let x: [string, number] = ['abc', 123];
let x: [string, number] = ['abc', 123];

let x: string | number = 'abc';
let x: string | number = 'abc';

let x: string & number = 'abc';
let x: string & number = 'abc';

let x: typeof String = 'string';
let x: typeof String = 'string';

let x: keyof string = 'length';
let x: keyof string = 'length';

let x: keyof typeof String = 'length';
let x: keyof typeof String = 'length';

let x: string['length'] = 123;
let x: string['length'] = 123;

function isString(value: unknown): asserts value is string {
if (typeof value !== 'string') {
throw new Error('Not a string');
}
}
function isString(value: unknown): asserts value is string {
if (typeof value !== 'string') {
throw new Error('Not a string');
}
}

import type { Foo } from 'foo';
import type { Foo } from 'foo';

import { Foo, type Bar } from 'foo';
import { Foo, type Bar } from 'foo';

export { Foo, type Bar } from 'foo';
export { Foo, type Bar } from 'foo';

type A<T> = { [K in keyof T as K extends string ? B<K> : K ]: T[K] }
type A<T> = { [K in keyof T as K extends string ? B<K> : K] : T[K]};

class A {readonly type = 'frame'}
class A {
readonly type = 'frame';
}

let foo: { <T>(t: T): void }
let foo: {<T>(t: T): void};

let foo: { new <T>(t: T): void }
let foo: {new <T>(t: T): void};

function <const T>(){}
function<const T>() {}

class A {m?(): void}
class A {
m?(): void;
}

class A {constructor(public readonly a: number) {}}
class A {
constructor(public readonly a: number) {}
}

abstract class A {private abstract static m() {}}
abstract class A {
private abstract static m() {}
}

abstract class A {private abstract static readonly prop: string}
abstract class A {
private abstract static readonly prop: string;
}
3 changes: 1 addition & 2 deletions crates/oxc_parser/src/lexer/byte_handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,7 @@ ascii_byte_handler!(QST(lexer) {
match next_2_bytes[0] {
b'?' => {
if next_2_bytes[1] == b'=' {
lexer.consume_char();
lexer.consume_char();
lexer.consume_2_chars();
Kind::Question2Eq
} else {
lexer.consume_char();
Expand Down
12 changes: 12 additions & 0 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,18 @@ impl<'a> Lexer<'a> {
self.source.next_char().unwrap()
}

/// Consume the next 2 chars of source and return them.
///
/// Thin wrapper which delegates to `self.source.next_2_chars()`.
/// Returns `None` if the source cannot supply 2 more chars. This covers the case
/// where only a single char remains, not just EOF.
#[inline]
fn next_2_chars(&mut self) -> Option<[char; 2]> {
self.source.next_2_chars()
}

/// Consume the next 2 chars of source and return them.
///
/// Intended for call sites which have already peeked ahead and know that
/// 2 more chars are present.
///
/// # Panics
/// Panics if `next_2_chars` returns `None`.
#[inline]
fn consume_2_chars(&mut self) -> [char; 2] {
    let pair = self.next_2_chars();
    pair.unwrap()
}

/// Peek the next byte without advancing the position
#[inline]
fn peek_byte(&self) -> Option<u8> {
Expand Down
3 changes: 1 addition & 2 deletions crates/oxc_parser/src/lexer/punctuation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ impl<'a> Lexer<'a> {
/// Section 12.8 Punctuators
pub(super) fn read_dot(&mut self) -> Kind {
if self.peek_2_bytes() == Some([b'.', b'.']) {
self.consume_char();
self.consume_char();
self.consume_2_chars();
return Kind::Dot3;
}
if self.peek_byte().is_some_and(|b| b.is_ascii_digit()) {
Expand Down
30 changes: 30 additions & 0 deletions crates/oxc_parser/src/lexer/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,36 @@ impl<'a> Source<'a> {
Some(c)
}

/// Read the next 2 chars of source, and move position to just after them.
///
/// Returns `None` if fewer than 2 chars remain. In that case this function
/// does not write to `self.ptr`.
#[inline]
pub(super) fn next_2_chars(&mut self) -> Option<[char; 2]> {
    // Exit early if fewer than 2 bytes remain (which also means fewer than 2 chars remain)
    let [first, second] = self.peek_2_bytes()?;

    // Fast path: both bytes are ASCII, so each byte is a complete char
    if first.is_ascii() && second.is_ascii() {
        // SAFETY: `peek_2_bytes` returned `Some`, so at least 2 bytes remain.
        // Both of those bytes are ASCII, i.e. single-byte chars, so moving `ptr`
        // forward by 2 bytes keeps it in bounds and on a UTF-8 character boundary.
        self.ptr = unsafe { self.ptr.add(2) };
        return Some([char::from(first), char::from(second)]);
    }

    // Slow path: at least one of the next 2 chars is a multi-byte Unicode character.
    // `ptr` must always sit on a UTF-8 character boundary, so the first byte
    // cannot be a continuation byte.
    debug_assert!(!is_utf8_cont_byte(first));

    // Decode via a `Chars` iterator over the remaining source, then adopt the iterator's
    // position as the new `self.ptr`. `Chars` upholds the same invariants as `Source`,
    // so the pointer it ends up at is a valid value for `self.ptr`.
    let mut iter = self.remaining().chars();
    // SAFETY: At least 2 bytes remain to be consumed, so the first call to
    // `iter.next()` must return `Some(_)`
    let head = unsafe { iter.next().unwrap_unchecked() };
    // There may be only 1 char left (a lone multi-byte char). If so, return `None`
    // here, before `self.ptr` is updated.
    let tail = iter.next()?;
    self.ptr = iter.as_str().as_ptr();
    Some([head, tail])
}

/// Get next byte of source, and advance position to after it.
///
/// # SAFETY
Expand Down
3 changes: 1 addition & 2 deletions crates/oxc_parser/src/lexer/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,7 @@ impl<'a> Lexer<'a> {
return Some(SurrogatePair::CodePoint(high));
}

self.consume_char();
self.consume_char();
self.consume_2_chars();

let low = self.hex_4_digits()?;

Expand Down

0 comments on commit 8db8095

Please sign in to comment.