Skip to content

Commit

Permalink
Emit escape sequences for private-use characters in expanded mode (#1430
Browse files Browse the repository at this point in the history
)
  • Loading branch information
nex3 authored Aug 16, 2021
1 parent e0e132e commit 2a03907
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 11 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
## 1.37.6

* In expanded mode, emit characters in Unicode private-use areas as escape
sequences rather than literal characters.

* Fix a bug where quotes would be omitted for an attribute selector whose value
was a single backslash.

Expand Down
29 changes: 28 additions & 1 deletion lib/src/util/character.dart
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,34 @@ bool isHex(int? character) {

/// Returns whether [character] is the beginning of a UTF-16 surrogate pair.
///
/// High (leading) surrogates are the code units 0xD800 through 0xDBFF.
bool isHighSurrogate(int character) =>
    // A character is a high surrogate exactly if it matches 0b110110XXXXXXXXXX.
    // 0x36 == 0b110110.
    character >> 10 == 0x36;

/// Whether [character] lies in the Private Use Area of the Basic Multilingual
/// Plane, i.e. in the inclusive range U+E000 through U+F8FF.
///
/// See https://en.wikipedia.org/wiki/Private_Use_Areas for details.
bool isPrivateUseBMP(int character) {
  const firstPrivateUse = 0xE000;
  const lastPrivateUse = 0xF8FF;

  if (character < firstPrivateUse) return false;
  return character <= lastPrivateUse;
}

/// Whether [character] is a UTF-16 high surrogate that begins a code point in
/// one of the supplementary private-use planes.
///
/// See https://en.wikipedia.org/wiki/Private_Use_Areas for details.
bool isPrivateUseHighSurrogate(int character) {
  // The high surrogates for Supplementary Private Use Area-A and -B span
  // 0xDB80 through 0xDBFF. Every code unit in that range shares the nine
  // leading bits 0b110110111 (== 0x1B7) and differs only in its low seven
  // bits, so discarding those seven bits leaves just the shared prefix.
  const sharedPrefix = 0x1B7;
  const variableBits = 7;

  return character >> variableBits == sharedPrefix;
}

/// Combines a UTF-16 high and low surrogate pair into the single code point
/// that the pair encodes.
///
/// See https://en.wikipedia.org/wiki/UTF-16 for details.
int combineSurrogates(int highSurrogate, int lowSurrogate) {
  // 0x3FF == 0b0000001111111111. Masking with it strips the six marker bits
  // that tag a code unit as a high or low surrogate, leaving the ten payload
  // bits each half contributes.
  const payloadMask = 0x3FF;

  final upperBits = (highSurrogate & payloadMask) << 10;
  final lowerBits = lowSurrogate & payloadMask;

  // Surrogate pairs encode code points starting at 0x10000.
  return 0x10000 + upperBits + lowerBits;
}

// Returns whether [character] can start a simple selector other than a type
// selector.
Expand Down
71 changes: 61 additions & 10 deletions lib/src/visitor/serialize.dart
Original file line number Diff line number Diff line change
Expand Up @@ -948,15 +948,7 @@ class _SerializeVisitor
case $gs:
case $rs:
case $us:
buffer.writeCharCode($backslash);
if (char > 0xF) buffer.writeCharCode(hexCharFor(char >> 4));
buffer.writeCharCode(hexCharFor(char & 0xF));
if (string.length == i + 1) break;

var next = string.codeUnitAt(i + 1);
if (isHex(next) || next == $space || next == $tab) {
buffer.writeCharCode($space);
}
_writeEscape(buffer, char, string, i);
break;

case $backslash:
Expand All @@ -965,6 +957,12 @@ class _SerializeVisitor
break;

default:
var newIndex = _tryPrivateUseCharacter(buffer, char, string, i);
if (newIndex != null) {
i = newIndex;
break;
}

buffer.writeCharCode(char);
break;
}
Expand Down Expand Up @@ -996,13 +994,66 @@ class _SerializeVisitor
break;

default:
_buffer.writeCharCode(char);
afterNewline = false;
var newIndex = _tryPrivateUseCharacter(_buffer, char, string, i);
if (newIndex != null) {
i = newIndex;
break;
}

_buffer.writeCharCode(char);
break;
}
}
}

/// If [codeUnit] is (the beginning of) a private-use character and Sass isn't
/// emitting compressed CSS, writes that character as an escape to [buffer].
///
/// The [string] is the string from which [codeUnit] was read, and [i] is the
/// index it was read from. If this successfully writes the character, returns
/// the index of the *last* code unit that was consumed for it. Otherwise,
/// returns `null`.
///
/// A private-use high surrogate is only treated as the start of a
/// supplementary-plane character when the code unit right after it is a low
/// surrogate. For an unpaired high surrogate this returns `null` without
/// writing anything, so the following code unit is never swallowed into a
/// bogus escape.
///
/// In expanded mode, we print all characters in Private Use Areas as escape
/// codes since there's no useful way to render them directly. These
/// characters are often used for glyph fonts, where it's useful for readers
/// to be able to distinguish between them in the rendered stylesheet.
int? _tryPrivateUseCharacter(
    StringBuffer buffer, int codeUnit, String string, int i) {
  if (_isCompressed) return null;

  if (isPrivateUseBMP(codeUnit)) {
    _writeEscape(buffer, codeUnit, string, i);
    return i;
  }

  if (!isPrivateUseHighSurrogate(codeUnit) || string.length <= i + 1) {
    return null;
  }

  // UTF-16 low (trailing) surrogates are the code units 0xDC00 through
  // 0xDFFF. Anything else means the high surrogate is unpaired and there is
  // no supplementary code point to combine.
  var next = string.codeUnitAt(i + 1);
  if (next < 0xDC00 || next > 0xDFFF) return null;

  _writeEscape(buffer, combineSurrogates(codeUnit, next), string, i + 1);
  return i + 1;
}

/// Emits [character] to [buffer] as a backslash followed by its hexadecimal
/// value.
///
/// [i] is the index within [string] of the final code unit that makes up
/// [character]. When the code unit after it is a hex digit, a space, or a
/// tab, a single space is appended after the escape so that the following
/// character can't be read as part of it.
void _writeEscape(StringBuffer buffer, int character, String string, int i) {
  buffer
    ..writeCharCode($backslash)
    ..write(character.toRadixString(16));

  // Nothing follows the escape, so there is nothing to disambiguate.
  var following = i + 1;
  if (string.length == following) return;

  var lookahead = string.codeUnitAt(following);
  var needsSeparator =
      isHex(lookahead) || lookahead == $space || lookahead == $tab;
  if (needsSeparator) buffer.writeCharCode($space);
}

// ## Selectors

void visitAttributeSelector(AttributeSelector attribute) {
Expand Down
29 changes: 29 additions & 0 deletions test/compressed_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,35 @@ void main() {
expect(_compile("a {b: #cc3232}"), equals("a{b:#cc3232}"));
});
});

group("strings", () {
  group("emits private-use area characters as literal characters", () {
    // Registers a test that compiles the hex escape for [character] and
    // expects the output to contain the literal character instead.
    void testCharacter(int character) {
      var escape = "\\${character.toRadixString(16)}";
      test(escape, () {
        expect(
            _compile("a {b: $escape}"),
            equalsIgnoringWhitespace(
                "a{b:${String.fromCharCode(character)}}"));
      });
    }

    group("in the basic multilingual plane", () {
      testCharacter(0xe000);
      testCharacter(0xf000);
      testCharacter(0xf8ff);
    });

    group("in the supplementary planes", () {
      testCharacter(0xf0000);
      testCharacter(0xfabcd);
      testCharacter(0xffffd);
      testCharacter(0x100000);
      testCharacter(0x10abcd);
      testCharacter(0x10fffd);
    });
  });
});
});

group("the top level", () {
Expand Down
46 changes: 46 additions & 0 deletions test/output_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,52 @@ import 'package:test/test.dart';
import 'package:sass/sass.dart';

void main() {
group("emits private-use area characters as escapes in expanded mode", () {
  // Registers a test that compiles [escape] as a declaration value and
  // expects the same escape sequence to appear in the output.
  void testCharacter(String escape) {
    test(escape, () {
      expect(compileString("a {b: $escape}"),
          equalsIgnoringWhitespace("a { b: $escape; }"));
    });
  }

  group("in the basic multilingual plane", () {
    testCharacter(r"\e000");
    testCharacter(r"\f000");
    testCharacter(r"\f8ff");
  });

  group("in the supplementary planes", () {
    testCharacter(r"\f0000");
    testCharacter(r"\fabcd");
    testCharacter(r"\ffffd");
    testCharacter(r"\100000");
    testCharacter(r"\10abcd");
    testCharacter(r"\10fffd");

    // Although these aren't technically in private-use areas, they're in
    // private-use planes and they have no visual representation, so we
    // escape them as well.
    group("that aren't technically in PUAs", () {
      testCharacter(r"\ffffe");
      testCharacter(r"\fffff");
      testCharacter(r"\10fffe");
      testCharacter(r"\10ffff");
    });
  });

  group("adds a space", () {
    test("if followed by a hex character", () {
      expect(compileString(r"a {b: '\e000 a'}"),
          equalsIgnoringWhitespace(r'a { b: "\e000 a"; }'));
    });

    test("if followed by a space", () {
      expect(compileString(r"a {b: '\e000 '}"),
          equalsIgnoringWhitespace(r'a { b: "\e000 "; }'));
    });
  });
});

// Regression test for sass/dart-sass#623. This needs to be tested here
// because sass-spec normalizes CR LF newlines.
group("normalizes newlines in a loud comment", () {
Expand Down

0 comments on commit 2a03907

Please sign in to comment.