Skip to content

Commit

Permalink
ST1018: don't flag ZWJ that are part of emoji sequences
Browse files Browse the repository at this point in the history
Closes gh-1113
  • Loading branch information
dominikh committed Nov 6, 2021
1 parent 05b3355 commit d42e6ab
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 2 deletions.
11 changes: 9 additions & 2 deletions stylecheck/lint.go
Original file line number Diff line number Diff line change
Expand Up @@ -690,14 +690,21 @@ func CheckInvisibleCharacters(pass *analysis.Pass) (interface{}, error) {
var invalids []invalid
hasFormat := false
hasControl := false
prev := rune(-1)
const zwj = '\u200d'
for off, r := range lit.Value {
if unicode.Is(unicode.Cf, r) {
invalids = append(invalids, invalid{r, off})
hasFormat = true
// Don't flag joined emoji. These are multiple emoji joined with ZWJ, which some platforms render as a single composite emoji.
// For the purpose of this check, we treat all symbols, including all symbol modifiers, as emoji.
if r != zwj || (r == zwj && !unicode.Is(unicode.S, prev)) {
invalids = append(invalids, invalid{r, off})
hasFormat = true
}
} else if unicode.Is(unicode.Cc, r) && r != '\n' && r != '\t' && r != '\r' {
invalids = append(invalids, invalid{r, off})
hasControl = true
}
prev = r
}

switch len(invalids) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ var (
est`
e = `Zero​Width` // want `Unicode format character U\+200B`
f = "\u200b"
g = "👩🏽‍🔬" // want `Unicode control character U\+0007`
h = "👩🏽‍🔬​" // want `Unicode format and control characters`
)
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ var (
est`
e = `Zero​Width` // want `Unicode format character U\+200B`
f = "\u200b"
g = "👩🏽‍🔬\a" // want `Unicode control character U\+0007`
h = "👩🏽‍🔬​" // want `Unicode format and control characters`
)

-- delete control character U+0007 --
Expand All @@ -24,6 +26,8 @@ var (
est`
e = `Zero​Width` // want `Unicode format character U\+200B`
f = "\u200b"
g = "👩🏽‍🔬" // want `Unicode control character U\+0007`
h = "👩🏽‍🔬​" // want `Unicode format and control characters`
)

-- delete format character U+200B --
Expand All @@ -38,6 +42,8 @@ var (
est`
e = `ZeroWidth` // want `Unicode format character U\+200B`
f = "\u200b"
g = "👩🏽‍🔬" // want `Unicode control character U\+0007`
h = "👩🏽‍🔬​" // want `Unicode format and control characters`
)

-- replace format character U+200B with '\u200b' --
Expand All @@ -52,6 +58,8 @@ var (
est`
e = `Zero\u200bWidth` // want `Unicode format character U\+200B`
f = "\u200b"
g = "👩🏽‍🔬" // want `Unicode control character U\+0007`
h = "👩🏽‍🔬​" // want `Unicode format and control characters`
)

-- delete all control characters --
Expand All @@ -66,6 +74,8 @@ var (
est`
e = `Zero​Width` // want `Unicode format character U\+200B`
f = "\u200b"
g = "👩🏽‍🔬" // want `Unicode control character U\+0007`
h = "👩🏽‍🔬​" // want `Unicode format and control characters`
)

-- replace all control characters with escape sequences --
Expand All @@ -80,4 +90,39 @@ var (
est`
e = `Zero​Width` // want `Unicode format character U\+200B`
f = "\u200b"
g = "👩🏽‍🔬" // want `Unicode control character U\+0007`
h = "👩🏽‍🔬​" // want `Unicode format and control characters`
)


-- delete all format and control characters --
// Package pkg ...
package pkg

var (
a = "" // want `Unicode control character U\+0007`
b = "" // want `Unicode control characters`
c = "Test test"
d = `T
est`
e = `Zero​Width` // want `Unicode format character U\+200B`
f = "\u200b"
g = "👩🏽‍🔬" // want `Unicode control character U\+0007`
h = "👩🏽‍🔬" // want `Unicode format and control characters`
)

-- replace all format and control characters with escape sequences --
// Package pkg ...
package pkg

var (
a = "" // want `Unicode control character U\+0007`
b = "" // want `Unicode control characters`
c = "Test test"
d = `T
est`
e = `Zero​Width` // want `Unicode format character U\+200B`
f = "\u200b"
g = "👩🏽‍🔬" // want `Unicode control character U\+0007`
h = "👩🏽‍🔬\a\u200b" // want `Unicode format and control characters`
)

0 comments on commit d42e6ab

Please sign in to comment.