Skip to content

Commit

Permalink
Performance optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
yuin committed May 6, 2019
1 parent d9164d2 commit ad605c0
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 21 deletions.
28 changes: 21 additions & 7 deletions extension/linkify.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]

var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=\(\);]*)`)

var emailRegexp = regexp.MustCompile(`^[a-zA-Z0-9\.\-_\+]+@([a-zA-Z0-9\.\-_]+)`)

type linkifyParser struct {
}

Expand All @@ -32,6 +30,11 @@ func (s *linkifyParser) Trigger() []byte {
return []byte{' ', '*', '_', '~', '('}
}

var protoHTTP = []byte("http:")
var protoHTTPS = []byte("https:")
var protoFTP = []byte("ftp:")
var domainWWW = []byte("www.")

func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
line, segment := block.PeekLine()
consumes := 0
Expand All @@ -47,8 +50,10 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
var m []int
typ := ast.AutoLinkType(ast.AutoLinkEmail)
typ = ast.AutoLinkURL
m = urlRegexp.FindSubmatchIndex(line)
if m == nil {
if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
m = urlRegexp.FindSubmatchIndex(line)
}
if m == nil && bytes.HasPrefix(line, domainWWW) {
m = wwwURLRegxp.FindSubmatchIndex(line)
}
if m != nil {
Expand Down Expand Up @@ -84,15 +89,24 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
}
if m == nil {
typ = ast.AutoLinkEmail
m = emailRegexp.FindSubmatchIndex(line)
stop := util.FindEmailIndex(line)
if stop < 0 {
return nil
}
at := bytes.IndexByte(line, '@')
m = []int{0, stop, at, stop - 1}
if m == nil || bytes.IndexByte(line[m[2]:m[3]], '.') < 0 {
return nil
}
lastChar := line[m[1]-1]
if lastChar == '.' {
m[1]--
} else if lastChar == '-' || lastChar == '_' {
return nil
}
if m[1] < len(line) {
nextChar := line[m[1]]
if nextChar == '-' || nextChar == '_' {
return nil
}
}
}
if m == nil {
Expand Down
22 changes: 11 additions & 11 deletions parser/auto_link.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package parser
import (
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
"regexp"
"github.com/yuin/goldmark/util"
)

type autoLinkParser struct {
Expand All @@ -21,22 +21,22 @@ func (s *autoLinkParser) Trigger() []byte {
return []byte{'<'}
}

var emailAutoLinkRegexp = regexp.MustCompile(`^<([a-zA-Z0-9.!#$%&'*+\/=?^_` + "`" + `{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>`)

var autoLinkRegexp = regexp.MustCompile(`(?i)^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>`)

func (s *autoLinkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node {
line, segment := block.PeekLine()
match := emailAutoLinkRegexp.FindSubmatchIndex(line)
stop := util.FindEmailIndex(line[1:])
typ := ast.AutoLinkType(ast.AutoLinkEmail)
if match == nil {
match = autoLinkRegexp.FindSubmatchIndex(line)
if stop < 0 {
stop = util.FindURLIndex(line[1:])
typ = ast.AutoLinkURL
}
if match == nil {
if stop < 0 {
return nil
}
stop++
if stop >= len(line) || line[stop] != '>' {
return nil
}
value := ast.NewTextSegment(text.NewSegment(segment.Start+1, segment.Start+match[1]-1))
block.Advance(match[1])
value := ast.NewTextSegment(text.NewSegment(segment.Start+1, segment.Start+stop))
block.Advance(stop + 1)
return ast.NewAutoLink(typ, value)
}
4 changes: 3 additions & 1 deletion parser/setext_headings.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ func matchesSetextHeadingBar(line []byte) (byte, bool) {
level2 = util.TrimLeftLength(line[start:end], []byte{'-'})
c = '-'
}
end -= util.TrimRightSpaceLength(line[start:end])
if util.IsSpace(line[end-1]) {
end -= util.TrimRightSpaceLength(line[start:end])
}
if !((level1 > 0 && start+level1 == end) || (level2 > 0 && start+level2 == end)) {
return 0, false
}
Expand Down
84 changes: 82 additions & 2 deletions util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"bytes"
"io"
"net/url"
"regexp"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -291,12 +292,28 @@ func TrimRightLength(source, s []byte) int {

// TrimLeftSpaceLength returns a length of leading space characters.
func TrimLeftSpaceLength(source []byte) int {
return TrimLeftLength(source, spaces)
i := 0
for ; i < len(source); i++ {
if !IsSpace(source[i]) {
break
}
}
return i
}

// TrimRightSpaceLength returns a length of trailing space characters.
func TrimRightSpaceLength(source []byte) int {
return TrimRightLength(source, spaces)
l := len(source)
i := l - 1
for ; i >= 0; i-- {
if !IsSpace(source[i]) {
break
}
}
if i < 0 {
return l
}
return l - 1 - i
}

// TrimLeftSpace returns a subslice of the given string by slicing off all leading
Expand Down Expand Up @@ -722,6 +739,65 @@ func FindHTMLAttributeIndex(b []byte, canEscapeQuotes bool) [4]int {
return result
}

// FindURLIndex returns a stop index value if the given bytes seem an URL.
// This function is equivalent to [A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]* .
func FindURLIndex(b []byte) int {
i := 0
if !(len(b) > 0 && urlTable[b[i]]&7 == 7) {
return -1
}
i++
for ; i < len(b); i++ {
c := b[i]
if urlTable[c]&4 != 4 {
break
}
}
if i == 1 || i > 33 || i >= len(b) {
return -1
}
if b[i] != ':' {
return -1
}
i++
for ; i < len(b); i++ {
c := b[i]
if urlTable[c]&1 != 1 {
break
}
}
return i
}

var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`)

// FindEmailIndex returns a stop index value if the given bytes seem an email address.
func FindEmailIndex(b []byte) int {
// TODO: eliminate regexps
i := 0
for ; i < len(b); i++ {
c := b[i]
if emailTable[c]&1 != 1 {
break
}
}
if i == 0 {
return -1
}
if i >= len(b) || b[i] != '@' {
return -1
}
i++
if i >= len(b) {
return -1
}
match := emailDomainRegexp.FindSubmatchIndex(b[i:])
if match == nil {
return -1
}
return i + match[1]
}

var spaces = []byte(" \t\n\x0b\x0c\x0d")

var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
Expand All @@ -733,6 +809,10 @@ var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99}

var urlTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}

var emailTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}

// UTF8Len returns a byte length of the utf-8 character.
func UTF8Len(b byte) int8 {
return utf8lenTable[b]
Expand Down

0 comments on commit ad605c0

Please sign in to comment.