Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pull] master from PaulSonOfLars:master #5

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion common.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func validStart(pos int, input []rune) bool {
}

func validEnd(pos int, input []rune) bool {
// First char is not a valid end char.
// First char is not a valid end char; we do NOT allow empty entities.
// If the end char has a space before it, its not valid either.
if pos == 0 || unicode.IsSpace(input[pos-1]) {
return false
Expand Down
6 changes: 3 additions & 3 deletions commonV2.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func isClosingTag(in []rune, pos int) bool {
return false
}

func getClosingTag(in []rune, tag string) (int, int) {
func getClosingTag(in []rune, openingTag string, closingTag string) (int, int) {
offset := 0
subtags := 0
for offset < len(in) {
Expand All @@ -164,9 +164,9 @@ func getClosingTag(in []rune, tag string) (int, int) {
}

closingTagIdx := openingTagIdx + 2 + c
if string(in[openingTagIdx+1:closingTagIdx]) == tag { // found a nested tag, this is annoying
if string(in[openingTagIdx+1:closingTagIdx]) == openingTag { // found a nested tag, this is annoying
subtags++
} else if isClosingTag(in, openingTagIdx) && string(in[openingTagIdx+2:closingTagIdx]) == tag {
} else if isClosingTag(in, openingTagIdx) && string(in[openingTagIdx+2:closingTagIdx]) == closingTag {
if subtags == 0 {
return openingTagIdx, closingTagIdx
}
Expand Down
223 changes: 157 additions & 66 deletions md2htmlV2.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"html"
"sort"
"strings"
"unicode"
)

var defaultConverterV2 = ConverterV2{
Expand Down Expand Up @@ -42,20 +43,23 @@ func MD2HTMLButtonsV2(in string) (string, []ButtonV2) {
}

var chars = map[string]string{
"`": "code",
"```": "pre",
"_": "i",
"*": "b",
"~": "s",
"__": "u",
"|": "", // this is a placeholder for || to work
"||": "span class=\"tg-spoiler\"",
"!": "", // for emoji
"[": "", // for links
"]": "", // for links/emoji
"(": "", // for links/emoji
")": "", // for links/emoji
"\\": "", // for escapes
"`": "code",
"```": "pre",
"_": "i",
"*": "b",
"~": "s",
"__": "u",
"|": "", // this is a placeholder for || to work
"||": "span class=\"tg-spoiler\"",
"!": "", // for emoji
"![": "", // for emoji
"[": "", // for links
"]": "", // for links/emoji
"(": "", // for links/emoji
")": "", // for links/emoji
"\\": "", // for escapes
"&": "", // for blockquotes
"&gt;": "blockquote",
}

var AllMarkdownV2Chars = func() []rune {
Expand Down Expand Up @@ -86,6 +90,53 @@ var skipStarts = map[rune]bool{
'[': true, // links
}

// getItem classifies the markdown token that begins at in[i].
//
// It returns the token text, the number of additional runes (beyond in[i])
// that the token occupies, and whether the token is an active markdown item.
//
// When the final result is false the caller should emit text verbatim:
//   - ("", 0, false): in[i] is either not a markdown character at all, or it
//     sits at a position where it cannot open an entity;
//   - (escaped, 1, false): in[i] is a backslash escaping the markdown
//     character that follows it; only the escaped character is returned.
//
// When the final result is true the token is one of the multi-rune items
// ("||", "__", "```", "&gt;", "**&gt;", "![") or the single rune itself.
func getItem(in []rune, i int) (string, int, bool) {
	cur := in[i]
	if _, known := chars[string(cur)]; !known {
		return "", 0, false
	}

	if !skipStarts[cur] && !validStart(i, in) {
		// Not a valid entity start; the only thing left to honour is an escape.
		if cur != '\\' || i+1 >= len(in) {
			return "", 0, false
		}
		next := string(in[i+1])
		if _, known := chars[next]; !known {
			return "", 0, false
		}
		return next, 1, false
	}

	// follows reports whether the runes right after in[i] spell out want.
	rest := in[i+1:]
	follows := func(want string) bool {
		w := []rune(want)
		if len(rest) < len(w) {
			return false
		}
		for k, r := range w {
			if rest[k] != r {
				return false
			}
		}
		return true
	}

	switch {
	case cur == '|' && follows("|"):
		return "||", 1, true

	case cur == '_' && follows("_"): // support __
		return "__", 1, true

	case cur == '`' && follows("``"): // support ```
		return "```", 2, true

	case cur == '&' && follows("gt;") && validBlockQuoteStart(in, i):
		return "&gt;", 3, true

	case cur == '*' && follows("*&gt;") && validBlockQuoteStart(in, i):
		// We force support for **> to allow for people to separate
		// quotes/expandable quote blocks with **
		return "**&gt;", 5, true

	case cur == '!' && follows("["):
		return "![", 1, true
	}

	return string(cur), 0, true
}

// TODO: add support for a map-like check of which items cannot be included.
//
// Eg: `code` cannot be italic/bold/underline/strikethrough
Expand All @@ -96,49 +147,20 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2
out := strings.Builder{}

for i := 0; i < len(in); i++ {
c := in[i]
if _, ok := chars[string(c)]; !ok {
out.WriteRune(c)
continue
}

if !validStart(i, in) && !skipStarts[c] {
if c == '\\' && i+1 < len(in) {
if _, ok := chars[string(in[i+1])]; ok {
out.WriteRune(in[i+1])
i++
continue
}
item, offset, ok := getItem(in, i)
if !ok {
if item == "" {
item = string(in[i])
}
out.WriteRune(c)
out.WriteString(item)
i += offset
continue
}
i += offset

switch c {
case '`', '*', '~', '_', '|': // '||', '__', and '```' are included here too
item := string(c)
if c == '|' { // support ||
// if single |, ignore. We only care about double ||
if i+1 >= len(in) || in[i+1] != '|' {
out.WriteRune(c)
continue
}

item = "||"
i++
} else if c == '_' && i+1 < len(in) && in[i+1] == '_' { // support __
item = "__"
i++
} else if c == '`' && i+2 < len(in) && in[i+1] == '`' && in[i+2] == '`' { // support ```
item = "```"
i += 2
}

if i+1 >= len(in) {
out.WriteString(item)
continue
}

switch item {
// All cases where start and closing tags are the same.
case "`", "*", "~", "_", "```", "||", "__":
idx := getValidEnd(in[i+1:], item)
if idx < 0 {
// not found; write and move on.
Expand Down Expand Up @@ -174,29 +196,44 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2
nestedT, nestedB := cv.md2html(in[nStart:nEnd], enableButtons)
return out.String() + "<" + chars[item] + ">" + nestedT + "</" + closeSpans(chars[item]) + ">" + followT, append(nestedB, followB...)

case '!':
if len(in) <= i+1 || in[i+1] != '[' {
out.WriteRune(c)
case "&gt;", "**&gt;":
nStart := i + 1
for nStart < len(in) && unicode.IsSpace(in[nStart]) {
nStart++
}

if nStart >= len(in) {
out.WriteString(item)
continue
}

ok, text, content, newEnd := getLinkContents(in[i+1:], true)
nEnd, contents, expandable := getBlockQuoteEnd(in, nStart)
nestedT, nestedB := cv.md2html(contents, enableButtons)
followT, followB := cv.md2html(in[nEnd:], enableButtons)

if expandable {
return out.String() + "<blockquote expandable>" + strings.TrimSpace(nestedT) + "</blockquote>" + followT, append(nestedB, followB...)
}
return out.String() + "<blockquote>" + strings.TrimSpace(nestedT) + "</blockquote>" + followT, append(nestedB, followB...)

case "![":
ok, text, content, newEnd := getLinkContents(in[i:], true)
if !ok {
out.WriteRune(c)
out.WriteString(item)
continue
}
end := i + 1 + newEnd
end := i + newEnd

content = strings.TrimPrefix(content, "tg://emoji?id=")

nestedT, nestedB := cv.md2html(text, enableButtons)
followT, followB := cv.md2html(in[end:], enableButtons)
return out.String() + `<tg-emoji emoji-id="` + content + `">` + nestedT + "</tg-emoji>" + followT, append(nestedB, followB...)

case '[':
case "[":
ok, text, content, newEnd := getLinkContents(in[i:], false)
if !ok {
out.WriteRune(c)
out.WriteString(item)
continue
}
end := i + newEnd
Expand Down Expand Up @@ -227,24 +264,78 @@ func (cv ConverterV2) md2html(in []rune, enableButtons bool) (string, []ButtonV2
nestedT, nestedB := cv.md2html(text, enableButtons)
return out.String() + `<a href="` + content + `">` + nestedT + "</a>" + followT, append(nestedB, followB...)

case ']', '(', ')':
out.WriteRune(c)

case '\\':
case "\\":
if i+1 < len(in) {
if _, ok := chars[string(in[i+1])]; ok {
out.WriteRune(in[i+1])
i++
continue
}
}
out.WriteRune(c)
out.WriteString(item)

default:
out.WriteString(item)
}
}

return out.String(), nil
}

// getBlockQuoteEnd scans a blockquote body that starts at in[nStart] and
// reports where the quote stops.
//
// It returns, in order:
//   - the index in `in` where the quote ends: the index of the newline that
//     terminates it, or len(in) if the input runs out first;
//   - the collected quote contents, with the leading spaces of every line and
//     the "&gt;" marker of each continuation line left out. For expandable
//     quotes the closing "||" (and the newline after it, if any) is removed
//     as well;
//   - whether the quote is expandable, i.e. its last line ends in "||".
//
// A quote continues onto the next line only when that line begins directly
// with "&gt;".
func getBlockQuoteEnd(in []rune, nStart int) (int, []rune, bool) {
	var contents []rune // We store all the contents, minus the > characters, so we avoid double-html tags
	lineStart := true
	for j := nStart; j < len(in); j++ {
		if lineStart && in[j] == ' ' {
			// Skip space chars at start of lines
			continue
		}

		lineStart = in[j] == '\n'
		contents = append(contents, in[j])

		// Keep skipping until we get a newline
		if in[j] != '\n' {
			continue
		}

		// The "||" marker must lie inside the scanned range. Without the
		// j-2 >= nStart guard, a "||" sitting before nStart would be
		// mistaken for the marker and the trim below would slice past the
		// start of contents and panic.
		if j-2 >= nStart && isExpandableEnd(in, j) {
			// Drop the "||" marker and the newline that was just appended.
			return j, contents[:len(contents)-3], true
		}

		if j+4 < len(in) && in[j+1] == '&' && in[j+2] == 'g' && in[j+3] == 't' && in[j+4] == ';' {
			j = j + 4 // skip '>' symbol for the next blockquote start
			continue
		}
		return j, contents, false
	}

	// End of input: same marker check, again restricted to the scanned range
	// so that contents is guaranteed to hold the two '|' runes being trimmed.
	if len(in)-2 >= nStart && isExpandableEnd(in, len(in)) {
		return len(in), contents[:len(contents)-2], true
	}

	return len(in), contents, false
}

// isExpandableEnd reports whether the two runes immediately before index j
// are both '|', i.e. whether the text ending at j closes with "||".
// j may equal len(in) to test the very end of the input.
func isExpandableEnd(in []rune, j int) bool {
	if j < 2 {
		// Fewer than two runes precede j; there is no room for "||".
		return false
	}
	last, beforeLast := in[j-1], in[j-2]
	return last == '|' && beforeLast == '|'
}

// validBlockQuoteStart reports whether position i is a legal place for a
// blockquote marker: everything between the start of its line (or the start
// of the input) and i must be whitespace.
func validBlockQuoteStart(in []rune, i int) bool {
	// Walk backwards from the rune just before i.
	for pos := i; pos > 0; pos-- {
		switch prev := in[pos-1]; {
		case prev == '\n':
			// Reached the start of the line with only whitespace in between.
			return true
		case !unicode.IsSpace(prev):
			// Something other than whitespace precedes the marker on this line.
			return false
		}
	}

	// Start of message; must be valid.
	return true
}

func EscapeMarkdownV2(r []rune) string {
out := strings.Builder{}
for i, x := range r {
Expand Down
Loading