Skip to content

Commit

Permalink
format/mdext: maybe improve math parser
Browse files Browse the repository at this point in the history
  • Loading branch information
tulir committed Nov 2, 2024
1 parent f0c46cf commit 0f73c83
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 51 deletions.
26 changes: 16 additions & 10 deletions format/markdown_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,21 +160,27 @@ func TestRenderMarkdown_DiscordUnderline(t *testing.T) {
}

var mathTests = map[string]string{
"$foo$": `<span data-mx-maths="foo"><code>foo</code></span>`,
"$$foo$$": `<div data-mx-maths="foo"><code>foo</code></div>`,
"$$\nfoo\nbar\n$$": `<div data-mx-maths="foo\nbar\n"><code>foo<br>bar<br></code></div>`,
"`$foo$`": `<code>$foo$</code>`,
"```\n$foo$\n```": `<pre><code>$foo$\n</code></pre>`,
"~~$foo$~~": `<del><span data-mx-maths="foo"><code>foo</code></span></del>`,
"$5 or $10": `$5 or $10`,
"5$ or 10$": `5$ or 10$`,
"$5 or 10$": `<span data-mx-maths="5 or 10"><code>5 or 10</code></span>`,
"$foo$": `<span data-mx-maths="foo"><code>foo</code></span>`,
"hello $foo$ world": `hello <span data-mx-maths="foo"><code>foo</code></span> world`,
"$$\nfoo\nbar\n$$": `<div data-mx-maths="foo\nbar"><code>foo<br>bar</code></div>`,
"`$foo$`": `<code>$foo$</code>`,
"```\n$foo$\n```": `<pre><code>$foo$\n</code></pre>`,
"~~meow $foo$ asd~~": `<del>meow <span data-mx-maths="foo"><code>foo</code></span> asd</del>`,
"$5 or $10": `$5 or $10`,
"5$ or 10$": `5$ or 10$`,
"$5 or 10$": `<span data-mx-maths="5 or 10"><code>5 or 10</code></span>`,
"$*500*$": `<span data-mx-maths="*500*"><code>*500*</code></span>`,
"$$\n*500*\n$$": `<div data-mx-maths="*500*"><code>*500*</code></div>`,

// TODO: This doesn't work :(
// Maybe same reason as the spoiler wrapping not working?
//"~~$foo$~~": `<del><span data-mx-maths="foo"><code>foo</code></span></del>`,
}

func TestRenderMarkdown_Math(t *testing.T) {
renderer := goldmark.New(goldmark.WithExtensions(extension.Strikethrough, mdext.Math, mdext.EscapeHTML), format.HTMLOptions)
for markdown, html := range mathTests {
rendered := format.UnwrapSingleParagraph(render(renderer, markdown))
assert.Equal(t, html, strings.ReplaceAll(rendered, "\n", "\\n"))
assert.Equal(t, html, strings.ReplaceAll(rendered, "\n", "\\n"), "with input %q", markdown)
}
}
70 changes: 29 additions & 41 deletions format/mdext/math.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ import (
"bytes"
"fmt"
stdhtml "html"
"regexp"
"strings"
"unicode"

"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
Expand All @@ -25,7 +27,7 @@ var astKindMath = ast.NewNodeKind("Math")

type astMath struct {
ast.BaseInline
block bool
value []byte
}

func (n *astMath) Dump(source []byte, level int) {
Expand All @@ -38,7 +40,6 @@ func (n *astMath) Kind() ast.NodeKind {

type astMathBlock struct {
ast.BaseBlock
info *ast.Text
}

func (n *astMathBlock) Dump(source []byte, level int) {
Expand All @@ -49,22 +50,6 @@ func (n *astMathBlock) Kind() ast.NodeKind {
return astKindMath
}

// mathDelimiterProcessor supplies the delimiter callbacks that the inline math
// parser hands to parser.ScanDelimiter for '$'-delimited math.
type mathDelimiterProcessor struct{}

// defaultMathDelimiterProcessor is the shared instance; the type has no fields.
var defaultMathDelimiterProcessor = &mathDelimiterProcessor{}

// IsDelimiter reports whether b is the math delimiter character '$'.
func (p *mathDelimiterProcessor) IsDelimiter(b byte) bool {
return b == '$'
}

// CanOpenCloser reports whether opener and closer use the same delimiter
// character.
func (p *mathDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool {
return opener.Char == closer.Char
}

// OnMatch returns a new astMath node whose block flag is set when consumes is
// greater than 1.
func (p *mathDelimiterProcessor) OnMatch(consumes int) ast.Node {
return &astMath{block: consumes > 1}
}

type inlineMathParser struct{}

var defaultInlineMathParser = &inlineMathParser{}
Expand All @@ -73,21 +58,30 @@ func NewInlineMathParser() parser.InlineParser {
return defaultInlineMathParser
}

// mathDelimiter is the byte that delimits math: it triggers the inline parser,
// and a run of it forms a block fence.
const mathDelimiter = '$'

func (s *inlineMathParser) Trigger() []byte {
return []byte{'$'}
return []byte{mathDelimiter}
}

// latexInlineRegexp matches an inline math span at the start of the input: an
// opening $, any run of non-$ characters, and a closing $. Capture group 1 is
// the span including both $ characters. The closing $ must be followed by
// whitespace or the end of the input; anything else is rejected to avoid false
// positives such as "$5 or $10".
var latexInlineRegexp = regexp.MustCompile(`^(\$[^$]*\$)(?:$|\s)`)

func (s *inlineMathParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
before := block.PrecendingCharacter()
// Ignore lines where the opening $ comes after a letter or number to avoid false positives
if unicode.IsLetter(before) || unicode.IsNumber(before) {
return nil
}
line, segment := block.PeekLine()
node := parser.ScanDelimiter(line, before, 1, defaultMathDelimiterProcessor)
if node == nil {
idx := latexInlineRegexp.FindSubmatchIndex(line)
if idx == nil {
return nil
}
node.Segment = segment.WithStop(segment.Start + node.OriginalLength)
block.Advance(node.OriginalLength)
pc.PushDelimiter(node)
return node
block.Advance(idx[3])
return &astMath{
value: block.Value(text.NewSegment(segment.Start+1, segment.Start+idx[3]-1)),
}
}

func (s *inlineMathParser) CloseBlock(parent ast.Node, pc parser.Context) {
Expand Down Expand Up @@ -115,50 +109,44 @@ func (b *blockMathParser) Trigger() []byte {
}

func (b *blockMathParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) {
const fenceChar = '$'
line, segment := reader.PeekLine()
line, _ := reader.PeekLine()
pos := pc.BlockOffset()
if pos < 0 || (line[pos] != fenceChar) {
if pos < 0 || (line[pos] != mathDelimiter) {
return nil, parser.NoChildren
}
findent := pos
i := pos
for ; i < len(line) && line[i] == fenceChar; i++ {
for ; i < len(line) && line[i] == mathDelimiter; i++ {
}
oFenceLength := i - pos
if oFenceLength < 2 {
return nil, parser.NoChildren
}
var info *ast.Text
if i < len(line)-1 {
rest := line[i:]
left := util.TrimLeftSpaceLength(rest)
right := util.TrimRightSpaceLength(rest)
if left < len(rest)-right {
infoStart, infoStop := segment.Start-segment.Padding+i+left, segment.Stop-right
value := rest[left : len(rest)-right]
if bytes.IndexByte(value, fenceChar) > -1 {
if bytes.IndexByte(value, mathDelimiter) > -1 {
return nil, parser.NoChildren
} else if infoStart != infoStop {
info = ast.NewTextSegment(text.NewSegment(infoStart, infoStop))
}
}
}
node := &astMathBlock{info: info}
node := &astMathBlock{}
pc.Set(mathBlockInfoKey, &mathBlockData{findent, oFenceLength, node})
return node, parser.NoChildren

}

func (b *blockMathParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State {
const fenceChar = '$'
line, segment := reader.PeekLine()
fdata := pc.Get(mathBlockInfoKey).(*mathBlockData)

w, pos := util.IndentWidth(line, reader.LineOffset())
if w < 4 {
i := pos
for ; i < len(line) && line[i] == fenceChar; i++ {
for ; i < len(line) && line[i] == mathDelimiter; i++ {
}
length := i - pos
if length >= fdata.length && util.IsBlank(line[i:]) {
Expand Down Expand Up @@ -221,15 +209,15 @@ func (r *mathHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer
func (r *mathHTMLRenderer) renderMath(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
if entering {
tag := "span"
var tex string
switch typed := n.(type) {
case *astMathBlock:
tag = "div"
tex = string(n.Lines().Value(source))
case *astMath:
if typed.block {
tag = "div"
}
tex = string(typed.value)
}
tex := stdhtml.EscapeString(string(n.Text(source)))
tex = stdhtml.EscapeString(strings.TrimSpace(tex))
_, _ = fmt.Fprintf(w, `<%s data-mx-maths="%s"><code>%s</code></%s>`, tag, tex, strings.ReplaceAll(tex, "\n", "<br>"), tag)
}
return ast.WalkSkipChildren, nil
Expand Down

0 comments on commit 0f73c83

Please sign in to comment.