Refactor markdown math render (#32831)

Add more tests
This commit is contained in:
wxiaoguang 2024-12-14 13:43:05 +08:00 committed by GitHub
parent 82c59d52ea
commit cc5ff98e0d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 137 additions and 151 deletions

View File

@ -78,26 +78,23 @@ func (r *GlodmarkRender) Renderer() renderer.Renderer {
func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) { func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
if entering { if entering {
language, _ := c.Language() languageBytes, _ := c.Language()
if language == nil { languageStr := giteautil.IfZero(string(languageBytes), "text")
language = []byte("text")
}
languageStr := string(language) preClasses := "code-block"
preClasses := []string{"code-block"}
if languageStr == "mermaid" || languageStr == "math" { if languageStr == "mermaid" || languageStr == "math" {
preClasses = append(preClasses, "is-loading") preClasses += " is-loading"
} }
err := r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<pre class="%s">`, strings.Join(preClasses, " ")) err := r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<pre class="%s">`, preClasses)
if err != nil { if err != nil {
return return
} }
// include language-x class as part of commonmark spec // include language-x class as part of commonmark spec, "chroma" class is used to highlight the code
// the "display" class is used by "js/markup/math.js" to render the code element as a block // the "display" class is used by "js/markup/math.ts" to render the code element as a block
err = r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<code class="chroma language-%s display">`, string(language)) // the "math.ts" strictly depends on the structure: <pre class="code-block is-loading"><code class="language-math display">...</code></pre>
err = r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<code class="chroma language-%s display">`, languageStr)
if err != nil { if err != nil {
return return
} }
@ -128,7 +125,12 @@ func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender {
), ),
highlighting.WithWrapperRenderer(r.highlightingRenderer), highlighting.WithWrapperRenderer(r.highlightingRenderer),
), ),
math.NewExtension(&ctx.RenderInternal, math.Enabled(setting.Markdown.EnableMath)), math.NewExtension(&ctx.RenderInternal, math.Options{
Enabled: setting.Markdown.EnableMath,
ParseDollarInline: true,
ParseDollarBlock: true,
ParseSquareBlock: true, // TODO: this is a bad syntax, it should be deprecated in the future (by some config options)
}),
meta.Meta, meta.Meta,
), ),
goldmark.WithParserOptions( goldmark.WithParserOptions(

View File

@ -12,8 +12,9 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestMathRender(t *testing.T) {
const nl = "\n" const nl = "\n"
func TestMathRender(t *testing.T) {
testcases := []struct { testcases := []struct {
testcase string testcase string
expected string expected string
@ -86,6 +87,18 @@ func TestMathRender(t *testing.T) {
`$\text{$b$}$`, `$\text{$b$}$`,
`<p><code class="language-math">\text{$b$}</code></p>` + nl, `<p><code class="language-math">\text{$b$}</code></p>` + nl,
}, },
{
"a$`b`$c",
`<p>a<code class="language-math">b</code>c</p>` + nl,
},
{
"a $`b`$ c",
`<p>a <code class="language-math">b</code> c</p>` + nl,
},
{
"a$``b``$c x$```y```$z",
`<p>a<code class="language-math">b</code>c x<code class="language-math">y</code>z</p>` + nl,
},
} }
for _, test := range testcases { for _, test := range testcases {
@ -215,6 +228,11 @@ x
</ol> </ol>
`, `,
}, },
{
"inline-non-math",
`\[x]`,
`<p>[x]</p>` + nl,
},
} }
for _, test := range testcases { for _, test := range testcases {

View File

@ -16,16 +16,18 @@ import (
type blockParser struct { type blockParser struct {
parseDollars bool parseDollars bool
parseSquare bool
endBytesDollars []byte endBytesDollars []byte
endBytesBracket []byte endBytesSquare []byte
} }
// NewBlockParser creates a new math BlockParser // NewBlockParser creates a new math BlockParser
func NewBlockParser(parseDollarBlocks bool) parser.BlockParser { func NewBlockParser(parseDollars, parseSquare bool) parser.BlockParser {
return &blockParser{ return &blockParser{
parseDollars: parseDollarBlocks, parseDollars: parseDollars,
parseSquare: parseSquare,
endBytesDollars: []byte{'$', '$'}, endBytesDollars: []byte{'$', '$'},
endBytesBracket: []byte{'\\', ']'}, endBytesSquare: []byte{'\\', ']'},
} }
} }
@ -40,7 +42,7 @@ func (b *blockParser) Open(parent ast.Node, reader text.Reader, pc parser.Contex
var dollars bool var dollars bool
if b.parseDollars && line[pos] == '$' && line[pos+1] == '$' { if b.parseDollars && line[pos] == '$' && line[pos+1] == '$' {
dollars = true dollars = true
} else if line[pos] == '\\' && line[pos+1] == '[' { } else if b.parseSquare && line[pos] == '\\' && line[pos+1] == '[' {
if len(line[pos:]) >= 3 && line[pos+2] == '!' && bytes.Contains(line[pos:], []byte(`\]`)) { if len(line[pos:]) >= 3 && line[pos+2] == '!' && bytes.Contains(line[pos:], []byte(`\]`)) {
// do not process escaped attention block: "> \[!NOTE\]" // do not process escaped attention block: "> \[!NOTE\]"
return nil, parser.NoChildren return nil, parser.NoChildren
@ -53,10 +55,10 @@ func (b *blockParser) Open(parent ast.Node, reader text.Reader, pc parser.Contex
node := NewBlock(dollars, pos) node := NewBlock(dollars, pos)
// Now we need to check if the ending block is on the segment... // Now we need to check if the ending block is on the segment...
endBytes := giteaUtil.Iif(dollars, b.endBytesDollars, b.endBytesBracket) endBytes := giteaUtil.Iif(dollars, b.endBytesDollars, b.endBytesSquare)
idx := bytes.Index(line[pos+2:], endBytes) idx := bytes.Index(line[pos+2:], endBytes)
if idx >= 0 { if idx >= 0 {
// for case $$ ... $$ any other text // for case: "$$ ... $$ any other text" (this case will be handled by the inline parser)
for i := pos + 2 + idx + 2; i < len(line); i++ { for i := pos + 2 + idx + 2; i < len(line); i++ {
if line[i] != ' ' && line[i] != '\n' { if line[i] != ' ' && line[i] != '\n' {
return nil, parser.NoChildren return nil, parser.NoChildren
@ -70,6 +72,13 @@ func (b *blockParser) Open(parent ast.Node, reader text.Reader, pc parser.Contex
return node, parser.Close | parser.NoChildren return node, parser.Close | parser.NoChildren
} }
// for case "\[ ... ]" (no close marker on the same line)
for i := pos + 2 + idx + 2; i < len(line); i++ {
if line[i] != ' ' && line[i] != '\n' {
return nil, parser.NoChildren
}
}
segment.Start += pos + 2 segment.Start += pos + 2
node.Lines().Append(segment) node.Lines().Append(segment)
return node, parser.NoChildren return node, parser.NoChildren
@ -85,7 +94,7 @@ func (b *blockParser) Continue(node ast.Node, reader text.Reader, pc parser.Cont
line, segment := reader.PeekLine() line, segment := reader.PeekLine()
w, pos := util.IndentWidth(line, reader.LineOffset()) w, pos := util.IndentWidth(line, reader.LineOffset())
if w < 4 { if w < 4 {
endBytes := giteaUtil.Iif(block.Dollars, b.endBytesDollars, b.endBytesBracket) endBytes := giteaUtil.Iif(block.Dollars, b.endBytesDollars, b.endBytesSquare)
if bytes.HasPrefix(line[pos:], endBytes) && util.IsBlank(line[pos+len(endBytes):]) { if bytes.HasPrefix(line[pos:], endBytes) && util.IsBlank(line[pos+len(endBytes):]) {
if util.IsBlank(line[pos+len(endBytes):]) { if util.IsBlank(line[pos+len(endBytes):]) {
newline := giteaUtil.Iif(line[len(line)-1] != '\n', 0, 1) newline := giteaUtil.Iif(line[len(line)-1] != '\n', 0, 1)

View File

@ -1,31 +0,0 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package math
import (
"github.com/yuin/goldmark/ast"
)
// InlineBlock is the AST node type for inline math written with
// double-dollar delimiters, e.g. $$...$$.
// It embeds Inline and declares no fields of its own.
type InlineBlock struct {
Inline
}
// InlineBlock is a method with an empty body; calling it does nothing.
// NOTE(review): presumably a marker method that lets *InlineBlock be told
// apart from the embedded Inline type — confirm against callers.
func (n *InlineBlock) InlineBlock() {}
// KindInlineBlock is the ast.NodeKind reported by InlineBlock nodes.
// It is created once at package initialization via ast.NewNodeKind under
// the name "MathInlineBlock".
var KindInlineBlock = ast.NewNodeKind("MathInlineBlock")
// Kind reports the node kind of n, which is always KindInlineBlock.
func (n *InlineBlock) Kind() ast.NodeKind {
return KindInlineBlock
}
// NewInlineBlock allocates an InlineBlock node whose embedded Inline is the
// zero value and returns a pointer to it.
func NewInlineBlock() *InlineBlock {
	return &InlineBlock{Inline: Inline{}}
}

View File

@ -8,7 +8,7 @@ import (
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
) )
// Inline represents inline math e.g. $...$ or \(...\) // Inline struct represents inline math e.g. $...$ or \(...\)
type Inline struct { type Inline struct {
ast.BaseInline ast.BaseInline
} }

View File

@ -12,31 +12,25 @@ import (
) )
type inlineParser struct { type inlineParser struct {
start []byte trigger []byte
end []byte endBytesSingleDollar []byte
endBytesDoubleDollar []byte
endBytesBracket []byte
} }
var defaultInlineDollarParser = &inlineParser{ var defaultInlineDollarParser = &inlineParser{
start: []byte{'$'}, trigger: []byte{'$'},
end: []byte{'$'}, endBytesSingleDollar: []byte{'$'},
} endBytesDoubleDollar: []byte{'$', '$'},
var defaultDualDollarParser = &inlineParser{
start: []byte{'$', '$'},
end: []byte{'$', '$'},
} }
func NewInlineDollarParser() parser.InlineParser { func NewInlineDollarParser() parser.InlineParser {
return defaultInlineDollarParser return defaultInlineDollarParser
} }
func NewInlineDualDollarParser() parser.InlineParser {
return defaultDualDollarParser
}
var defaultInlineBracketParser = &inlineParser{ var defaultInlineBracketParser = &inlineParser{
start: []byte{'\\', '('}, trigger: []byte{'\\', '('},
end: []byte{'\\', ')'}, endBytesBracket: []byte{'\\', ')'},
} }
func NewInlineBracketParser() parser.InlineParser { func NewInlineBracketParser() parser.InlineParser {
@ -45,7 +39,7 @@ func NewInlineBracketParser() parser.InlineParser {
// Trigger triggers this parser on $ or \ // Trigger triggers this parser on $ or \
func (parser *inlineParser) Trigger() []byte { func (parser *inlineParser) Trigger() []byte {
return parser.start return parser.trigger
} }
func isPunctuation(b byte) bool { func isPunctuation(b byte) bool {
@ -64,33 +58,60 @@ func isAlphanumeric(b byte) bool {
func (parser *inlineParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { func (parser *inlineParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
line, _ := block.PeekLine() line, _ := block.PeekLine()
if !bytes.HasPrefix(line, parser.start) { if !bytes.HasPrefix(line, parser.trigger) {
// We'll catch this one on the next time round // We'll catch this one on the next time round
return nil return nil
} }
var startMarkLen int
var stopMark []byte
checkSurrounding := true
if line[0] == '$' {
startMarkLen = 1
stopMark = parser.endBytesSingleDollar
if len(line) > 1 {
if line[1] == '$' {
startMarkLen = 2
stopMark = parser.endBytesDoubleDollar
} else if line[1] == '`' {
pos := 1
for ; pos < len(line) && line[pos] == '`'; pos++ {
}
startMarkLen = pos
stopMark = bytes.Repeat([]byte{'`'}, pos)
stopMark[len(stopMark)-1] = '$'
checkSurrounding = false
}
}
} else {
startMarkLen = 2
stopMark = parser.endBytesBracket
}
if checkSurrounding {
precedingCharacter := block.PrecendingCharacter() precedingCharacter := block.PrecendingCharacter()
if precedingCharacter < 256 && (isAlphanumeric(byte(precedingCharacter)) || isPunctuation(byte(precedingCharacter))) { if precedingCharacter < 256 && (isAlphanumeric(byte(precedingCharacter)) || isPunctuation(byte(precedingCharacter))) {
// need to exclude things like `a$` from being considered a start // need to exclude things like `a$` from being considered a start
return nil return nil
} }
}
// move the opener marker point at the start of the text // move the opener marker point at the start of the text
opener := len(parser.start) opener := startMarkLen
// Now look for an ending line // Now look for an ending line
depth := 0 depth := 0
ender := -1 ender := -1
for i := opener; i < len(line); i++ { for i := opener; i < len(line); i++ {
if depth == 0 && bytes.HasPrefix(line[i:], parser.end) { if depth == 0 && bytes.HasPrefix(line[i:], stopMark) {
succeedingCharacter := byte(0) succeedingCharacter := byte(0)
if i+len(parser.end) < len(line) { if i+len(stopMark) < len(line) {
succeedingCharacter = line[i+len(parser.end)] succeedingCharacter = line[i+len(stopMark)]
} }
// check valid ending character // check valid ending character
isValidEndingChar := isPunctuation(succeedingCharacter) || isBracket(succeedingCharacter) || isValidEndingChar := isPunctuation(succeedingCharacter) || isBracket(succeedingCharacter) ||
succeedingCharacter == ' ' || succeedingCharacter == '\n' || succeedingCharacter == 0 succeedingCharacter == ' ' || succeedingCharacter == '\n' || succeedingCharacter == 0
if !isValidEndingChar { if checkSurrounding && !isValidEndingChar {
break break
} }
ender = i ender = i
@ -112,21 +133,12 @@ func (parser *inlineParser) Parse(parent ast.Node, block text.Reader, pc parser.
block.Advance(opener) block.Advance(opener)
_, pos := block.Position() _, pos := block.Position()
var node ast.Node node := NewInline()
if parser == defaultDualDollarParser {
node = NewInlineBlock()
} else {
node = NewInline()
}
segment := pos.WithStop(pos.Start + ender - opener) segment := pos.WithStop(pos.Start + ender - opener)
node.AppendChild(node, ast.NewRawTextSegment(segment)) node.AppendChild(node, ast.NewRawTextSegment(segment))
block.Advance(ender - opener + len(parser.end)) block.Advance(ender - opener + len(stopMark))
trimBlock(node, block)
if parser == defaultDualDollarParser {
trimBlock(&(node.(*InlineBlock)).Inline, block)
} else {
trimBlock(node.(*Inline), block)
}
return node return node
} }

View File

@ -50,5 +50,4 @@ func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Nod
// RegisterFuncs registers the renderer for inline math nodes // RegisterFuncs registers the renderer for inline math nodes
func (r *InlineRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { func (r *InlineRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
reg.Register(KindInline, r.renderInline) reg.Register(KindInline, r.renderInline)
reg.Register(KindInlineBlock, r.renderInline)
} }

View File

@ -5,6 +5,7 @@ package math
import ( import (
"code.gitea.io/gitea/modules/markup/internal" "code.gitea.io/gitea/modules/markup/internal"
giteaUtil "code.gitea.io/gitea/modules/util"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/parser"
@ -12,70 +13,45 @@ import (
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
) )
type Options struct {
Enabled bool
ParseDollarInline bool
ParseDollarBlock bool
ParseSquareBlock bool
}
// Extension is a math extension // Extension is a math extension
type Extension struct { type Extension struct {
renderInternal *internal.RenderInternal renderInternal *internal.RenderInternal
enabled bool options Options
parseDollarInline bool
parseDollarBlock bool
}
// Option is the interface Options should implement
type Option interface {
SetOption(e *Extension)
}
type extensionFunc func(e *Extension)
func (fn extensionFunc) SetOption(e *Extension) {
fn(e)
}
// Enabled enables or disables this extension
func Enabled(enable ...bool) Option {
value := true
if len(enable) > 0 {
value = enable[0]
}
return extensionFunc(func(e *Extension) {
e.enabled = value
})
} }
// NewExtension creates a new math extension with the provided options // NewExtension creates a new math extension with the provided options
func NewExtension(renderInternal *internal.RenderInternal, opts ...Option) *Extension { func NewExtension(renderInternal *internal.RenderInternal, opts ...Options) *Extension {
opt := giteaUtil.OptionalArg(opts)
r := &Extension{ r := &Extension{
renderInternal: renderInternal, renderInternal: renderInternal,
enabled: true, options: opt,
parseDollarBlock: true,
parseDollarInline: true,
}
for _, o := range opts {
o.SetOption(r)
} }
return r return r
} }
// Extend extends goldmark with our parsers and renderers // Extend extends goldmark with our parsers and renderers
func (e *Extension) Extend(m goldmark.Markdown) { func (e *Extension) Extend(m goldmark.Markdown) {
if !e.enabled { if !e.options.Enabled {
return return
} }
m.Parser().AddOptions(parser.WithBlockParsers( inlines := []util.PrioritizedValue{util.Prioritized(NewInlineBracketParser(), 501)}
util.Prioritized(NewBlockParser(e.parseDollarBlock), 701), if e.options.ParseDollarInline {
)) inlines = append(inlines, util.Prioritized(NewInlineDollarParser(), 502))
inlines := []util.PrioritizedValue{
util.Prioritized(NewInlineBracketParser(), 501),
}
if e.parseDollarInline {
inlines = append(inlines, util.Prioritized(NewInlineDollarParser(), 503),
util.Prioritized(NewInlineDualDollarParser(), 502))
} }
m.Parser().AddOptions(parser.WithInlineParsers(inlines...)) m.Parser().AddOptions(parser.WithInlineParsers(inlines...))
m.Parser().AddOptions(parser.WithBlockParsers(
util.Prioritized(NewBlockParser(e.options.ParseDollarBlock, e.options.ParseSquareBlock), 701),
))
m.Renderer().AddOptions(renderer.WithNodeRenderers( m.Renderer().AddOptions(renderer.WithNodeRenderers(
util.Prioritized(NewBlockRenderer(e.renderInternal), 501), util.Prioritized(NewBlockRenderer(e.renderInternal), 501),
util.Prioritized(NewInlineRenderer(e.renderInternal), 502), util.Prioritized(NewInlineRenderer(e.renderInternal), 502),

View File

@ -29,7 +29,7 @@
#repo-files-table .repo-file-line, #repo-files-table .repo-file-line,
#repo-files-table .repo-file-cell { #repo-files-table .repo-file-cell {
border-top: 1px solid var(--color-light-border); border-top: 1px solid var(--color-light-border);
padding: 6px 10px; padding: 8px 10px;
} }
#repo-files-table .repo-file-line:first-child { #repo-files-table .repo-file-line:first-child {
@ -41,7 +41,6 @@
display: flex; display: flex;
align-items: center; align-items: center;
gap: 0.5em; gap: 0.5em;
padding: 6px 10px;
} }
#repo-files-table .repo-file-last-commit { #repo-files-table .repo-file-last-commit {

View File

@ -1,8 +1,14 @@
import {displayError} from './common.ts'; import {displayError} from './common.ts';
function targetElement(el: Element) { function targetElement(el: Element): {target: Element, displayAsBlock: boolean} {
// The target element is either the parent "code block with loading indicator", or itself // The target element is either the parent "code block with loading indicator", or itself
return el.closest('.code-block.is-loading') ?? el; // It is designed to work for 2 cases (guaranteed by backend code):
// * <pre class="code-block is-loading"><code class="language-math display">...</code></pre>
// * <code class="language-math">...</code>
return {
target: el.closest('.code-block.is-loading') ?? el,
displayAsBlock: el.classList.contains('display'),
};
} }
export async function renderMath(): Promise<void> { export async function renderMath(): Promise<void> {
@ -19,7 +25,7 @@ export async function renderMath(): Promise<void> {
const MAX_EXPAND = 1000; const MAX_EXPAND = 1000;
for (const el of els) { for (const el of els) {
const target = targetElement(el); const {target, displayAsBlock} = targetElement(el);
if (target.hasAttribute('data-render-done')) continue; if (target.hasAttribute('data-render-done')) continue;
const source = el.textContent; const source = el.textContent;
@ -27,16 +33,12 @@ export async function renderMath(): Promise<void> {
displayError(target, new Error(`Math source of ${source.length} characters exceeds the maximum allowed length of ${MAX_CHARS}.`)); displayError(target, new Error(`Math source of ${source.length} characters exceeds the maximum allowed length of ${MAX_CHARS}.`));
continue; continue;
} }
const displayMode = el.classList.contains('display');
const nodeName = displayMode ? 'p' : 'span';
try { try {
const tempEl = document.createElement(nodeName); const tempEl = document.createElement(displayAsBlock ? 'p' : 'span');
katex.render(source, tempEl, { katex.render(source, tempEl, {
maxSize: MAX_SIZE, maxSize: MAX_SIZE,
maxExpand: MAX_EXPAND, maxExpand: MAX_EXPAND,
displayMode, displayMode: displayAsBlock, // katex: true for display (block) mode, false for inline mode
}); });
target.replaceWith(tempEl); target.replaceWith(tempEl);
} catch (error) { } catch (error) {