Refactor markdown math render (#32831)

Add more tests
This commit is contained in:
wxiaoguang 2024-12-14 13:43:05 +08:00 committed by GitHub
parent 82c59d52ea
commit cc5ff98e0d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 137 additions and 151 deletions

View File

@ -78,26 +78,23 @@ func (r *GlodmarkRender) Renderer() renderer.Renderer {
func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
if entering {
language, _ := c.Language()
if language == nil {
language = []byte("text")
}
languageBytes, _ := c.Language()
languageStr := giteautil.IfZero(string(languageBytes), "text")
languageStr := string(language)
preClasses := []string{"code-block"}
preClasses := "code-block"
if languageStr == "mermaid" || languageStr == "math" {
preClasses = append(preClasses, "is-loading")
preClasses += " is-loading"
}
err := r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<pre class="%s">`, strings.Join(preClasses, " "))
err := r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<pre class="%s">`, preClasses)
if err != nil {
return
}
// include language-x class as part of commonmark spec
// the "display" class is used by "js/markup/math.js" to render the code element as a block
err = r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<code class="chroma language-%s display">`, string(language))
// include language-x class as part of commonmark spec, "chroma" class is used to highlight the code
// the "display" class is used by "js/markup/math.ts" to render the code element as a block
// the "math.ts" strictly depends on the structure: <pre class="code-block is-loading"><code class="language-math display">...</code></pre>
err = r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<code class="chroma language-%s display">`, languageStr)
if err != nil {
return
}
@ -128,7 +125,12 @@ func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender {
),
highlighting.WithWrapperRenderer(r.highlightingRenderer),
),
math.NewExtension(&ctx.RenderInternal, math.Enabled(setting.Markdown.EnableMath)),
math.NewExtension(&ctx.RenderInternal, math.Options{
Enabled: setting.Markdown.EnableMath,
ParseDollarInline: true,
ParseDollarBlock: true,
ParseSquareBlock: true, // TODO: this is a bad syntax, it should be deprecated in the future (by some config options)
}),
meta.Meta,
),
goldmark.WithParserOptions(

View File

@ -12,8 +12,9 @@ import (
"github.com/stretchr/testify/assert"
)
const nl = "\n"
func TestMathRender(t *testing.T) {
const nl = "\n"
testcases := []struct {
testcase string
expected string
@ -86,6 +87,18 @@ func TestMathRender(t *testing.T) {
`$\text{$b$}$`,
`<p><code class="language-math">\text{$b$}</code></p>` + nl,
},
{
"a$`b`$c",
`<p>a<code class="language-math">b</code>c</p>` + nl,
},
{
"a $`b`$ c",
`<p>a <code class="language-math">b</code> c</p>` + nl,
},
{
"a$``b``$c x$```y```$z",
`<p>a<code class="language-math">b</code>c x<code class="language-math">y</code>z</p>` + nl,
},
}
for _, test := range testcases {
@ -215,6 +228,11 @@ x
</ol>
`,
},
{
"inline-non-math",
`\[x]`,
`<p>[x]</p>` + nl,
},
}
for _, test := range testcases {

View File

@ -16,16 +16,18 @@ import (
type blockParser struct {
parseDollars bool
parseSquare bool
endBytesDollars []byte
endBytesBracket []byte
endBytesSquare []byte
}
// NewBlockParser creates a new math BlockParser
func NewBlockParser(parseDollarBlocks bool) parser.BlockParser {
func NewBlockParser(parseDollars, parseSquare bool) parser.BlockParser {
return &blockParser{
parseDollars: parseDollarBlocks,
parseDollars: parseDollars,
parseSquare: parseSquare,
endBytesDollars: []byte{'$', '$'},
endBytesBracket: []byte{'\\', ']'},
endBytesSquare: []byte{'\\', ']'},
}
}
@ -40,7 +42,7 @@ func (b *blockParser) Open(parent ast.Node, reader text.Reader, pc parser.Contex
var dollars bool
if b.parseDollars && line[pos] == '$' && line[pos+1] == '$' {
dollars = true
} else if line[pos] == '\\' && line[pos+1] == '[' {
} else if b.parseSquare && line[pos] == '\\' && line[pos+1] == '[' {
if len(line[pos:]) >= 3 && line[pos+2] == '!' && bytes.Contains(line[pos:], []byte(`\]`)) {
// do not process escaped attention block: "> \[!NOTE\]"
return nil, parser.NoChildren
@ -53,10 +55,10 @@ func (b *blockParser) Open(parent ast.Node, reader text.Reader, pc parser.Contex
node := NewBlock(dollars, pos)
// Now we need to check if the ending block is on the segment...
endBytes := giteaUtil.Iif(dollars, b.endBytesDollars, b.endBytesBracket)
endBytes := giteaUtil.Iif(dollars, b.endBytesDollars, b.endBytesSquare)
idx := bytes.Index(line[pos+2:], endBytes)
if idx >= 0 {
// for case $$ ... $$ any other text
// for case: "$$ ... $$ any other text" (this case will be handled by the inline parser)
for i := pos + 2 + idx + 2; i < len(line); i++ {
if line[i] != ' ' && line[i] != '\n' {
return nil, parser.NoChildren
@ -70,6 +72,13 @@ func (b *blockParser) Open(parent ast.Node, reader text.Reader, pc parser.Contex
return node, parser.Close | parser.NoChildren
}
// for case "\[ ... ]" (no close marker on the same line)
for i := pos + 2 + idx + 2; i < len(line); i++ {
if line[i] != ' ' && line[i] != '\n' {
return nil, parser.NoChildren
}
}
segment.Start += pos + 2
node.Lines().Append(segment)
return node, parser.NoChildren
@ -85,7 +94,7 @@ func (b *blockParser) Continue(node ast.Node, reader text.Reader, pc parser.Cont
line, segment := reader.PeekLine()
w, pos := util.IndentWidth(line, reader.LineOffset())
if w < 4 {
endBytes := giteaUtil.Iif(block.Dollars, b.endBytesDollars, b.endBytesBracket)
endBytes := giteaUtil.Iif(block.Dollars, b.endBytesDollars, b.endBytesSquare)
if bytes.HasPrefix(line[pos:], endBytes) && util.IsBlank(line[pos+len(endBytes):]) {
if util.IsBlank(line[pos+len(endBytes):]) {
newline := giteaUtil.Iif(line[len(line)-1] != '\n', 0, 1)

View File

@ -1,31 +0,0 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package math
import (
"github.com/yuin/goldmark/ast"
)
// InlineBlock represents inline math e.g. $$...$$
type InlineBlock struct {
Inline
}
// InlineBlock implements InlineBlock.
func (n *InlineBlock) InlineBlock() {}
// KindInlineBlock is the kind for math inline block
var KindInlineBlock = ast.NewNodeKind("MathInlineBlock")
// Kind returns KindInlineBlock
func (n *InlineBlock) Kind() ast.NodeKind {
return KindInlineBlock
}
// NewInlineBlock creates a new ast math inline block node
func NewInlineBlock() *InlineBlock {
	// The embedded Inline node carries all shared inline-math behavior;
	// it starts out as its zero value.
	block := &InlineBlock{Inline: Inline{}}
	return block
}

View File

@ -8,7 +8,7 @@ import (
"github.com/yuin/goldmark/util"
)
// Inline represents inline math e.g. $...$ or \(...\)
// Inline struct represents inline math e.g. $...$ or \(...\)
type Inline struct {
ast.BaseInline
}

View File

@ -12,31 +12,25 @@ import (
)
type inlineParser struct {
start []byte
end []byte
trigger []byte
endBytesSingleDollar []byte
endBytesDoubleDollar []byte
endBytesBracket []byte
}
var defaultInlineDollarParser = &inlineParser{
start: []byte{'$'},
end: []byte{'$'},
}
var defaultDualDollarParser = &inlineParser{
start: []byte{'$', '$'},
end: []byte{'$', '$'},
trigger: []byte{'$'},
endBytesSingleDollar: []byte{'$'},
endBytesDoubleDollar: []byte{'$', '$'},
}
func NewInlineDollarParser() parser.InlineParser {
return defaultInlineDollarParser
}
func NewInlineDualDollarParser() parser.InlineParser {
return defaultDualDollarParser
}
var defaultInlineBracketParser = &inlineParser{
start: []byte{'\\', '('},
end: []byte{'\\', ')'},
trigger: []byte{'\\', '('},
endBytesBracket: []byte{'\\', ')'},
}
func NewInlineBracketParser() parser.InlineParser {
@ -45,7 +39,7 @@ func NewInlineBracketParser() parser.InlineParser {
// Trigger triggers this parser on $ or \
func (parser *inlineParser) Trigger() []byte {
return parser.start
return parser.trigger
}
func isPunctuation(b byte) bool {
@ -64,33 +58,60 @@ func isAlphanumeric(b byte) bool {
func (parser *inlineParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
line, _ := block.PeekLine()
if !bytes.HasPrefix(line, parser.start) {
if !bytes.HasPrefix(line, parser.trigger) {
// We'll catch this one on the next time round
return nil
}
precedingCharacter := block.PrecendingCharacter()
if precedingCharacter < 256 && (isAlphanumeric(byte(precedingCharacter)) || isPunctuation(byte(precedingCharacter))) {
// need to exclude things like `a$` from being considered a start
return nil
var startMarkLen int
var stopMark []byte
checkSurrounding := true
if line[0] == '$' {
startMarkLen = 1
stopMark = parser.endBytesSingleDollar
if len(line) > 1 {
if line[1] == '$' {
startMarkLen = 2
stopMark = parser.endBytesDoubleDollar
} else if line[1] == '`' {
pos := 1
for ; pos < len(line) && line[pos] == '`'; pos++ {
}
startMarkLen = pos
stopMark = bytes.Repeat([]byte{'`'}, pos)
stopMark[len(stopMark)-1] = '$'
checkSurrounding = false
}
}
} else {
startMarkLen = 2
stopMark = parser.endBytesBracket
}
if checkSurrounding {
precedingCharacter := block.PrecendingCharacter()
if precedingCharacter < 256 && (isAlphanumeric(byte(precedingCharacter)) || isPunctuation(byte(precedingCharacter))) {
// need to exclude things like `a$` from being considered a start
return nil
}
}
// move the opener marker to point at the start of the text
opener := len(parser.start)
opener := startMarkLen
// Now look for an ending line
depth := 0
ender := -1
for i := opener; i < len(line); i++ {
if depth == 0 && bytes.HasPrefix(line[i:], parser.end) {
if depth == 0 && bytes.HasPrefix(line[i:], stopMark) {
succeedingCharacter := byte(0)
if i+len(parser.end) < len(line) {
succeedingCharacter = line[i+len(parser.end)]
if i+len(stopMark) < len(line) {
succeedingCharacter = line[i+len(stopMark)]
}
// check valid ending character
isValidEndingChar := isPunctuation(succeedingCharacter) || isBracket(succeedingCharacter) ||
succeedingCharacter == ' ' || succeedingCharacter == '\n' || succeedingCharacter == 0
if !isValidEndingChar {
if checkSurrounding && !isValidEndingChar {
break
}
ender = i
@ -112,21 +133,12 @@ func (parser *inlineParser) Parse(parent ast.Node, block text.Reader, pc parser.
block.Advance(opener)
_, pos := block.Position()
var node ast.Node
if parser == defaultDualDollarParser {
node = NewInlineBlock()
} else {
node = NewInline()
}
node := NewInline()
segment := pos.WithStop(pos.Start + ender - opener)
node.AppendChild(node, ast.NewRawTextSegment(segment))
block.Advance(ender - opener + len(parser.end))
if parser == defaultDualDollarParser {
trimBlock(&(node.(*InlineBlock)).Inline, block)
} else {
trimBlock(node.(*Inline), block)
}
block.Advance(ender - opener + len(stopMark))
trimBlock(node, block)
return node
}

View File

@ -50,5 +50,4 @@ func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Nod
// RegisterFuncs registers the renderer for inline math nodes
func (r *InlineRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
reg.Register(KindInline, r.renderInline)
reg.Register(KindInlineBlock, r.renderInline)
}

View File

@ -5,6 +5,7 @@ package math
import (
"code.gitea.io/gitea/modules/markup/internal"
giteaUtil "code.gitea.io/gitea/modules/util"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
@ -12,70 +13,45 @@ import (
"github.com/yuin/goldmark/util"
)
// Options configures the math extension: whether it is active at all and
// which math delimiters its parsers recognize.
type Options struct {
// Enabled gates the whole extension; when false, Extend returns without
// registering any parser or renderer.
Enabled bool
// ParseDollarInline registers the inline dollar parser (triggered by '$')
// in addition to the always-registered inline bracket parser.
ParseDollarInline bool
// ParseDollarBlock lets the block parser open a math block on "$$".
ParseDollarBlock bool
// ParseSquareBlock lets the block parser open a math block on "\[".
ParseSquareBlock bool
}
// Extension is a math extension
type Extension struct {
renderInternal *internal.RenderInternal
enabled bool
parseDollarInline bool
parseDollarBlock bool
}
// Option is the interface Options should implement
type Option interface {
SetOption(e *Extension)
}
type extensionFunc func(e *Extension)
func (fn extensionFunc) SetOption(e *Extension) {
fn(e)
}
// Enabled enables or disables this extension
func Enabled(enable ...bool) Option {
value := true
if len(enable) > 0 {
value = enable[0]
}
return extensionFunc(func(e *Extension) {
e.enabled = value
})
renderInternal *internal.RenderInternal
options Options
}
// NewExtension creates a new math extension with the provided options
func NewExtension(renderInternal *internal.RenderInternal, opts ...Option) *Extension {
func NewExtension(renderInternal *internal.RenderInternal, opts ...Options) *Extension {
opt := giteaUtil.OptionalArg(opts)
r := &Extension{
renderInternal: renderInternal,
enabled: true,
parseDollarBlock: true,
parseDollarInline: true,
}
for _, o := range opts {
o.SetOption(r)
renderInternal: renderInternal,
options: opt,
}
return r
}
// Extend extends goldmark with our parsers and renderers
func (e *Extension) Extend(m goldmark.Markdown) {
if !e.enabled {
if !e.options.Enabled {
return
}
m.Parser().AddOptions(parser.WithBlockParsers(
util.Prioritized(NewBlockParser(e.parseDollarBlock), 701),
))
inlines := []util.PrioritizedValue{
util.Prioritized(NewInlineBracketParser(), 501),
}
if e.parseDollarInline {
inlines = append(inlines, util.Prioritized(NewInlineDollarParser(), 503),
util.Prioritized(NewInlineDualDollarParser(), 502))
inlines := []util.PrioritizedValue{util.Prioritized(NewInlineBracketParser(), 501)}
if e.options.ParseDollarInline {
inlines = append(inlines, util.Prioritized(NewInlineDollarParser(), 502))
}
m.Parser().AddOptions(parser.WithInlineParsers(inlines...))
m.Parser().AddOptions(parser.WithBlockParsers(
util.Prioritized(NewBlockParser(e.options.ParseDollarBlock, e.options.ParseSquareBlock), 701),
))
m.Renderer().AddOptions(renderer.WithNodeRenderers(
util.Prioritized(NewBlockRenderer(e.renderInternal), 501),
util.Prioritized(NewInlineRenderer(e.renderInternal), 502),

View File

@ -29,7 +29,7 @@
#repo-files-table .repo-file-line,
#repo-files-table .repo-file-cell {
border-top: 1px solid var(--color-light-border);
padding: 6px 10px;
padding: 8px 10px;
}
#repo-files-table .repo-file-line:first-child {
@ -41,7 +41,6 @@
display: flex;
align-items: center;
gap: 0.5em;
padding: 6px 10px;
}
#repo-files-table .repo-file-last-commit {

View File

@ -1,8 +1,14 @@
import {displayError} from './common.ts';
function targetElement(el: Element) {
function targetElement(el: Element): {target: Element, displayAsBlock: boolean} {
// The target element is either the parent "code block with loading indicator", or itself
return el.closest('.code-block.is-loading') ?? el;
// It is designed to work for 2 cases (guaranteed by backend code):
// * <pre class="code-block is-loading"><code class="language-math display">...</code></pre>
// * <code class="language-math">...</code>
return {
target: el.closest('.code-block.is-loading') ?? el,
displayAsBlock: el.classList.contains('display'),
};
}
export async function renderMath(): Promise<void> {
@ -19,7 +25,7 @@ export async function renderMath(): Promise<void> {
const MAX_EXPAND = 1000;
for (const el of els) {
const target = targetElement(el);
const {target, displayAsBlock} = targetElement(el);
if (target.hasAttribute('data-render-done')) continue;
const source = el.textContent;
@ -27,16 +33,12 @@ export async function renderMath(): Promise<void> {
displayError(target, new Error(`Math source of ${source.length} characters exceeds the maximum allowed length of ${MAX_CHARS}.`));
continue;
}
const displayMode = el.classList.contains('display');
const nodeName = displayMode ? 'p' : 'span';
try {
const tempEl = document.createElement(nodeName);
const tempEl = document.createElement(displayAsBlock ? 'p' : 'span');
katex.render(source, tempEl, {
maxSize: MAX_SIZE,
maxExpand: MAX_EXPAND,
displayMode,
displayMode: displayAsBlock, // katex: true for display (block) mode, false for inline mode
});
target.replaceWith(tempEl);
} catch (error) {