4
0
mirror of https://github.com/cwinfo/matterbridge.git synced 2025-07-06 10:44:04 +00:00

Fix regression in HTML handling (telegram). Closes #734

* Revert back to blackfriday v1
* Add testing
This commit is contained in:
Wim
2019-02-24 15:13:56 +01:00
parent f92735d35d
commit 96841c70c7
22 changed files with 2336 additions and 2562 deletions

View File

@ -1,200 +1,230 @@
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//
//
//
// Markdown parsing and processing
//
//
package blackfriday
import (
"bytes"
"fmt"
"io"
"strings"
"unicode/utf8"
)
//
// Markdown parsing and processing
//
// Version string of the package. Appears in the rendered document when
// CompletePage flag is on.
const Version = "2.0"
// Extensions is a bitwise or'ed collection of enabled Blackfriday's
// extensions.
type Extensions int
const VERSION = "1.5"
// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
const (
NoExtensions Extensions = 0
NoIntraEmphasis Extensions = 1 << iota // Ignore emphasis markers inside words
Tables // Render tables
FencedCode // Render fenced code blocks
Autolink // Detect embedded URLs that are not explicitly marked
Strikethrough // Strikethrough text using ~~test~~
LaxHTMLBlocks // Loosen up HTML block parsing rules
SpaceHeadings // Be strict about prefix heading rules
HardLineBreak // Translate newlines into line breaks
TabSizeEight // Expand tabs to eight spaces instead of four
Footnotes // Pandoc-style footnotes
NoEmptyLineBeforeBlock // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
HeadingIDs // specify heading IDs with {#id}
Titleblock // Titleblock ala pandoc
AutoHeadingIDs // Create the heading ID from the text
BackslashLineBreak // Translate trailing backslashes into line breaks
DefinitionLists // Render definition lists
EXTENSION_NO_INTRA_EMPHASIS = 1 << iota // ignore emphasis markers inside words
EXTENSION_TABLES // render tables
EXTENSION_FENCED_CODE // render fenced code blocks
EXTENSION_AUTOLINK // detect embedded URLs that are not explicitly marked
EXTENSION_STRIKETHROUGH // strikethrough text using ~~test~~
EXTENSION_LAX_HTML_BLOCKS // loosen up HTML block parsing rules
EXTENSION_SPACE_HEADERS // be strict about prefix header rules
EXTENSION_HARD_LINE_BREAK // translate newlines into line breaks
EXTENSION_TAB_SIZE_EIGHT // expand tabs to eight spaces instead of four
EXTENSION_FOOTNOTES // Pandoc-style footnotes
EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
EXTENSION_HEADER_IDS // specify header IDs with {#id}
EXTENSION_TITLEBLOCK // Titleblock ala pandoc
EXTENSION_AUTO_HEADER_IDS // Create the header ID from the text
EXTENSION_BACKSLASH_LINE_BREAK // translate trailing backslashes into line breaks
EXTENSION_DEFINITION_LISTS // render definition lists
EXTENSION_JOIN_LINES // delete newline and join lines
CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
commonHtmlFlags = 0 |
HTML_USE_XHTML |
HTML_USE_SMARTYPANTS |
HTML_SMARTYPANTS_FRACTIONS |
HTML_SMARTYPANTS_DASHES |
HTML_SMARTYPANTS_LATEX_DASHES
CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
BackslashLineBreak | DefinitionLists
commonExtensions = 0 |
EXTENSION_NO_INTRA_EMPHASIS |
EXTENSION_TABLES |
EXTENSION_FENCED_CODE |
EXTENSION_AUTOLINK |
EXTENSION_STRIKETHROUGH |
EXTENSION_SPACE_HEADERS |
EXTENSION_HEADER_IDS |
EXTENSION_BACKSLASH_LINE_BREAK |
EXTENSION_DEFINITION_LISTS
)
// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int
// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
LINK_TYPE_NOT_AUTOLINK = iota
LINK_TYPE_NORMAL
LINK_TYPE_EMAIL
)
// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
ListTypeOrdered ListType = 1 << iota
ListTypeDefinition
ListTypeTerm
ListItemContainsBlock
ListItemBeginningOfList // TODO: figure out if this is of any use now
ListItemEndOfList
LIST_TYPE_ORDERED = 1 << iota
LIST_TYPE_DEFINITION
LIST_TYPE_TERM
LIST_ITEM_CONTAINS_BLOCK
LIST_ITEM_BEGINNING_OF_LIST
LIST_ITEM_END_OF_LIST
)
// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int
// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
TableAlignmentLeft CellAlignFlags = 1 << iota
TableAlignmentRight
TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
TABLE_ALIGNMENT_LEFT = 1 << iota
TABLE_ALIGNMENT_RIGHT
TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT)
)
// The size of a tab stop.
const (
TabSizeDefault = 4
TabSizeDouble = 8
TAB_SIZE_DEFAULT = 4
TAB_SIZE_EIGHT = 8
)
// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
"blockquote": struct{}{},
"del": struct{}{},
"div": struct{}{},
"dl": struct{}{},
"fieldset": struct{}{},
"form": struct{}{},
"h1": struct{}{},
"h2": struct{}{},
"h3": struct{}{},
"h4": struct{}{},
"h5": struct{}{},
"h6": struct{}{},
"iframe": struct{}{},
"ins": struct{}{},
"math": struct{}{},
"noscript": struct{}{},
"ol": struct{}{},
"pre": struct{}{},
"p": struct{}{},
"script": struct{}{},
"style": struct{}{},
"table": struct{}{},
"ul": struct{}{},
"blockquote": {},
"del": {},
"div": {},
"dl": {},
"fieldset": {},
"form": {},
"h1": {},
"h2": {},
"h3": {},
"h4": {},
"h5": {},
"h6": {},
"iframe": {},
"ins": {},
"math": {},
"noscript": {},
"ol": {},
"pre": {},
"p": {},
"script": {},
"style": {},
"table": {},
"ul": {},
// HTML5
"address": struct{}{},
"article": struct{}{},
"aside": struct{}{},
"canvas": struct{}{},
"figcaption": struct{}{},
"figure": struct{}{},
"footer": struct{}{},
"header": struct{}{},
"hgroup": struct{}{},
"main": struct{}{},
"nav": struct{}{},
"output": struct{}{},
"progress": struct{}{},
"section": struct{}{},
"video": struct{}{},
"address": {},
"article": {},
"aside": {},
"canvas": {},
"figcaption": {},
"figure": {},
"footer": {},
"header": {},
"hgroup": {},
"main": {},
"nav": {},
"output": {},
"progress": {},
"section": {},
"video": {},
}
// Renderer is the rendering interface. This is mostly of interest if you are
// implementing a new rendering format.
// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// Only an HTML implementation is provided in this repository, see the README
// for external implementations.
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently Html and Latex implementations are provided
type Renderer interface {
// RenderNode is the main rendering method. It will be called once for
// every leaf node and twice for every non-leaf node (first with
// entering=true, then with entering=false). The method should write its
// rendition of the node to the supplied writer w.
RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
// block-level callbacks
BlockCode(out *bytes.Buffer, text []byte, infoString string)
BlockQuote(out *bytes.Buffer, text []byte)
BlockHtml(out *bytes.Buffer, text []byte)
Header(out *bytes.Buffer, text func() bool, level int, id string)
HRule(out *bytes.Buffer)
List(out *bytes.Buffer, text func() bool, flags int)
ListItem(out *bytes.Buffer, text []byte, flags int)
Paragraph(out *bytes.Buffer, text func() bool)
Table(out *bytes.Buffer, header []byte, body []byte, columnData []int)
TableRow(out *bytes.Buffer, text []byte)
TableHeaderCell(out *bytes.Buffer, text []byte, flags int)
TableCell(out *bytes.Buffer, text []byte, flags int)
Footnotes(out *bytes.Buffer, text func() bool)
FootnoteItem(out *bytes.Buffer, name, text []byte, flags int)
TitleBlock(out *bytes.Buffer, text []byte)
// RenderHeader is a method that allows the renderer to produce some
// content preceding the main body of the output document. The header is
// understood in the broad sense here. For example, the default HTML
// renderer will write not only the HTML document preamble, but also the
// table of contents if it was requested.
//
// The method will be passed an entire document tree, in case a particular
// implementation needs to inspect it to produce output.
//
// The output should be written to the supplied writer w. If your
// implementation has no header to write, supply an empty implementation.
RenderHeader(w io.Writer, ast *Node)
// Span-level callbacks
AutoLink(out *bytes.Buffer, link []byte, kind int)
CodeSpan(out *bytes.Buffer, text []byte)
DoubleEmphasis(out *bytes.Buffer, text []byte)
Emphasis(out *bytes.Buffer, text []byte)
Image(out *bytes.Buffer, link []byte, title []byte, alt []byte)
LineBreak(out *bytes.Buffer)
Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
RawHtmlTag(out *bytes.Buffer, tag []byte)
TripleEmphasis(out *bytes.Buffer, text []byte)
StrikeThrough(out *bytes.Buffer, text []byte)
FootnoteRef(out *bytes.Buffer, ref []byte, id int)
// RenderFooter is a symmetric counterpart of RenderHeader.
RenderFooter(w io.Writer, ast *Node)
// Low-level callbacks
Entity(out *bytes.Buffer, entity []byte)
NormalText(out *bytes.Buffer, text []byte)
// Header and footer
DocumentHeader(out *bytes.Buffer)
DocumentFooter(out *bytes.Buffer)
GetFlags() int
}
// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)
type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int
// Markdown is a type that holds extensions and the runtime state used by
// Parse, and the renderer. You can not use it directly, construct it with New.
type Markdown struct {
renderer Renderer
referenceOverride ReferenceOverrideFunc
refs map[string]*reference
inlineCallback [256]inlineParser
extensions Extensions
nesting int
maxNesting int
insideLink bool
// Parser holds runtime state used by the parser.
// This is constructed by the Markdown function.
type parser struct {
r Renderer
refOverride ReferenceOverrideFunc
refs map[string]*reference
inlineCallback [256]inlineParser
flags int
nesting int
maxNesting int
insideLink bool
// Footnotes need to be ordered as well as available to quickly check for
// presence. If a ref is also a footnote, it's stored both in refs and here
// in notes. Slice is nil if footnotes not enabled.
notes []*reference
doc *Node
tip *Node // = doc
oldTip *Node
lastMatchedContainer *Node // = doc
allClosed bool
notes []*reference
notesRecord map[string]struct{}
}
func (p *Markdown) getRef(refid string) (ref *reference, found bool) {
if p.referenceOverride != nil {
r, overridden := p.referenceOverride(refid)
func (p *parser) getRef(refid string) (ref *reference, found bool) {
if p.refOverride != nil {
r, overridden := p.refOverride(refid)
if overridden {
if r == nil {
return nil, false
@ -202,7 +232,7 @@ func (p *Markdown) getRef(refid string) (ref *reference, found bool) {
return &reference{
link: []byte(r.Link),
title: []byte(r.Title),
noteID: 0,
noteId: 0,
hasBlock: false,
text: []byte(r.Text)}, true
}
@ -212,34 +242,9 @@ func (p *Markdown) getRef(refid string) (ref *reference, found bool) {
return ref, found
}
func (p *Markdown) finalize(block *Node) {
above := block.Parent
block.open = false
p.tip = above
}
func (p *Markdown) addChild(node NodeType, offset uint32) *Node {
return p.addExistingChild(NewNode(node), offset)
}
func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node {
for !p.tip.canContain(node.Type) {
p.finalize(p.tip)
}
p.tip.AppendChild(node)
p.tip = node
return node
}
func (p *Markdown) closeUnmatchedBlocks() {
if !p.allClosed {
for p.oldTip != p.lastMatchedContainer {
parent := p.oldTip.Parent
p.finalize(p.oldTip)
p.oldTip = parent
}
p.allClosed = true
}
func (p *parser) isFootnote(ref *reference) bool {
_, ok := p.notesRecord[string(ref.link)]
return ok
}
//
@ -266,27 +271,102 @@ type Reference struct {
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)
// New constructs a Markdown processor. You can use the same With* functions as
// for Run() to customize parser's behavior and the renderer.
func New(opts ...Option) *Markdown {
var p Markdown
for _, opt := range opts {
opt(&p)
// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
type Options struct {
// Extensions is a flag set of bit-wise ORed extension bits. See the
// EXTENSION_* flags defined in this package.
Extensions int
// ReferenceOverride is an optional function callback that is called every
// time a reference is resolved.
//
// In Markdown, the link reference syntax can be made to resolve a link to
// a reference instead of an inline URL, in one of the following ways:
//
// * [link text][refid]
// * [refid][]
//
// Usually, the refid is defined at the bottom of the Markdown document. If
// this override function is provided, the refid is passed to the override
// function first, before consulting the defined refids at the bottom. If
// the override function indicates an override did not occur, the refids at
// the bottom will be used to fill in the link details.
ReferenceOverride ReferenceOverrideFunc
}
// MarkdownBasic is a convenience function for simple rendering.
// It processes markdown input with no extensions enabled.
func MarkdownBasic(input []byte) []byte {
// set up the HTML renderer
htmlFlags := HTML_USE_XHTML
renderer := HtmlRenderer(htmlFlags, "", "")
// set up the parser
return MarkdownOptions(input, renderer, Options{Extensions: 0})
}
// Call Markdown with most useful extensions enabled
// MarkdownCommon is a convenience function for simple rendering.
// It processes markdown input with common extensions enabled, including:
//
// * Smartypants processing with smart fractions and LaTeX dashes
//
// * Intra-word emphasis suppression
//
// * Tables
//
// * Fenced code blocks
//
// * Autolinking
//
// * Strikethrough support
//
// * Strict header parsing
//
// * Custom Header IDs
func MarkdownCommon(input []byte) []byte {
// set up the HTML renderer
renderer := HtmlRenderer(commonHtmlFlags, "", "")
return MarkdownOptions(input, renderer, Options{
Extensions: commonExtensions})
}
// Markdown is the main rendering function.
// It parses and renders a block of markdown-encoded text.
// The supplied Renderer is used to format the output, and extensions dictates
// which non-standard extensions are enabled.
//
// To use the supplied Html or LaTeX renderers, see HtmlRenderer and
// LatexRenderer, respectively.
func Markdown(input []byte, renderer Renderer, extensions int) []byte {
return MarkdownOptions(input, renderer, Options{
Extensions: extensions})
}
// MarkdownOptions is just like Markdown but takes additional options through
// the Options struct.
func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
// no point in parsing if we can't render
if renderer == nil {
return nil
}
extensions := opts.Extensions
// fill in the render structure
p := new(parser)
p.r = renderer
p.flags = extensions
p.refOverride = opts.ReferenceOverride
p.refs = make(map[string]*reference)
p.maxNesting = 16
p.insideLink = false
docNode := NewNode(Document)
p.doc = docNode
p.tip = docNode
p.oldTip = docNode
p.lastMatchedContainer = docNode
p.allClosed = true
// register inline parsers
p.inlineCallback[' '] = maybeLineBreak
p.inlineCallback['*'] = emphasis
p.inlineCallback['_'] = emphasis
if p.extensions&Strikethrough != 0 {
if extensions&EXTENSION_STRIKETHROUGH != 0 {
p.inlineCallback['~'] = emphasis
}
p.inlineCallback['`'] = codeSpan
@ -295,166 +375,116 @@ func New(opts ...Option) *Markdown {
p.inlineCallback['<'] = leftAngle
p.inlineCallback['\\'] = escape
p.inlineCallback['&'] = entity
p.inlineCallback['!'] = maybeImage
p.inlineCallback['^'] = maybeInlineFootnote
if p.extensions&Autolink != 0 {
p.inlineCallback['h'] = maybeAutoLink
p.inlineCallback['m'] = maybeAutoLink
p.inlineCallback['f'] = maybeAutoLink
p.inlineCallback['H'] = maybeAutoLink
p.inlineCallback['M'] = maybeAutoLink
p.inlineCallback['F'] = maybeAutoLink
if extensions&EXTENSION_AUTOLINK != 0 {
p.inlineCallback[':'] = autoLink
}
if p.extensions&Footnotes != 0 {
if extensions&EXTENSION_FOOTNOTES != 0 {
p.notes = make([]*reference, 0)
p.notesRecord = make(map[string]struct{})
}
return &p
first := firstPass(p, input)
second := secondPass(p, first)
return second
}
// Option customizes the Markdown processor's default behavior.
type Option func(*Markdown)
// WithRenderer allows you to override the default renderer.
func WithRenderer(r Renderer) Option {
return func(p *Markdown) {
p.renderer = r
// first pass:
// - normalize newlines
// - extract references (outside of fenced code blocks)
// - expand tabs (outside of fenced code blocks)
// - copy everything else
func firstPass(p *parser, input []byte) []byte {
var out bytes.Buffer
tabSize := TAB_SIZE_DEFAULT
if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 {
tabSize = TAB_SIZE_EIGHT
}
beg := 0
lastFencedCodeBlockEnd := 0
for beg < len(input) {
// Find end of this line, then process the line.
end := beg
for end < len(input) && input[end] != '\n' && input[end] != '\r' {
end++
}
if p.flags&EXTENSION_FENCED_CODE != 0 {
// track fenced code block boundaries to suppress tab expansion
// and reference extraction inside them:
if beg >= lastFencedCodeBlockEnd {
if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 {
lastFencedCodeBlockEnd = beg + i
}
}
}
// add the line body if present
if end > beg {
if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
out.Write(input[beg:end])
} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
beg += refEnd
continue
} else {
expandTabs(&out, input[beg:end], tabSize)
}
}
if end < len(input) && input[end] == '\r' {
end++
}
if end < len(input) && input[end] == '\n' {
end++
}
out.WriteByte('\n')
beg = end
}
// empty input?
if out.Len() == 0 {
out.WriteByte('\n')
}
return out.Bytes()
}
// WithExtensions allows you to pick some of the many extensions provided by
// Blackfriday. You can bitwise OR them.
func WithExtensions(e Extensions) Option {
return func(p *Markdown) {
p.extensions = e
}
}
// second pass: actual rendering
func secondPass(p *parser, input []byte) []byte {
var output bytes.Buffer
// WithNoExtensions turns off all extensions and custom behavior.
func WithNoExtensions() Option {
return func(p *Markdown) {
p.extensions = NoExtensions
p.renderer = NewHTMLRenderer(HTMLRendererParameters{
Flags: HTMLFlagsNone,
p.r.DocumentHeader(&output)
p.block(&output, input)
if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 {
p.r.Footnotes(&output, func() bool {
flags := LIST_ITEM_BEGINNING_OF_LIST
for i := 0; i < len(p.notes); i += 1 {
ref := p.notes[i]
var buf bytes.Buffer
if ref.hasBlock {
flags |= LIST_ITEM_CONTAINS_BLOCK
p.block(&buf, ref.title)
} else {
p.inline(&buf, ref.title)
}
p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags)
flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK
}
return true
})
}
}
// WithRefOverride sets an optional function callback that is called every
// time a reference is resolved.
//
// In Markdown, the link reference syntax can be made to resolve a link to
// a reference instead of an inline URL, in one of the following ways:
//
// * [link text][refid]
// * [refid][]
//
// Usually, the refid is defined at the bottom of the Markdown document. If
// this override function is provided, the refid is passed to the override
// function first, before consulting the defined refids at the bottom. If
// the override function indicates an override did not occur, the refids at
// the bottom will be used to fill in the link details.
func WithRefOverride(o ReferenceOverrideFunc) Option {
return func(p *Markdown) {
p.referenceOverride = o
}
}
p.r.DocumentFooter(&output)
// Run is the main entry point to Blackfriday. It parses and renders a
// block of markdown-encoded text.
//
// The simplest invocation of Run takes one argument, input:
// output := Run(input)
// This will parse the input with CommonExtensions enabled and render it with
// the default HTMLRenderer (with CommonHTMLFlags).
//
// Variadic arguments opts can customize the default behavior. Since Markdown
// type does not contain exported fields, you can not use it directly. Instead,
// use the With* functions. For example, this will call the most basic
// functionality, with no extensions:
// output := Run(input, WithNoExtensions())
//
// You can use any number of With* arguments, even contradicting ones. They
// will be applied in order of appearance and the latter will override the
// former:
// output := Run(input, WithNoExtensions(), WithExtensions(exts),
// WithRenderer(yourRenderer))
func Run(input []byte, opts ...Option) []byte {
r := NewHTMLRenderer(HTMLRendererParameters{
Flags: CommonHTMLFlags,
})
optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)}
optList = append(optList, opts...)
parser := New(optList...)
ast := parser.Parse(input)
var buf bytes.Buffer
parser.renderer.RenderHeader(&buf, ast)
ast.Walk(func(node *Node, entering bool) WalkStatus {
return parser.renderer.RenderNode(&buf, node, entering)
})
parser.renderer.RenderFooter(&buf, ast)
return buf.Bytes()
}
if p.nesting != 0 {
panic("Nesting level did not end at zero")
}
// Parse is an entry point to the parsing part of Blackfriday. It takes an
// input markdown document and produces a syntax tree for its contents. This
// tree can then be rendered with a default or custom renderer, or
// analyzed/transformed by the caller to whatever non-standard needs they have.
// The return value is the root node of the syntax tree.
func (p *Markdown) Parse(input []byte) *Node {
p.block(input)
// Walk the tree and finish up some of unfinished blocks
for p.tip != nil {
p.finalize(p.tip)
}
// Walk the tree again and process inline markdown in each block
p.doc.Walk(func(node *Node, entering bool) WalkStatus {
if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell {
p.inline(node, node.content)
node.content = nil
}
return GoToNext
})
p.parseRefsToAST()
return p.doc
}
func (p *Markdown) parseRefsToAST() {
if p.extensions&Footnotes == 0 || len(p.notes) == 0 {
return
}
p.tip = p.doc
block := p.addBlock(List, nil)
block.IsFootnotesList = true
block.ListFlags = ListTypeOrdered
flags := ListItemBeginningOfList
// Note: this loop is intentionally explicit, not range-form. This is
// because the body of the loop will append nested footnotes to p.notes and
// we need to process those late additions. Range form would only walk over
// the fixed initial set.
for i := 0; i < len(p.notes); i++ {
ref := p.notes[i]
p.addExistingChild(ref.footnote, 0)
block := ref.footnote
block.ListFlags = flags | ListTypeOrdered
block.RefLink = ref.link
if ref.hasBlock {
flags |= ListItemContainsBlock
p.block(ref.title)
} else {
p.inline(block, ref.title)
}
flags &^= ListItemBeginningOfList | ListItemContainsBlock
}
above := block.Parent
finalizeList(block)
p.tip = above
block.Walk(func(node *Node, entering bool) WalkStatus {
if node.Type == Paragraph || node.Type == Heading {
p.inline(node, node.content)
node.content = nil
}
return GoToNext
})
return output.Bytes()
}
//
@ -486,56 +516,18 @@ func (p *Markdown) parseRefsToAST() {
//
// are not yet supported.
// reference holds all information necessary for a reference-style links or
// footnotes.
//
// Consider this markdown with reference-style links:
//
// [link][ref]
//
// [ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
// <p><a href=\"/url/\" title=\"title\">link</a></p>
//
// And a reference structure will be populated as follows:
//
// p.refs["ref"] = &reference{
// link: "/url/",
// title: "tooltip title",
// }
//
// Alternatively, reference can contain information about a footnote. Consider
// this markdown:
//
// Text needing a footnote.[^a]
//
// [^a]: This is the note
//
// A reference structure will be populated as follows:
//
// p.refs["a"] = &reference{
// link: "a",
// title: "This is the note",
// noteID: <some positive int>,
// }
//
// TODO: As you can see, it begs for splitting into two dedicated structures
// for refs and for footnotes.
// References are parsed and stored in this struct.
type reference struct {
link []byte
title []byte
noteID int // 0 if not a footnote ref
noteId int // 0 if not a footnote ref
hasBlock bool
footnote *Node // a link to the Item node within a list of footnotes
text []byte // only gets populated by refOverride feature with Reference.Text
text []byte
}
func (r *reference) String() string {
return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}",
r.link, r.title, r.text, r.noteID, r.hasBlock)
return fmt.Sprintf("{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}",
r.link, r.title, r.text, r.noteId, r.hasBlock)
}
// Check whether or not data starts with a reference link.
@ -543,7 +535,7 @@ func (r *reference) String() string {
// (in the render struct).
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
func isReference(p *Markdown, data []byte, tabSize int) int {
func isReference(p *parser, data []byte, tabSize int) int {
// up to 3 optional leading spaces
if len(data) < 4 {
return 0
@ -553,18 +545,18 @@ func isReference(p *Markdown, data []byte, tabSize int) int {
i++
}
noteID := 0
noteId := 0
// id part: anything but a newline between brackets
if data[i] != '[' {
return 0
}
i++
if p.extensions&Footnotes != 0 {
if p.flags&EXTENSION_FOOTNOTES != 0 {
if i < len(data) && data[i] == '^' {
// we can set it to anything here because the proper noteIds will
// be assigned later during the second pass. It just has to be != 0
noteID = 1
noteId = 1
i++
}
}
@ -576,11 +568,7 @@ func isReference(p *Markdown, data []byte, tabSize int) int {
return 0
}
idEnd := i
// footnotes can have empty ID, like this: [^], but a reference can not be
// empty like this: []. Break early if it's not a footnote and there's no ID
if noteID == 0 && idOffset == idEnd {
return 0
}
// spacer: colon (space | tab)* newline? (space | tab)*
i++
if i >= len(data) || data[i] != ':' {
@ -611,7 +599,7 @@ func isReference(p *Markdown, data []byte, tabSize int) int {
hasBlock bool
)
if p.extensions&Footnotes != 0 && noteID != 0 {
if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 {
linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize)
lineEnd = linkEnd
} else {
@ -624,11 +612,11 @@ func isReference(p *Markdown, data []byte, tabSize int) int {
// a valid ref has been found
ref := &reference{
noteID: noteID,
noteId: noteId,
hasBlock: hasBlock,
}
if noteID > 0 {
if noteId > 0 {
// reusing the link field for the id since footnotes don't have links
ref.link = data[idOffset:idEnd]
// if footnote, it's not really a title, it's the contained text
@ -646,12 +634,15 @@ func isReference(p *Markdown, data []byte, tabSize int) int {
return lineEnd
}
func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
// link: whitespace-free sequence, optionally between angle brackets
if data[i] == '<' {
i++
}
linkOffset = i
if i == len(data) {
return
}
for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
i++
}
@ -714,13 +705,13 @@ func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOff
return
}
// The first bit of this logic is the same as Parser.listItem, but the rest
// The first bit of this logic is the same as (*parser).listItem, but the rest
// is much simpler. This function simply finds the entire block and shifts it
// over by one tab if it is indeed a block (just returns the line if it's not).
// blockEnd is the end of the section in the input buffer, and contents is the
// extracted text that was shifted over one tab. It will need to be rendered at
// the end of the document.
func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) {
if i == 0 || len(data) == 0 {
return
}
@ -813,7 +804,17 @@ func ispunct(c byte) bool {
// Test if a character is a whitespace character.
func isspace(c byte) bool {
return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v'
return ishorizontalspace(c) || isverticalspace(c)
}
// Test if a character is a horizontal whitespace character.
func ishorizontalspace(c byte) bool {
return c == ' ' || c == '\t'
}
// Test if a character is a vertical whitespace character.
func isverticalspace(c byte) bool {
return c == '\n' || c == '\r' || c == '\f' || c == '\v'
}
// Test if a character is letter.