package html import ( "bytes" "fmt" "html" "io" "regexp" "sort" "strconv" "strings" "github.com/gomarkdown/markdown/ast" "github.com/gomarkdown/markdown/parser" ) // Flags control optional behavior of HTML renderer. type Flags int // IDTag is the tag used for tag identification, it defaults to "id", some renderers // may wish to override this and use e.g. "anchor". var IDTag = "id" // HTML renderer configuration options. const ( FlagsNone Flags = 0 SkipHTML Flags = 1 << iota // Skip preformatted HTML blocks SkipImages // Skip embedded images SkipLinks // Skip all links Safelink // Only link to trusted protocols NofollowLinks // Only link with rel="nofollow" NoreferrerLinks // Only link with rel="noreferrer" NoopenerLinks // Only link with rel="noopener" HrefTargetBlank // Add a blank target CompletePage // Generate a complete HTML page UseXHTML // Generate XHTML output instead of HTML FootnoteReturnLinks // Generate a link at the end of a footnote to return to the source FootnoteNoHRTag // Do not output an HR after starting a footnote list. 
Smartypants // Enable smart punctuation substitutions SmartypantsFractions // Enable smart fractions (with Smartypants) SmartypantsDashes // Enable smart dashes (with Smartypants) SmartypantsLatexDashes // Enable LaTeX-style dashes (with Smartypants) SmartypantsAngledQuotes // Enable angled double quotes (with Smartypants) for double quotes rendering SmartypantsQuotesNBSP // Enable « French guillemets » (with Smartypants) TOC // Generate a table of contents LazyLoadImages // Include loading="lazy" with images CommonFlags Flags = Smartypants | SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes ) var ( htmlTagRe = regexp.MustCompile("(?i)^" + htmlTag) ) const ( htmlTag = "(?:" + openTag + "|" + closeTag + "|" + htmlComment + "|" + processingInstruction + "|" + declaration + "|" + cdata + ")" closeTag = "" + tagName + "\\s*[>]" openTag = "<" + tagName + attribute + "*" + "\\s*/?>" attribute = "(?:" + "\\s+" + attributeName + attributeValueSpec + "?)" attributeValue = "(?:" + unquotedValue + "|" + singleQuotedValue + "|" + doubleQuotedValue + ")" attributeValueSpec = "(?:" + "\\s*=" + "\\s*" + attributeValue + ")" attributeName = "[a-zA-Z_:][a-zA-Z0-9:._-]*" cdata = "" declaration = "]*>" doubleQuotedValue = "\"[^\"]*\"" htmlComment = "|" processingInstruction = "[<][?].*?[?][>]" singleQuotedValue = "'[^']*'" tagName = "[A-Za-z][A-Za-z0-9-]*" unquotedValue = "[^\"'=<>`\\x00-\\x20]+" ) // RenderNodeFunc allows reusing most of Renderer logic and replacing // rendering of some nodes. If it returns false, Renderer.RenderNode // will execute its logic. If it returns true, Renderer.RenderNode will // skip rendering this node and will return WalkStatus type RenderNodeFunc func(w io.Writer, node ast.Node, entering bool) (ast.WalkStatus, bool) // RendererOptions is a collection of supplementary parameters tweaking // the behavior of various parts of HTML renderer. type RendererOptions struct { // Prepend this text to each relative URL. 
AbsolutePrefix string // Add this text to each footnote anchor, to ensure uniqueness. FootnoteAnchorPrefix string // Show this text inside the tag for a footnote return link, if the // FootnoteReturnLinks flag is enabled. If blank, the string // [return] is used. FootnoteReturnLinkContents string // CitationFormatString defines how a citation is rendered. If blnck, the string // [%s] is used. Where %s will be substituted with the citation target. CitationFormatString string // If set, add this text to the front of each Heading ID, to ensure uniqueness. HeadingIDPrefix string // If set, add this text to the back of each Heading ID, to ensure uniqueness. HeadingIDSuffix string Title string // Document title (used if CompletePage is set) CSS string // Optional CSS file URL (used if CompletePage is set) Icon string // Optional icon file URL (used if CompletePage is set) Head []byte // Optional head data injected in the section (used if CompletePage is set) Flags Flags // Flags allow customizing this renderer's behavior // if set, called at the start of RenderNode(). Allows replacing // rendering of some nodes RenderNodeHook RenderNodeFunc // Comments is a list of comments the renderer should detect when // parsing code blocks and detecting callouts. Comments [][]byte // Generator is a meta tag that is inserted in the generated HTML so show what rendered it. It should not include the closing tag. // Defaults (note content quote is not closed) to ` " or ">" // Track heading IDs to prevent ID collision in a single generation. headingIDs map[string]int lastOutputLen int // if > 0, will strip html tags in Out and Outs DisableTags int sr *SPRenderer documentMatter ast.DocumentMatters // keep track of front/main/back matter. } // Escaper defines how to escape HTML special characters var Escaper = [256][]byte{ '&': []byte("&"), '<': []byte("<"), '>': []byte(">"), '"': []byte("""), } // EscapeHTML writes html-escaped d to w. It escapes &, <, > and " characters. 
func EscapeHTML(w io.Writer, d []byte) { var start, end int n := len(d) for end < n { escSeq := Escaper[d[end]] if escSeq != nil { w.Write(d[start:end]) w.Write(escSeq) start = end + 1 } end++ } if start < n && end <= n { w.Write(d[start:end]) } } func escLink(w io.Writer, text []byte) { unesc := html.UnescapeString(string(text)) EscapeHTML(w, []byte(unesc)) } // Escape writes the text to w, but skips the escape character. func Escape(w io.Writer, text []byte) { esc := false for i := 0; i < len(text); i++ { if text[i] == '\\' { esc = !esc } if esc && text[i] == '\\' { continue } w.Write([]byte{text[i]}) } } // NewRenderer creates and configures an Renderer object, which // satisfies the Renderer interface. func NewRenderer(opts RendererOptions) *Renderer { // configure the rendering engine closeTag := ">" if opts.Flags&UseXHTML != 0 { closeTag = " />" } if opts.FootnoteReturnLinkContents == "" { opts.FootnoteReturnLinkContents = `[return]` } if opts.CitationFormatString == "" { opts.CitationFormatString = `[%s]` } if opts.Generator == "" { opts.Generator = ` = len(tagname) { break } if strings.ToLower(string(tag[i]))[0] != tagname[j] { return false, -1 } } if i == len(tag) { return false, -1 } rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>') if rightAngle >= i { return true, rightAngle } return false, -1 } func isRelativeLink(link []byte) (yes bool) { // a tag begin with '#' if link[0] == '#' { return true } // link begin with '/' but not '//', the second maybe a protocol relative link if len(link) >= 2 && link[0] == '/' && link[1] != '/' { return true } // only the root '/' if len(link) == 1 && link[0] == '/' { return true } // current directory : begin with "./" if bytes.HasPrefix(link, []byte("./")) { return true } // parent directory : begin with "../" if bytes.HasPrefix(link, []byte("../")) { return true } return false } func (r *Renderer) ensureUniqueHeadingID(id string) string { for count, found := r.headingIDs[id]; found; count, found = r.headingIDs[id] 
{ tmp := fmt.Sprintf("%s-%d", id, count+1) if _, tmpFound := r.headingIDs[tmp]; !tmpFound { r.headingIDs[id] = count + 1 id = tmp } else { id = id + "-1" } } if _, found := r.headingIDs[id]; !found { r.headingIDs[id] = 0 } return id } func (r *Renderer) addAbsPrefix(link []byte) []byte { if r.opts.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' { newDest := r.opts.AbsolutePrefix if link[0] != '/' { newDest += "/" } newDest += string(link) return []byte(newDest) } return link } func appendLinkAttrs(attrs []string, flags Flags, link []byte) []string { if isRelativeLink(link) { return attrs } var val []string if flags&NofollowLinks != 0 { val = append(val, "nofollow") } if flags&NoreferrerLinks != 0 { val = append(val, "noreferrer") } if flags&NoopenerLinks != 0 { val = append(val, "noopener") } if flags&HrefTargetBlank != 0 { attrs = append(attrs, `target="_blank"`) } if len(val) == 0 { return attrs } attr := fmt.Sprintf("rel=%q", strings.Join(val, " ")) return append(attrs, attr) } func isMailto(link []byte) bool { return bytes.HasPrefix(link, []byte("mailto:")) } func needSkipLink(flags Flags, dest []byte) bool { if flags&SkipLinks != 0 { return true } return flags&Safelink != 0 && !isSafeLink(dest) && !isMailto(dest) } func isSmartypantable(node ast.Node) bool { switch node.GetParent().(type) { case *ast.Link, *ast.CodeBlock, *ast.Code: return false } return true } func appendLanguageAttr(attrs []string, info []byte) []string { if len(info) == 0 { return attrs } endOfLang := bytes.IndexAny(info, "\t ") if endOfLang < 0 { endOfLang = len(info) } s := `class="language-` + string(info[:endOfLang]) + `"` return append(attrs, s) } func (r *Renderer) outTag(w io.Writer, name string, attrs []string) { s := name if len(attrs) > 0 { s += " " + strings.Join(attrs, " ") } io.WriteString(w, s+">") r.lastOutputLen = 1 } func footnoteRef(prefix string, node *ast.Link) string { urlFrag := prefix + string(slugify(node.Destination)) nStr := strconv.Itoa(node.NoteID) 
anchor := `` + nStr + `` return `` + anchor + `` } func footnoteItem(prefix string, slug []byte) string { return `
")
if !(isListItem(para.Parent) && ast.GetNextNode(para) == nil) {
r.CR(w)
}
}
// Paragraph renders an ast.Paragraph node. Nothing is written when
// skipParagraphTags reports true for the paragraph; otherwise the opening
// half is produced by paragraphEnter (entering == true) and the closing
// half by paragraphExit (entering == false).
func (r *Renderer) Paragraph(w io.Writer, para *ast.Paragraph, entering bool) {
	switch {
	case skipParagraphTags(para):
		// Paragraph tags are suppressed for this node.
	case entering:
		r.paragraphEnter(w, para)
	default:
		r.paragraphExit(w, para)
	}
}
// Code writes ast.Code node
func (r *Renderer) Code(w io.Writer, node *ast.Code) {
r.Outs(w, "")
EscapeHTML(w, node.Literal)
r.Outs(w, "
")
}
// HTMLBlock writes an ast.HTMLBlock node. When the SkipHTML flag is set in
// the renderer options the block is dropped entirely; otherwise the node's
// literal is written with r.Out, with an r.CR call before and after it.
func (r *Renderer) HTMLBlock(w io.Writer, node *ast.HTMLBlock) {
	skipHTML := r.opts.Flags&SkipHTML != 0
	if skipHTML {
		return
	}

	r.CR(w)
	r.Out(w, node.Literal)
	r.CR(w)
}
// headingEnter writes the opening tag of a heading.
//
// Attributes are emitted in this order:
//  1. class: "title" when IsTitleblock is set, "special" when IsSpecial is
//     set, both separated by a space when both are set;
//  2. id: only when HeadingID is non-empty; the ID is first passed through
//     ensureUniqueHeadingID and then wrapped in the configured
//     HeadingIDPrefix / HeadingIDSuffix;
//  3. whatever BlockAttrs returns for the node.
func (r *Renderer) headingEnter(w io.Writer, nodeData *ast.Heading) {
	// TODO(miek): add helper functions for coalescing these classes.
	var classes []string
	if nodeData.IsTitleblock {
		classes = append(classes, "title")
	}
	if nodeData.IsSpecial {
		classes = append(classes, "special")
	}

	var attrs []string
	if len(classes) > 0 {
		attrs = append(attrs, `class="`+strings.Join(classes, " ")+`"`)
	}

	if nodeData.HeadingID != "" {
		// An empty prefix or suffix contributes nothing to the concatenation.
		id := r.opts.HeadingIDPrefix + r.ensureUniqueHeadingID(nodeData.HeadingID) + r.opts.HeadingIDSuffix
		attrs = append(attrs, `id="`+id+`"`)
	}

	attrs = append(attrs, BlockAttrs(nodeData)...)
	r.CR(w)
	r.outTag(w, headingOpenTagFromLevel(nodeData.Level), attrs)
}
// headingExit writes the closing tag for the heading's level and then calls
// r.CR, except when the heading's parent is a list item and the heading has
// no next node.
func (r *Renderer) headingExit(w io.Writer, heading *ast.Heading) {
	r.Outs(w, headingCloseTagFromLevel(heading.Level))

	endsListItem := isListItem(heading.Parent) && ast.GetNextNode(heading) == nil
	if endsListItem {
		return
	}
	r.CR(w)
}
// Heading renders an ast.Heading node: headingEnter handles the entering
// visit and headingExit handles the exiting visit.
func (r *Renderer) Heading(w io.Writer, node *ast.Heading, entering bool) {
	if !entering {
		r.headingExit(w, node)
		return
	}
	r.headingEnter(w, node)
}
// HorizontalRule renders an ast.HorizontalRule node by handing the node's
// BlockAttrs to outHRTag, with an r.CR call before and after the tag.
func (r *Renderer) HorizontalRule(w io.Writer, node *ast.HorizontalRule) {
	r.CR(w)
	hrAttrs := BlockAttrs(node)
	r.outHRTag(w, hrAttrs)
	r.CR(w)
}
func (r *Renderer) listEnter(w io.Writer, nodeData *ast.List) {
// TODO: attrs don't seem to be set
var attrs []string
if nodeData.IsFootnotesList {
r.Outs(w, "\n
")
code := TagWithAttributes("")
r.Outs(w, "
")
if !isListItem(codeBlock.Parent) {
r.CR(w)
}
}
// Caption writes ast.Caption node
func (r *Renderer) Caption(w io.Writer, caption *ast.Caption, entering bool) {
if entering {
r.Outs(w, "") case *ast.Aside: tag := TagWithAttributes("