diff --git a/internal/renderer/blockquote.go b/internal/renderer/blockquote.go new file mode 100644 index 0000000..97df2cd --- /dev/null +++ b/internal/renderer/blockquote.go @@ -0,0 +1,47 @@ +// This file is part of gmnhg. + +// gmnhg is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// gmnhg is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with gmnhg. If not, see . + +package renderer + +import ( + "io" + + "github.com/gomarkdown/markdown/ast" +) + +var ( + quoteBrPrefix = []byte("\n> ") + quotePrefix = []byte("> ") +) + +func (r Renderer) blockquote(w io.Writer, node *ast.BlockQuote, entering bool) { + if entering { + if node := node.AsContainer(); node != nil { + for _, child := range node.Children { + w.Write(quotePrefix) + r.blockquoteText(w, child) + // double linebreak to ensure Gemini clients don't merge + // quotes; gomarkdown assumes separate blockquotes are + // paragraphs of the same blockquote while we don't + w.Write(lineBreak) + w.Write(lineBreak) + } + } + } +} + +func (r Renderer) blockquoteText(w io.Writer, node ast.Node) { + w.Write(textWithNewlineReplacement(node, quoteBrPrefix, true)) +} diff --git a/internal/renderer/code.go b/internal/renderer/code.go new file mode 100644 index 0000000..adbd99c --- /dev/null +++ b/internal/renderer/code.go @@ -0,0 +1,35 @@ +// This file is part of gmnhg. 
+ +// gmnhg is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// gmnhg is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with gmnhg. If not, see . + +package renderer + +import ( + "io" + + "github.com/gomarkdown/markdown/ast" +) + +var preformattedToggle = []byte("```") + +func (r Renderer) code(w io.Writer, node *ast.CodeBlock) { + w.Write(preformattedToggle) + if node.IsFenced { + w.Write(node.Info) + } + w.Write(lineBreak) + w.Write(node.Literal) + w.Write(preformattedToggle) + w.Write(lineBreak) +} diff --git a/internal/renderer/doc.go b/internal/renderer/doc.go new file mode 100644 index 0000000..8dfd691 --- /dev/null +++ b/internal/renderer/doc.go @@ -0,0 +1,18 @@ +// This file is part of gmnhg. + +// gmnhg is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// gmnhg is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with gmnhg. If not, see . + +// Package renderer contains an implementation of markdown => text/gemini +// renderer for github.com/gomarkdown/markdown. 
+package renderer diff --git a/internal/renderer/heading.go b/internal/renderer/heading.go new file mode 100644 index 0000000..934139b --- /dev/null +++ b/internal/renderer/heading.go @@ -0,0 +1,39 @@ +// This file is part of gmnhg. + +// gmnhg is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// gmnhg is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with gmnhg. If not, see . + +package renderer + +import ( + "io" + + "github.com/gomarkdown/markdown/ast" +) + +func (r Renderer) heading(w io.Writer, node *ast.Heading, entering bool) { + if entering { + // pad headings with the relevant number of #-s; Gemini spec + // used to allow 3 at maximum before a space + bufLength := node.Level + 1 + heading := make([]byte, bufLength) + heading[len(heading)-1] = ' ' + for i := 0; i < len(heading)-1; i++ { + heading[i] = '#' + } + w.Write(heading) + r.text(w, node, true) + } else { + w.Write(lineBreak) + } +} diff --git a/internal/renderer/hr.go b/internal/renderer/hr.go new file mode 100644 index 0000000..7459a28 --- /dev/null +++ b/internal/renderer/hr.go @@ -0,0 +1,32 @@ +// This file is part of gmnhg. + +// gmnhg is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+
+// gmnhg is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with gmnhg. If not, see <https://www.gnu.org/licenses/>.
+
+package renderer
+
+import (
+	"io"
+
+	"github.com/gomarkdown/markdown/ast"
+)
+
+var horizontalRule = []byte("---")
+
+func (r Renderer) hr(w io.Writer, node *ast.HorizontalRule, entering bool) {
+	if entering {
+		w.Write(horizontalRule)
+		w.Write(lineBreak)
+		w.Write(lineBreak)
+	}
+}
diff --git a/internal/renderer/html.go b/internal/renderer/html.go
new file mode 100644
index 0000000..73f6a01
--- /dev/null
+++ b/internal/renderer/html.go
@@ -0,0 +1,101 @@
+// This file is part of gmnhg.
+
+// gmnhg is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// gmnhg is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with gmnhg. If not, see <https://www.gnu.org/licenses/>.
+
+package renderer
+
+import (
+	"fmt"
+	"html"
+	"io"
+	"regexp"
+
+	"github.com/gomarkdown/markdown/ast"
+	"github.com/grokify/html-strip-tags-go"
+)
+
+// tagPairRegexString is a fmt template for a regexp matching an opening
+// tag with its attributes, the element contents and the closing tag;
+// both %s verbs take the tag name. It is fairly tolerant to handle
+// weird HTML. (?s) lets the contents span lines: stripHtml applies
+// these patterns before line breaks are collapsed into spaces.
+var tagPairRegexString = `(?s)<[\n\f ]*%s([\n\f ]+[^\n\f \/>"'=]+[\n\f ]*(=[\n\f ]*([a-zA-Z0-9\-]+|"[^\n\f"]+"|'[^\n\f']+'))?)*[\n\f ]*>.*?<[\n\f ]*/[\n\f ]*%s[\n\f ]*>`
+
+// HTML block tags whose contents should not be rendered
+var htmlNoRenderRegex = []*regexp.Regexp{
+	regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "fieldset", "fieldset")),
+	regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "form", "form")),
+	regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "iframe", "iframe")),
+	regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "script", "script")),
+	regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "style", "style")),
+	regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "canvas", "canvas")),
+	regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "dialog", "dialog")),
+	regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "progress", "progress")),
+}
+
+// hardBreakTag matches a <br> tag, self-closing or not.
+var hardBreakTag = regexp.MustCompile(`< *br */? *>`)
+
+// escapedHtmlChar matches an HTML entity with an alphanumeric name,
+// such as &amp;, unless a backslash precedes it; the match includes the
+// preceding character, if there is one.
+var escapedHtmlChar = regexp.MustCompile(`(?:^|[^\\\\])&[[:alnum:]]+;`)
+
+// htmlBlock, on entering the node, writes the text stripHtml extracts
+// from it followed by two line breaks, or nothing if no text is left.
+func (r Renderer) htmlBlock(w io.Writer, node *ast.HTMLBlock, entering bool) {
+	if entering {
+		htmlString := stripHtml(node, []byte{})
+		if len(htmlString) > 0 {
+			w.Write([]byte(htmlString))
+			w.Write(lineBreak)
+			w.Write(lineBreak)
+		}
+	}
+}
+
+// stripHtml returns the text of an HTML block: elements matched by
+// htmlNoRenderRegex are dropped with their contents, line breaks become
+// spaces, <br> tags become a line break followed by linePrefix, the
+// result goes through strip.StripTags and HTML entities are unescaped.
+// An empty string is returned if the first step leaves nothing.
+func stripHtml(node *ast.HTMLBlock, linePrefix []byte) string {
+	// Only render contents of allowed tags
+	literal := node.Literal
+	for _, re := range htmlNoRenderRegex {
+		literal = re.ReplaceAllLiteral(literal, []byte{})
+	}
+	if len(literal) > 0 {
+		literalWithBreaks := hardBreakTag.ReplaceAll(lineBreakCharacters.ReplaceAll(literal, space), append([]byte(lineBreak), linePrefix...))
+		literalStripped := strip.StripTags(string(literalWithBreaks))
+		return html.UnescapeString(literalStripped)
+	}
+	return ""
+}
+
+// unescapeHtmlText returns text with every entity matched by
+// escapedHtmlChar replaced by the character it stands for.
+func unescapeHtmlText(text []byte) []byte {
+	// unescape match by match: a plain ReplaceAll would substitute each
+	// entity with the unescaped copy of the whole text and would expand
+	// $-sequences in it
+	return escapedHtmlChar.ReplaceAllFunc(text, func(match []byte) []byte {
+		return []byte(html.UnescapeString(string(match)))
+	})
+}
+
+// isHardBreak reports whether text contains a <br> tag.
+func isHardBreak(text []byte) bool {
+	return hardBreakTag.Match(text)
+}
diff --git a/internal/renderer/image.go b/internal/renderer/image.go
new file mode 100644
index 0000000..0a8cfba
--- /dev/null
+++ b/internal/renderer/image.go
@@ -0,0 +1,31 @@
+// This file is part of gmnhg.
+
+// gmnhg is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// gmnhg is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with gmnhg. If not, see <https://www.gnu.org/licenses/>.
+ +package renderer + +import ( + "io" + + "github.com/gomarkdown/markdown/ast" +) + +func (r Renderer) image(w io.Writer, node *ast.Image, entering bool) { + if entering { + w.Write(linkPrefix) + w.Write(node.Destination) + w.Write(space) + r.text(w, node, true) + } +} diff --git a/internal/renderer/link.go b/internal/renderer/link.go new file mode 100644 index 0000000..460a37e --- /dev/null +++ b/internal/renderer/link.go @@ -0,0 +1,36 @@ +// This file is part of gmnhg. + +// gmnhg is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// gmnhg is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with gmnhg. If not, see . + +package renderer + +import ( + "fmt" + "io" + + "github.com/gomarkdown/markdown/ast" +) + +func (r Renderer) link(w io.Writer, node *ast.Link, entering bool) { + if entering { + if node.Footnote != nil { + fmt.Fprintf(w, "[^%d]: %s", node.NoteID, extractText(node.Footnote)) + } else { + w.Write(linkPrefix) + w.Write(node.Destination) + w.Write(space) + r.text(w, node, true) + } + } +} diff --git a/internal/renderer/list.go b/internal/renderer/list.go new file mode 100644 index 0000000..a28e947 --- /dev/null +++ b/internal/renderer/list.go @@ -0,0 +1,110 @@ +// This file is part of gmnhg. + +// gmnhg is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// gmnhg is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with gmnhg. If not, see . + +package renderer + +import ( + "fmt" + "io" + + "github.com/gomarkdown/markdown/ast" +) + +var ( + itemIndent = []byte{'\t'} + itemPrefix = []byte("* ") +) + +func (r Renderer) renderFootnotes(w io.Writer, links []ast.Node) (count uint) { + for _, link := range links { + if link, ok := link.(*ast.Link); ok && link.Footnote != nil { + r.link(w, link, true) + w.Write(lineBreak) + count++ + } + } + return +} + +func (r Renderer) renderImages(w io.Writer, links []ast.Node) (count uint) { + for _, link := range links { + if link, ok := link.(*ast.Image); ok { + r.image(w, link, true) + w.Write(lineBreak) + count++ + } + } + return +} + +func (r Renderer) renderLinks(w io.Writer, links []ast.Node) (count uint) { + for _, link := range links { + if link, ok := link.(*ast.Link); ok && link.Footnote == nil { + r.link(w, link, true) + w.Write(lineBreak) + count++ + } + } + return +} + +func (r Renderer) linksList(w io.Writer, links []ast.Node) { + for _, renderer := range []func(Renderer, io.Writer, []ast.Node) uint{ + Renderer.renderFootnotes, + Renderer.renderImages, + Renderer.renderLinks, + } { + linksRendered := renderer(r, w, links) + // ensure breaks between link blocks of the same type + if linksRendered > 0 { + w.Write(lineBreak) + } + } +} + +func (r Renderer) list(w io.Writer, node *ast.List, level int) { + // the text/gemini spec included with the current Gemini spec does + // not specify anything about the formatting of lists of level >= 2, + // as of now this will just render them like in Markdown + isNumbered := (node.ListFlags & ast.ListTypeOrdered) != 0 + for number, item := range node.Children { + item, ok := 
item.(*ast.ListItem) + if !ok { + panic("rendering anything but list items is not supported") + } + isTerm := (item.ListFlags & ast.ListTypeTerm) == ast.ListTypeTerm + if l := len(item.Children); l >= 1 { + // add extra line break to split up definitions + if isTerm && number > 0 { + w.Write(lineBreak) + } + for i := 0; i < level; i++ { + w.Write(itemIndent) + } + if isNumbered { + w.Write([]byte(fmt.Sprintf("%d. ", number+1))) + } else if !isTerm { + w.Write(itemPrefix) + } + r.text(w, item, true) + w.Write(lineBreak) + if l >= 2 { + if list, ok := item.Children[1].(*ast.List); ok { + r.list(w, list, level+1) + } + } + } + } +} diff --git a/internal/renderer/paragraph.go b/internal/renderer/paragraph.go new file mode 100644 index 0000000..bcd5a30 --- /dev/null +++ b/internal/renderer/paragraph.go @@ -0,0 +1,77 @@ +// This file is part of gmnhg. + +// gmnhg is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// gmnhg is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with gmnhg. If not, see . 
+ +package renderer + +import ( + "io" + + "github.com/gomarkdown/markdown/ast" +) + +func isLinksOnlyParagraph(node *ast.Paragraph) bool { + for _, child := range node.Children { + switch child := child.(type) { + case *ast.Text: + if emptyLineRegex.Find(child.Literal) != nil { + continue + } + case *ast.Link, *ast.Image: + continue + } + return false + } + return true +} + +func (r Renderer) paragraph(w io.Writer, node *ast.Paragraph, entering bool) (noNewLine bool) { + linksOnly := isLinksOnlyParagraph(node) + noNewLine = linksOnly + if entering { + children := node.Children + // current version of gomarkdown/markdown finds an empty + // *ast.Text element before links/images, breaking the heuristic + if len(children) >= 2 { + firstChild, elementIsText := children[0].(*ast.Text) + if elementIsText && len(firstChild.Literal) == 0 { + children = children[1:] + } + } + if !linksOnly { + for _, child := range children { + // only render links text in the paragraph if they're + // combined with some other text on page + switch child := child.(type) { + case *ast.Text, *ast.Emph, *ast.Strong, *ast.Del, *ast.Link, *ast.Image: + r.text(w, child, true) + case *ast.Code: + r.text(w, child, false) + case *ast.Hardbreak: + w.Write(lineBreak) + case *ast.HTMLSpan: + if isHardBreak(child.AsLeaf().Literal) { + w.Write(lineBreak) + } + case *ast.Subscript: + r.subscript(w, child, true) + case *ast.Superscript: + r.superscript(w, child, true) + } + } + w.Write(lineBreak) + } + } + return +} diff --git a/internal/renderer/renderer.go b/internal/renderer/renderer.go index c864976..3658f72 100644 --- a/internal/renderer/renderer.go +++ b/internal/renderer/renderer.go @@ -13,63 +13,31 @@ // You should have received a copy of the GNU General Public License // along with gmnhg. If not, see . -// Package renderer contains an implementation of markdown => text/gemini -// renderer for github.com/gomarkdown/markdown. 
package renderer import ( "bytes" "fmt" - "html" "io" "regexp" "github.com/gomarkdown/markdown/ast" - "github.com/grokify/html-strip-tags-go" - "github.com/olekukonko/tablewriter" ) var ( - lineBreak = []byte{'\n'} - space = []byte{' '} - linkPrefix = []byte("=> ") - quoteBrPrefix = []byte("\n> ") - quotePrefix = []byte("> ") - itemPrefix = []byte("* ") - itemIndent = []byte{'\t'} - preformattedToggle = []byte("```") - codeDelimiter = []byte("`") - emphDelimiter = []byte("*") - strongDelimiter = []byte("**") - delDelimiter = []byte("~~") - horizontalRule = []byte("---") - subOpen = []byte("_{") - subClose = []byte("}") - supOpen = []byte("^(") - supClose = []byte(")") + lineBreak = []byte{'\n'} + space = []byte{' '} + linkPrefix = []byte("=> ") + codeDelimiter = []byte("`") + emphDelimiter = []byte("*") + strongDelimiter = []byte("**") + delDelimiter = []byte("~~") ) // matches a FULL string that contains no non-whitespace characters var emptyLineRegex = regexp.MustCompile(`\A[\s]*\z`) -// fairly tolerant to handle weird HTML -var tagPairRegexString = `<[\n\f ]*%s([\n\f ]+[^\n\f \/>"'=]+[\n\f ]*(=[\n\f ]*([a-zA-Z1-9\-]+|"[^\n\f"]+"|'[^\n\f']+'))?)*[\n\f ]*>.*?<[\n\f ]*/[\n\f ]*%s[\n\f ]*>` - -// HTML block tags whose contents should not be rendered -var htmlNoRenderRegex = []*regexp.Regexp{ - regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "fieldset", "fieldset")), - regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "form", "form")), - regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "iframe", "iframe")), - regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "script", "script")), - regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "style", "style")), - regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "canvas", "canvas")), - regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "dialog", "dialog")), - regexp.MustCompile(fmt.Sprintf(tagPairRegexString, "progress", "progress")), -} - var lineBreakCharacters = regexp.MustCompile(`[\n\r]+`) -var hardBreakTag = 
regexp.MustCompile(`< *br */? *>`) -var escapedHtmlChar = regexp.MustCompile(`(?:^|[^\\\\])&[[:alnum:]]+;`) // Renderer implements markdown.Renderer. type Renderer struct{} @@ -94,277 +62,6 @@ func getNodeDelimiter(node ast.Node) []byte { } } -func (r Renderer) link(w io.Writer, node *ast.Link, entering bool) { - if entering { - if node.Footnote != nil { - fmt.Fprintf(w, "[^%d]: %s", node.NoteID, extractText(node.Footnote)) - } else { - w.Write(linkPrefix) - w.Write(node.Destination) - w.Write(space) - r.text(w, node, true) - } - } -} - -func (r Renderer) image(w io.Writer, node *ast.Image, entering bool) { - if entering { - w.Write(linkPrefix) - w.Write(node.Destination) - w.Write(space) - r.text(w, node, true) - } -} - -func (r Renderer) blockquote(w io.Writer, node *ast.BlockQuote, entering bool) { - if entering { - if node := node.AsContainer(); node != nil { - for _, child := range node.Children { - w.Write(quotePrefix) - r.blockquoteText(w, child) - // double linebreak to ensure Gemini clients don't merge - // quotes; gomarkdown assumes separate blockquotes are - // paragraphs of the same blockquote while we don't - w.Write(lineBreak) - w.Write(lineBreak) - } - } - } -} - -func (r Renderer) hr(w io.Writer, node *ast.HorizontalRule, entering bool) { - if entering { - w.Write(horizontalRule) - w.Write(lineBreak) - w.Write(lineBreak) - } -} - -// Based on https://pages.uoregon.edu/ncp/Courses/MathInPlainTextEmail.html -func (r Renderer) subscript(w io.Writer, node *ast.Subscript, entering bool) { - if entering { - if node := node.AsLeaf(); node != nil { - w.Write(subOpen) - w.Write(bytes.ReplaceAll(node.Literal, lineBreak, space)) - w.Write(subClose) - } - } -} -func (r Renderer) superscript(w io.Writer, node *ast.Superscript, entering bool) { - if entering { - if node := node.AsLeaf(); node != nil { - w.Write(supOpen) - w.Write(bytes.ReplaceAll(node.Literal, lineBreak, space)) - w.Write(supClose) - } - } -} - -func (r Renderer) heading(w io.Writer, node 
*ast.Heading, entering bool) { - if entering { - // pad headings with the relevant number of #-s; Gemini spec - // used to allow 3 at maximum before a space - bufLength := node.Level + 1 - heading := make([]byte, bufLength) - heading[len(heading)-1] = ' ' - for i := 0; i < len(heading)-1; i++ { - heading[i] = '#' - } - w.Write(heading) - r.text(w, node, true) - } else { - w.Write(lineBreak) - } -} - -func extractLinks(node ast.Node) (stack []ast.Node) { - if node := node.AsContainer(); node != nil { - for _, subnode := range node.Children { - stack = append(stack, extractLinks(subnode)...) - } - } - switch node := node.(type) { - case *ast.Image: - stack = append(stack, node) - case *ast.Link: - stack = append(stack, node) - // footnotes are represented as links which embed an extra node - // containing footnote text; the link itself is not considered a - // container - if node.Footnote != nil { - stack = append(stack, extractLinks(node.Footnote)...) - } - } - return stack -} - -func (r Renderer) renderLinks(w io.Writer, links []ast.Node) (count uint) { - for _, link := range links { - if link, ok := link.(*ast.Link); ok && link.Footnote == nil { - r.link(w, link, true) - w.Write(lineBreak) - count++ - } - } - return -} - -func (r Renderer) renderFootnotes(w io.Writer, links []ast.Node) (count uint) { - for _, link := range links { - if link, ok := link.(*ast.Link); ok && link.Footnote != nil { - r.link(w, link, true) - w.Write(lineBreak) - count++ - } - } - return -} - -func (r Renderer) renderImages(w io.Writer, links []ast.Node) (count uint) { - for _, link := range links { - if link, ok := link.(*ast.Image); ok { - r.image(w, link, true) - w.Write(lineBreak) - count++ - } - } - return -} - -func (r Renderer) linksList(w io.Writer, links []ast.Node) { - for _, renderer := range []func(Renderer, io.Writer, []ast.Node) uint{ - Renderer.renderFootnotes, - Renderer.renderImages, - Renderer.renderLinks, - } { - linksRendered := renderer(r, w, links) - // ensure 
breaks between link blocks of the same type - if linksRendered > 0 { - w.Write(lineBreak) - } - } -} - -func isLinksOnlyParagraph(node *ast.Paragraph) bool { - for _, child := range node.Children { - switch child := child.(type) { - case *ast.Text: - if emptyLineRegex.Find(child.Literal) != nil { - continue - } - case *ast.Link, *ast.Image: - continue - } - return false - } - return true -} - -func isLinksOnlyList(node *ast.List) bool { - for _, child := range node.Children { - child, ok := child.(*ast.ListItem) - if !ok { - return false // should never happen - } - for _, liChild := range child.Children { - liChild, ok := liChild.(*ast.Paragraph) - if !ok { - return false // sublist, etc - } - if !isLinksOnlyParagraph(liChild) { - return false - } - } - } - return true -} - -func (r Renderer) paragraph(w io.Writer, node *ast.Paragraph, entering bool) (noNewLine bool) { - linksOnly := isLinksOnlyParagraph(node) - noNewLine = linksOnly - if entering { - children := node.Children - // current version of gomarkdown/markdown finds an empty - // *ast.Text element before links/images, breaking the heuristic - if len(children) >= 2 { - firstChild, elementIsText := children[0].(*ast.Text) - if elementIsText && len(firstChild.Literal) == 0 { - children = children[1:] - } - } - if !linksOnly { - for _, child := range children { - // only render links text in the paragraph if they're - // combined with some other text on page - switch child := child.(type) { - case *ast.Text, *ast.Emph, *ast.Strong, *ast.Del, *ast.Link, *ast.Image: - r.text(w, child, true) - case *ast.Code: - r.text(w, child, false) - case *ast.Hardbreak: - w.Write(lineBreak) - case *ast.HTMLSpan: - if hardBreakTag.Match(child.AsLeaf().Literal) { - w.Write(lineBreak) - } - case *ast.Subscript: - r.subscript(w, child, true) - case *ast.Superscript: - r.superscript(w, child, true) - } - } - w.Write(lineBreak) - } - } - return -} - -func (r Renderer) code(w io.Writer, node *ast.CodeBlock) { - 
w.Write(preformattedToggle) - if node.IsFenced { - w.Write(node.Info) - } - w.Write(lineBreak) - w.Write(node.Literal) - w.Write(preformattedToggle) - w.Write(lineBreak) -} - -func (r Renderer) list(w io.Writer, node *ast.List, level int) { - // the text/gemini spec included with the current Gemini spec does - // not specify anything about the formatting of lists of level >= 2, - // as of now this will just render them like in Markdown - isNumbered := (node.ListFlags & ast.ListTypeOrdered) != 0 - for number, item := range node.Children { - item, ok := item.(*ast.ListItem) - if !ok { - panic("rendering anything but list items is not supported") - } - isTerm := (item.ListFlags & ast.ListTypeTerm) == ast.ListTypeTerm - if l := len(item.Children); l >= 1 { - // add extra line break to split up definitions - if isTerm && number > 0 { - w.Write(lineBreak) - } - for i := 0; i < level; i++ { - w.Write(itemIndent) - } - if isNumbered { - w.Write([]byte(fmt.Sprintf("%d. ", number+1))) - } else if !isTerm { - w.Write(itemPrefix) - } - r.text(w, item, true) - w.Write(lineBreak) - if l >= 2 { - if list, ok := item.Children[1].(*ast.List); ok { - r.list(w, list, level+1) - } - } - } - } -} - func textWithNewlineReplacement(node ast.Node, replacement []byte, unescapeHtml bool) []byte { buf := bytes.Buffer{} delimiter := getNodeDelimiter(node) @@ -387,17 +84,16 @@ func textWithNewlineReplacement(node ast.Node, replacement []byte, unescapeHtml buf.Write(quotePrefix) } case *ast.HTMLSpan: - if hardBreakTag.Match(leaf.Literal) { + if isHardBreak(leaf.Literal) { buf.Write(lineBreak) } buf.Write(leaf.Content) case *ast.HTMLBlock: - buf.Write([]byte(extractHtml(node, quotePrefix))) + buf.Write([]byte(stripHtml(node, quotePrefix))) default: textWithoutBreaks := lineBreakCharacters.ReplaceAll(leaf.Literal, replacement) if unescapeHtml { - unescapedText := escapedHtmlChar.ReplaceAll(textWithoutBreaks, []byte(html.UnescapeString(string(textWithoutBreaks)))) - buf.Write(unescapedText) + 
buf.Write(unescapeHtmlText(textWithoutBreaks)) } else { buf.Write(textWithoutBreaks) } @@ -423,95 +119,44 @@ func (r Renderer) text(w io.Writer, node ast.Node, unescapeHtml bool) { w.Write(textWithNewlineReplacement(node, space, unescapeHtml)) } -func (r Renderer) blockquoteText(w io.Writer, node ast.Node) { - w.Write(textWithNewlineReplacement(node, quoteBrPrefix, true)) -} - -func extractText(node ast.Node) string { - return string(textWithNewlineReplacement(node, space, true)) -} - -func extractHtml(node *ast.HTMLBlock, linePrefix []byte) string { - // Only render contents of allowed tags - literal := node.Literal - for _, re := range htmlNoRenderRegex { - literal = re.ReplaceAllLiteral(literal, []byte{}) - } - if len(literal) > 0 { - literalWithBreaks := hardBreakTag.ReplaceAll(lineBreakCharacters.ReplaceAll(literal, space), append([]byte(lineBreak), linePrefix...)) - literalStripped := strip.StripTags(string(literalWithBreaks)) - return html.UnescapeString(literalStripped) - } - return "" -} - -func (r Renderer) tableHead(t *tablewriter.Table, node *ast.TableHeader) { +func extractLinks(node ast.Node) (stack []ast.Node) { if node := node.AsContainer(); node != nil { - // should always have a single row consisting of at least one - // cell but worth checking nonetheless; tablewriter only - // supports a single header row as of now therefore ignore - // second row and the rest - if len(node.Children) > 0 { - if row := node.Children[0].AsContainer(); row != nil { - cells := make([]string, len(row.Children)) - for i, cell := range row.Children { - cells[i] = extractText(cell) - } - t.SetHeader(cells) + for _, subnode := range node.Children { + stack = append(stack, extractLinks(subnode)...) 
+ } + } + switch node := node.(type) { + case *ast.Image: + stack = append(stack, node) + case *ast.Link: + stack = append(stack, node) + // footnotes are represented as links which embed an extra node + // containing footnote text; the link itself is not considered a + // container + if node.Footnote != nil { + stack = append(stack, extractLinks(node.Footnote)...) + } + } + return stack +} + +func isLinksOnlyList(node *ast.List) bool { + for _, child := range node.Children { + child, ok := child.(*ast.ListItem) + if !ok { + return false // should never happen + } + for _, liChild := range child.Children { + liChild, ok := liChild.(*ast.Paragraph) + if !ok { + return false // sublist, etc + } + if !isLinksOnlyParagraph(liChild) { + return false } } } -} - -func (r Renderer) tableBody(t *tablewriter.Table, node *ast.TableBody) { - if node := node.AsContainer(); node != nil { - for _, row := range node.Children { - if row := row.AsContainer(); row != nil { - cells := make([]string, len(row.Children)) - for i, cell := range row.Children { - cells[i] = extractText(cell) - } - t.Append(cells) - } - } - } -} - -func (r Renderer) table(w io.Writer, node *ast.Table, entering bool) { - if entering { - w.Write(preformattedToggle) - w.Write(lineBreak) - // gomarkdown appears to only parse headings consisting of a - // single line and always have a TableBody preceded by a single - // TableHeader but we're better off not relying on it - t := tablewriter.NewWriter(w) - t.SetAutoFormatHeaders(false) // TODO: tablewriter options should probably be configurable - if node := node.AsContainer(); node != nil { - for _, child := range node.Children { - switch child := child.(type) { - case *ast.TableHeader: - r.tableHead(t, child) - case *ast.TableBody: - r.tableBody(t, child) - } - } - } - t.Render() - } else { - w.Write(preformattedToggle) - w.Write(lineBreak) - } -} - -func (r Renderer) htmlBlock(w io.Writer, node *ast.HTMLBlock, entering bool) { - if entering { - htmlString := 
extractHtml(node, []byte{}) - if len(htmlString) > 0 { - w.Write([]byte(htmlString)) - w.Write(lineBreak) - w.Write(lineBreak) - } - } + return true } // RenderNode implements Renderer.RenderNode(). diff --git a/internal/renderer/supersub.go b/internal/renderer/supersub.go new file mode 100644 index 0000000..7600d1a --- /dev/null +++ b/internal/renderer/supersub.go @@ -0,0 +1,51 @@ +// This file is part of gmnhg. + +// gmnhg is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// gmnhg is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with gmnhg. If not, see . 
+ +package renderer + +import ( + "bytes" + "io" + + "github.com/gomarkdown/markdown/ast" +) + +// Based on https://pages.uoregon.edu/ncp/Courses/MathInPlainTextEmail.html +var ( + subOpen = []byte("_{") + subClose = []byte("}") + supOpen = []byte("^(") + supClose = []byte(")") +) + +func (r Renderer) subscript(w io.Writer, node *ast.Subscript, entering bool) { + if entering { + if node := node.AsLeaf(); node != nil { + w.Write(subOpen) + w.Write(bytes.ReplaceAll(node.Literal, lineBreak, space)) + w.Write(subClose) + } + } +} + +func (r Renderer) superscript(w io.Writer, node *ast.Superscript, entering bool) { + if entering { + if node := node.AsLeaf(); node != nil { + w.Write(supOpen) + w.Write(bytes.ReplaceAll(node.Literal, lineBreak, space)) + w.Write(supClose) + } + } +} diff --git a/internal/renderer/table.go b/internal/renderer/table.go new file mode 100644 index 0000000..5fefae5 --- /dev/null +++ b/internal/renderer/table.go @@ -0,0 +1,85 @@ +// This file is part of gmnhg. + +// gmnhg is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// gmnhg is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with gmnhg. If not, see . 
+ +package renderer + +import ( + "io" + + "github.com/gomarkdown/markdown/ast" + "github.com/olekukonko/tablewriter" +) + +func extractText(node ast.Node) string { + return string(textWithNewlineReplacement(node, space, true)) +} + +func (r Renderer) tableHead(t *tablewriter.Table, node *ast.TableHeader) { + if node := node.AsContainer(); node != nil { + // should always have a single row consisting of at least one + // cell but worth checking nonetheless; tablewriter only + // supports a single header row as of now therefore ignore + // second row and the rest + if len(node.Children) > 0 { + if row := node.Children[0].AsContainer(); row != nil { + cells := make([]string, len(row.Children)) + for i, cell := range row.Children { + cells[i] = extractText(cell) + } + t.SetHeader(cells) + } + } + } +} + +func (r Renderer) tableBody(t *tablewriter.Table, node *ast.TableBody) { + if node := node.AsContainer(); node != nil { + for _, row := range node.Children { + if row := row.AsContainer(); row != nil { + cells := make([]string, len(row.Children)) + for i, cell := range row.Children { + cells[i] = extractText(cell) + } + t.Append(cells) + } + } + } +} + +func (r Renderer) table(w io.Writer, node *ast.Table, entering bool) { + if entering { + w.Write(preformattedToggle) + w.Write(lineBreak) + // gomarkdown appears to only parse headings consisting of a + // single line and always have a TableBody preceded by a single + // TableHeader but we're better off not relying on it + t := tablewriter.NewWriter(w) + t.SetAutoFormatHeaders(false) // TODO: tablewriter options should probably be configurable + if node := node.AsContainer(); node != nil { + for _, child := range node.Children { + switch child := child.(type) { + case *ast.TableHeader: + r.tableHead(t, child) + case *ast.TableBody: + r.tableBody(t, child) + } + } + } + t.Render() + } else { + w.Write(preformattedToggle) + w.Write(lineBreak) + } +}