From 1eff210a1cba05222f38d8d436f608d0636a2c5c Mon Sep 17 00:00:00 2001
From: Henry Jameson
+ const nonEmptyElements = new Set(visualLineElements) // start from a copy of visualLineElements
+ // Difference: remove every member of emptyElements, leaving visualLineElements minus emptyElements
+ for (let elem of emptyElements) {
+ nonEmptyElements.delete(elem)
+ }
+
+ // All elements that we are recognizing: the union of nonEmptyElements and emptyElements
+ const allElements = new Set([
+ ...nonEmptyElements.values(),
+ ...emptyElements.values()
+ ])
let buffer = [] // Current output buffer
const level = [] // How deep we are in tags and which tags were there
@@ -29,8 +62,8 @@ export const convertHtmlToLines = (html) => {
let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
- if (textBuffer.trim().length > 0 && !level.some(l => ignoredTags.has(l))) {
- buffer.push({ text: textBuffer })
+ if (textBuffer.trim().length > 0) {
+ buffer.push({ level: [...level], text: textBuffer })
} else {
buffer.push(textBuffer)
}
@@ -49,10 +82,12 @@ export const convertHtmlToLines = (html) => {
}
const handleClose = (tag) => { // handles closing tags
- flush()
- buffer.push(tag)
if (level[0] === getTagName(tag)) {
+ flush() // tag matches the entry at the front of level: process the pending line buffer first
+ buffer.push(tag) // then append the closing tag itself to the output buffer
level.shift()
+ } else { // Broken case: the closing tag does not match level[0]
+ textBuffer += tag // keep the unmatched tag as part of the current line text; no flush, level is left untouched
}
}
@@ -67,10 +102,10 @@ export const convertHtmlToLines = (html) => {
const tagFull = tagBuffer
tagBuffer = null
const tagName = getTagName(tagFull)
- if (handledTags.has(tagName)) {
- if (tagName === 'br') {
+ if (allElements.has(tagName)) {
+ if (linebreakElements.has(tagName)) {
handleBr(tagFull)
- } else if (openCloseTags.has(tagName)) {
+ } else if (nonEmptyElements.has(tagName)) {
if (tagFull[1] === '/') {
handleClose(tagFull)
} else if (tagFull[tagFull.length - 2] === '/') {
diff --git a/test/unit/specs/services/html_converter/html_line_converter.spec.js b/test/unit/specs/services/html_converter/html_line_converter.spec.js
index 9485233f..c8c89700 100644
--- a/test/unit/specs/services/html_converter/html_line_converter.spec.js
+++ b/test/unit/specs/services/html_converter/html_line_converter.spec.js
@@ -1,8 +1,17 @@
import { convertHtmlToLines } from 'src/services/html_converter/html_line_converter.service.js'
-const mapOnlyText = (processor) => (input) => input.text ? processor(input.text) : input
+const greentextHandle = new Set(['p', 'div']) // tag names allowed in an entry's level for its text to be processed
+const mapOnlyText = (processor) => (input) => { // test helper: returns a mapper applied to each entry of the convertHtmlToLines result
+ if (input.text && input.level.every(l => greentextHandle.has(l))) { // truthy text whose level holds only 'p'/'div' (an empty level also passes every())
+ return processor(input.text)
+ } else if (input.text) { // truthy text with some other tag in its level: returned unprocessed
+ return input.text
+ } else { // anything without a truthy .text is passed through unchanged
+ return input
+ }
+}
-describe('html_line_converter', () => {
+describe.only('html_line_converter', () => {
describe('with processor that keeps original line should not make any changes to HTML when', () => {
const processorKeep = (line) => line
it('fed with regular HTML with newlines', () => {
@@ -81,7 +90,7 @@ describe('html_line_converter', () => {
it('fed with very broken HTML with broken composition', () => {
const input = '
' - const output = '
_' + const output = '_
' const result = convertHtmlToLines(input) const comparableResult = result.map(mapOnlyText(processorReplace)).join('') expect(comparableResult).to.eql(output) @@ -111,7 +120,7 @@ describe('html_line_converter', () => { expect(comparableResult).to.eql(output) }) - it('fed with maybe valid HTML? self-closing divs and ps', () => { + it('fed with maybe valid HTML? (XHTML) self-closing divs and ps', () => { const input = 'a
what now ?' const output = '___' const result = convertHtmlToLines(input)