diff --git a/readme.md b/readme.md index 1a01cb2..2a39e1a 100644 --- a/readme.md +++ b/readme.md @@ -46,23 +46,34 @@ console.log(parser(html)) // Logs a PostHTML AST href: '#' }, content: [ - '\n ', + { + text: '\n ', + parent: [Circular] + }, { tag: 'span', attrs: { class: 'animals__cat', style: 'background: url(cat.png)' }, - content: ['Cat'] + content: [{ + text: 'Cat', + parent: [Circular], + }], + parent: [Circular] }, - '\n' - ] + { + text: '\n', + parent: [Circular] + } + ], + parent: [Circular] }] ``` ## PostHTML AST Format -Any parser being used with PostHTML should return a standard PostHTML [Abstract Syntax Tree](https://www.wikiwand.com/en/Abstract_syntax_tree) (AST). Fortunately, this is a very easy format to produce and understand. The AST is an array that can contain strings and objects. Any strings represent plain text content to be written to the output. Any objects represent HTML tags. +Any parser being used with PostHTML should return a standard PostHTML [Abstract Syntax Tree](https://www.wikiwand.com/en/Abstract_syntax_tree) (AST). Fortunately, this is a very easy format to produce and understand. The AST is an array that can contain two types of objects: tag objects and text node objects. Tag objects generally look something like this: @@ -72,11 +83,17 @@ Tag objects generally look something like this: attrs: { class: 'foo' }, - content: ['hello world!'] + content: [{ + text: 'hello world!', + parent: [Circular] + }], + parent: [Circular] } ``` -Tag objects can contain three keys. The `tag` key takes the name of the tag as the value. This can include custom tags. The optional `attrs` key takes an object with key/value pairs representing the attributes of the html tag. A boolean attribute has an empty string as its value. Finally, the optional `content` key takes an array as its value, which is a PostHTML AST. In this manner, the AST is a tree that should be walked recursively. +Tag objects can contain four keys. The `tag` key takes the name of the tag as the value. This can include custom tags. The optional `attrs` key takes an object with key/value pairs representing the attributes of the html tag. A boolean attribute has an empty string as its value. The optional `content` key takes an array as its value, which is a PostHTML AST. Finally, the optional `parent` key stores a reference to the parent node. In this manner, the AST is a tree that should be walked recursively. + +Text node objects can contain two keys. The `text` key contains the text itself and the optional `parent` key stores a reference to the parent node. ## Options diff --git a/src/index.ts b/src/index.ts index d9bdd82..db2aa19 100644 --- a/src/index.ts +++ b/src/index.ts @@ -16,7 +16,7 @@ export type Tag = string | boolean; export type Attributes = Record; export type Content = NodeText | Array; -export type NodeText = string | number; +export type NodeText = { text: string | number }; export type NodeTag = { tag?: Tag; attrs?: Attributes; @@ -24,7 +24,7 @@ export type NodeTag = { location?: SourceLocation; }; -export type Node = NodeText | NodeTag; +export type Node = (NodeText | NodeTag) & { parent?: NodeTag | Node[] }; const defaultOptions: ParserOptions = { lowerCaseTags: false, @@ -49,6 +49,21 @@ export const parser = (html: string, options: Options = {}): Node[] => { return bufArray[bufArray.length - 1]; } + function appendChild(parent: NodeTag | Node[], child: Node) { + child.parent = parent; + + if (Array.isArray(parent)) { + parent.push(child); + return; + } + + if (!Array.isArray(parent.content)) { + parent.content = []; + } + + parent.content.push(child); + } + function isDirective(directive: Directive, tag: string): boolean { if (directive.name instanceof RegExp) { const regex = new RegExp(directive.name.source, 'i'); @@ -84,17 +99,17 @@ export const parser = (html: string, options: Options = {}): Node[] => { if (isDirective(directive, name.toLowerCase())) { if (last === undefined) { - results.push(directiveText); + appendChild(results, { text: directiveText }); return; } - if (typeof last === 'object') { + if ((typeof last === 'object') && !('text' in last)) { if (last.content === undefined) { last.content = []; } if (Array.isArray(last.content)) { - last.content.push(directiveText); + appendChild(last, { text: directiveText }); } } } @@ -106,17 +121,17 @@ export const parser = (html: string, options: Options = {}): Node[] => { const comment = ``; if (last === undefined) { - results.push(comment); + appendChild(results, { text: comment }); return; } - if (typeof last === 'object') { + if ((typeof last === 'object') && !('text' in last)) { if (last.content === undefined) { last.content = []; } if (Array.isArray(last.content)) { - last.content.push(comment); + appendChild(last, { text: comment }); } } } @@ -142,7 +157,8 @@ export const parser = (html: string, options: Options = {}): Node[] => { function onclosetag() { const buf: Node | undefined = bufArray.pop(); - if (buf && typeof buf === 'object' && buf.location && parser.endIndex !== null) { + if (buf && typeof buf === 'object' && !('text' in buf) && + buf.location && parser.endIndex !== null) { buf.location.end = locationTracker.getPosition(parser.endIndex); } @@ -150,17 +166,17 @@ export const parser = (html: string, options: Options = {}): Node[] => { const last = bufferArrayLast(); if (bufArray.length <= 0) { - results.push(buf); + appendChild(results, buf); return; } - if (typeof last === 'object') { + if ((typeof last === 'object') && !('text' in last)) { if (last.content === undefined) { last.content = []; } if (Array.isArray(last.content)) { - last.content.push(buf); + appendChild(last, buf); } } } @@ -170,15 +186,16 @@ export const parser = (html: string, options: Options = {}): Node[] => { const last: Node = bufferArrayLast(); if (last === undefined) { - results.push(text); + appendChild(results, { text }); return; } - if (typeof last === 'object') { + if ((typeof last === 'object') && !('text' in last)) { if (last.content && Array.isArray(last.content) && last.content.length > 0) { const lastContentNode = last.content[last.content.length - 1]; - if (typeof lastContentNode === 'string' && !lastContentNode.startsWith(''); - const expected = ['']; + const expected: Node[] = [{ text: '' }]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); test('should be parse CDATA', t => { const tree = parser('', { xmlMode: true }); - const expected = [{ tag: 'script', content: ['console.log(1);'] }]; + const expected: Node[] = [{ tag: 'script', content: [{ text: 'console.log(1);' }] }]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; t.deepEqual(tree, expected); }); @@ -23,7 +27,7 @@ test('should be parse tag with escape object in attribute', t => { const html = ''; const tree = parser(html); - const expected = [ + const expected: Node[] = [ { tag: 'button', attrs: { @@ -32,6 +36,7 @@ test('should be parse tag with escape object in attribute', t => { } } ]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); @@ -39,7 +44,7 @@ test.skip('should be parse tag with object in attribute data witchout escape', t const html = ''; const tree = parser(html); - const expected = [ + const expected: Node[] = [ { tag: 'button', attrs: { @@ -48,6 +53,7 @@ test.skip('should be parse tag with object in attribute data witchout escape', t } } ]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); @@ -56,7 +62,7 @@ test.skip('should be parse tag with object in attribute data escape', t => { const html = ''; const tree = parser(html); - const expected = [ + const expected: Node[] = [ { tag: 'button', attrs: { @@ -65,36 +71,50 @@ test.skip('should be parse tag with object in attribute data escape', t => { } } ]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); test('should be parse isolated comment', t => { const tree = parser('
'); - const expected = [{ tag: 'div', content: [''] }]; + const expected: Node[] = [{ tag: 'div', content: [{ text: '' }] }]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; t.deepEqual(tree, expected); }); test('should be parse comment before text content', t => { const tree = parser('
Text after comment
'); - const expected = [{ tag: 'div', content: ['', 'Text after comment'] }]; + const expected: Node[] = [{ tag: 'div', content: [{ text: '' }, { text: 'Text after comment' }] }]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; + expected[0].content[1].parent = expected[0]; t.deepEqual(tree, expected); }); test('should be parse comment after text content', t => { const tree = parser('
Text before comment.
'); - const expected = [{ tag: 'div', content: ['Text before comment.', ''] }]; + const expected: Node[] = [{ tag: 'div', content: [{ text: 'Text before comment.' }, { text: '' }] }]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; + expected[0].content[1].parent = expected[0]; t.deepEqual(tree, expected); }); test('should be parse comment in the middle of text content', t => { const tree = parser('
Text surrounding a comment.
'); - const expected = [{ tag: 'div', content: ['Text surrounding ', '', ' a comment.'] }]; + const expected: Node[] = [{ tag: 'div', content: [{ text: 'Text surrounding ' }, { text: '' }, { text: ' a comment.' }] }]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; + expected[0].content[1].parent = expected[0]; + expected[0].content[2].parent = expected[0]; t.deepEqual(tree, expected); }); test('should be parse doctype', t => { const tree = parser(''); - const expected = ['']; + const expected: Node[] = [{ text: '' }]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); @@ -105,7 +125,8 @@ test('should be parse directive', t => { ] }; const tree = parser('', options); - const expected = ['']; + const expected: Node[] = [{ text: '' }]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); @@ -116,9 +137,11 @@ test('should be parse regular expression directive', t => { ] }; const tree1 = parser('', options); - const expected1 = ['']; + const expected1: Node[] = [{ text: '' }]; + expected1[0].parent = expected1; const tree2 = parser('', options); - const expected2 = ['']; + const expected2: Node[] = [{ text: '' }]; + expected2[0].parent = expected2; t.deepEqual(tree1, expected1); t.deepEqual(tree2, expected2); @@ -133,122 +156,164 @@ test('should be parse directives and tag', t => { }; const html = '
{{%njk test %}}'; const tree = parser(html, options); - const expected = [ - '', + const expected: Node[] = [ + { text: '' }, { - content: [''], + content: [{ text: '' }], tag: 'header' }, { - content: ['{{%njk test %}}'], + content: [{ text: '{{%njk test %}}' }], tag: 'body' } ]; + expected[0].parent = expected; + expected[1].parent = expected; + expected[1].content[0].parent = expected[1]; + expected[2].parent = expected; + expected[2].content[0].parent = expected[2]; t.deepEqual(tree, expected); }); test('should be parse tag', t => { const tree = parser(''); - const expected = [{ tag: 'html' }]; + const expected: Node[] = [{ tag: 'html' }]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); test('should be parse doctype and tag', t => { const tree = parser(''); - const expected = ['', { tag: 'html' }]; + const expected: Node[] = [{ text: '' }, { tag: 'html' }]; + expected[0].parent = expected; + expected[1].parent = expected; t.deepEqual(tree, expected); }); test('should be parse tag attrs', t => { const tree = parser('
'); - const expected = [{ + const expected: Node[] = [{ tag: 'div', attrs: { id: 'id', class: 'class' } }]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); test('should be parse text', t => { const tree = parser('Text'); - const expected = ['Text']; + const expected: Node[] = [{ text: 'Text' }]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); test('should be parse text in content', t => { const tree = parser('
Text
'); - const expected = [{ tag: 'div', content: ['Text'] }]; + const expected: Node[] = [{ tag: 'div', content: [{ text: 'Text' }] }]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; t.deepEqual(tree, expected); }); test('should be parse not a single node in tree', t => { const tree = parser('Text1Text2Text3'); - const expected = [ - { tag: 'span', content: ['Text1'] }, { tag: 'span', content: ['Text2'] }, 'Text3' + const expected: Node[] = [ + { tag: 'span', content: [{ text: 'Text1' }] }, + { tag: 'span', content: [{ text: 'Text2' }] }, + { text: 'Text3' } ]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; + expected[1].parent = expected; + expected[1].content[0].parent = expected[1]; + expected[2].parent = expected; t.deepEqual(tree, expected); }); test('should be parse not a single node in parent content', t => { const tree = parser('
Text1Text2Text3
'); - const expected = [ - { tag: 'div', content: [{ tag: 'span', content: ['Text1'] }, { tag: 'span', content: ['Text2'] }, 'Text3'] } + const expected: Node[] = [ + { + tag: 'div', + content: [ + { tag: 'span', content: [{ text: 'Text1' }] }, + { tag: 'span', content: [{ text: 'Text2' }] }, + { text: 'Text3' } + ] + } ]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; + expected[0].content[0].content[0].parent = expected[0].content[0]; + expected[0].content[1].parent = expected[0]; + expected[0].content[1].content[0].parent = expected[0].content[1]; + expected[0].content[2].parent = expected[0]; t.deepEqual(tree, expected); }); test('should be parse camelCase tag name', t => { const tree = parser(''); - const expected = [ + const expected: Node[] = [ { tag: 'mySuperTag' } ]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); test('should be parse simple contents are split with "<" in comment', t => { const html = ' /* width < 800px */
test
'; const tree = parser(html); - const expected = [ - { tag: 'a', content: [' /* width < 800px */ ', { tag: 'hr' }, ' test'] } + const expected: Node[] = [ + { tag: 'a', content: [{ text: ' /* width < 800px */ ' }, { tag: 'hr' }, { text: ' test' }] } ]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; + expected[0].content[1].parent = expected[0]; + expected[0].content[2].parent = expected[0]; t.deepEqual(tree, expected); }); test('should be parse style contents are split with "<" in comment', t => { const html = ''; const tree = parser(html); - const expected = [ - { tag: 'style', content: [' /* width < 800px */ @media (max-width: 800px) { /* selectors */} '] } + const expected: Node[] = [ + { tag: 'style', content: [{ text: ' /* width < 800px */ @media (max-width: 800px) { /* selectors */} ' }] } ]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; t.deepEqual(tree, expected); }); test('should be parse script contents are split with "<" in comment', t => { const html = ''; const tree = parser(html); - const expected = [ + const expected: Node[] = [ { tag: 'script', content: [ - ' var str = \'hey { const html = '‌ ©'; const tree = parser(html); - const expected = ['‌ ©']; + const expected: Node[] = [{ text: '‌ ©' }]; + expected[0].parent = expected; t.deepEqual(tree, expected); }); test('should parse with source locations', t => { const html = '

Test

\n

Foo

'; const tree = parser(html, { sourceLocations: true }); - const expected = [ + const expected: Node[] = [ { tag: 'h1', - content: ['Test'], + content: [{ text: 'Test' }], location: { start: { line: 1, @@ -260,13 +325,13 @@ test('should parse with source locations', t => { } } }, - '\n', + { text: '\n' }, { tag: 'p', content: [ { tag: 'b', - content: ['Foo'], + content: [{ text: 'Foo' }], location: { start: { line: 2, @@ -291,17 +356,23 @@ test('should parse with source locations', t => { } } ]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; + expected[1].parent = expected; + expected[2].parent = expected; + expected[2].content[0].parent = expected[2]; + expected[2].content[0].content[0].parent = expected[2].content[0]; t.deepEqual(tree, expected); }); test('should parse with input in button', t => { const html = ''; const tree = parser(html, { xmlMode: true }); - const expected = [ + const expected: Node[] = [ { tag: 'button', content: [ - 'Hello ', + { text: 'Hello ' }, { tag: 'input', attrs: { @@ -309,9 +380,13 @@ test('should parse with input in button', t => { 'ng-hide': 'true' } }, - 'PostHtml' + { text: 'PostHtml' } ] } ]; + expected[0].parent = expected; + expected[0].content[0].parent = expected[0]; + expected[0].content[1].parent = expected[0]; + expected[0].content[2].parent = expected[0]; t.deepEqual(tree, expected); });