/*************************************************************
 *
 *  Copyright (c) 2018 The MathJax Consortium
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

/**
 * @fileoverview  Implements a lightweight DOM adaptor
 *
 * @author dpvc@mathjax.org (Davide Cervone)
 */

import {AttributeData} from '../../core/DOMAdaptor.js';
import {MinHTMLAdaptor, MinDOMParser} from '../HTMLAdaptor.js';
import * as Entities from '../../util/Entities.js';
import {LiteDocument} from './Document.js';
import {LiteElement} from './Element.js';
import {LiteText, LiteComment} from './Text.js';
import {LiteAdaptor} from '../liteAdaptor.js';

/**
 * Regular-expression sources (and the compiled, case-insensitive
 * expressions built from them) used to tokenize serialized HTML
 */
export namespace PATTERNS {
  //
  //  Primitive pieces: element names, attribute names, attribute values
  //  (the SPLIT form captures the single-quoted, double-quoted, and
  //  unquoted value in three separate groups), and white space
  //
  export const TAGNAME = '[a-z][^\\s\\n>]*';
  export const ATTNAME = '[a-z][^\\s\\n>=]*';
  export const VALUE = `(?:'[^']*'|"[^"]*"|[^\\s\\n]+)`;
  export const VALUESPLIT = `(?:'([^']*)'|"([^"]*)"|([^\\s\\n]+))`;
  export const SPACE = '(?:\\s|\\n)+';
  export const OPTIONALSPACE = '(?:\\s|\\n)*';

  //
  //  An attribute is a name with an optional "= value" part
  //  (the SPLIT form also captures the name)
  //
  const EQUALS = `${OPTIONALSPACE}=${OPTIONALSPACE}`;
  export const ATTRIBUTE = `${ATTNAME}(?:${EQUALS}${VALUE})?`;
  export const ATTRIBUTESPLIT = `(${ATTNAME})(?:${EQUALS}${VALUESPLIT})?`;

  //
  //  A tag is one of: an open tag with attributes, a close tag, a comment,
  //  or some other "<!...>" declaration.  It ends at ">" or at the end of
  //  the string, and the whole tag is captured so that split() retains it.
  //
  const TAGFORMS = [
    `${TAGNAME}(?:${SPACE}${ATTRIBUTE})*${OPTIONALSPACE}/?`,
    `/${TAGNAME}`,
    '!--[^]*?--',
    '![^]*?'
  ];
  export const TAG = '(<(?:' + TAGFORMS.join('|') + ')(?:>|$))';

  //
  //  The compiled expressions
  //
  export const tag = new RegExp(TAG, 'i');
  export const attr = new RegExp(ATTRIBUTE, 'i');
  export const attrsplit = new RegExp(ATTRIBUTESPLIT, 'i');
}
/************************************************************/
/**
 * Implements a lightweight DOMParser replacement
 * (Not perfect, but handles most well-formed HTML)
 */
export class LiteParser implements MinDOMParser {

  /**
   * The list of self-closing tags
   */
  public static SELF_CLOSING: {[name: string]: boolean} = {
    area: true,
    base: true,
    br: true,
    col: true,
    command: true,
    embed: true,
    hr: true,
    img: true,
    input: true,
    keygen: true,
    link: true,
    menuitem: true,
    meta: true,
    param: true,
    source: true,
    track: true,
    wbr: true
  };

  /**
   * The list of tags whose content is not parsed (PCDATA)
   */
  public static PCDATA: {[name: string]: boolean} = {
    option: true,
    textarea: true,
    fieldset: true,
    title: true,
    style: true,
    script: true
  };

  /**
   * The list of attributes that don't get entity translation
   */
  public static CDATA_ATTR: {[name: string]: boolean} = {
    style: true,
    datafld: true,
    datasrc: true,
    href: true,
    src: true,
    longdesc: true,
    usemap: true,
    cite: true,
    datetime: true,
    action: true,
    axis: true,
    profile: true,
    content: true,
    scheme: true
  };

  /**
   * Parse serialized HTML into a document created by the given adaptor.
   *
   * The format argument is accepted for interface compatibility but is not
   * consulted.  The adaptor defaults to null but is used immediately, so
   * callers must always supply one.
   *
   * @override
   */
  public parseFromString(text: string, format: string = 'text/html', adaptor: LiteAdaptor = null) {
    const root = adaptor.createDocument();
    let node = adaptor.body(root);
    //
    // Remove processing instructions (<? ... ?>).  [^] is used rather than
    // "." so that instructions spanning several lines are removed as well.
    //
    // Split the HTML into an array of text, tag, text, tag, ...
    // Then loop through them and add text nodes and process tags.
    //
    const parts = text.replace(/<\?[^]*?\?>/g, '').split(PATTERNS.tag);
    while (parts.length) {
      const content = parts.shift();
      const tag = parts.shift();
      if (content) {
        this.addText(adaptor, node, content);
      }
      //
      // A tag that does not end in ">" was cut off by the end of the
      // string, and is dropped.
      //
      if (tag && tag.charAt(tag.length - 1) === '>') {
        if (tag.charAt(1) === '!') {
          this.addComment(adaptor, node, tag);
        } else if (tag.charAt(1) === '/') {
          node = this.closeTag(adaptor, node, tag);
        } else {
          node = this.openTag(adaptor, node, tag, parts);
        }
      }
    }
    this.checkDocument(adaptor, root);
    return root;
  }

  /**
   * @param {LiteAdaptor} adaptor  The adaptor for managing nodes
   * @param {LiteElement} node     The node to add a text element to
   * @param {string} text          The text for the text node (entities are translated)
   * @return {LiteText}            The text element
   */
  protected addText(adaptor: LiteAdaptor, node: LiteElement, text: string) {
    text = Entities.translate(text);
    return adaptor.append(node, adaptor.text(text)) as LiteText;
  }

  /**
   * @param {LiteAdaptor} adaptor  The adaptor for managing nodes
   * @param {LiteElement} node     The node to add a comment to
   * @param {string} comment       The text for the comment node (the complete "<!...>" tag)
   * @return {LiteComment}         The comment element
   */
  protected addComment(adaptor: LiteAdaptor, node: LiteElement, comment: string) {
    return adaptor.append(node, new LiteComment(comment)) as LiteComment;
  }

  /**
   * Close the nearest open element (the node itself or one of its
   * ancestors) whose kind matches the close tag.
   *
   * @param {LiteAdaptor} adaptor  The adaptor for managing nodes
   * @param {LiteElement} node     The node to close
   * @param {string} tag           The close tag being processed
   * @return {LiteElement}         The first unclosed parent node
   */
  protected closeTag(adaptor: LiteAdaptor, node: LiteElement, tag: string) {
    //
    // Strip the leading "</" and the trailing ">"
    //
    const kind = tag.slice(2, tag.length - 1).toLowerCase();
    let target = node;
    while (adaptor.parent(target) && adaptor.kind(target) !== kind) {
      target = adaptor.parent(target);
    }
    //
    // No open element of this kind: ignore the stray close tag rather than
    // closing every open element.
    //
    if (adaptor.kind(target) !== kind) {
      return node;
    }
    //
    // The parent can be falsy at the top of the tree (the loop above guards
    // against that); stay on the top-most element in that case so there is
    // always a node to receive the content that follows.
    //
    return adaptor.parent(target) || target;
  }

  /**
   * @param {LiteAdaptor} adaptor  The adaptor for managing nodes
   * @param {LiteElement} node     The parent node for the tag
   * @param {string} tag           The tag being processed
   * @param {string[]} parts       The rest of the text/tag array
   * @return {LiteElement}         The node
to which the next tag will be added */ protected openTag(adaptor: LiteAdaptor, node: LiteElement, tag: string, parts: string[]) { const PCDATA = (this.constructor as typeof LiteParser).PCDATA; const SELF_CLOSING = (this.constructor as typeof LiteParser).SELF_CLOSING; // // Get the child to be added to the node // const kind = tag.match(/<(.*?)[\s\n>]/)[1].toLowerCase(); const child = adaptor.node(kind) as LiteElement; // // Split out the tag attributes as an array of space, name, value1, value3, value3, // where value1, value2, and value3 are the value of the node (only one is defined) // that come from matching quoted strings with ' (value1), " (value2) or no quotes (value3). // const attributes = tag.replace(/^<.*?[\s\n>]/, '').split(PATTERNS.attrsplit); // // If the tag was complete (it ends with > or has no attributes) // if (attributes.pop().match(/>$/) || attributes.length < 5) { this.addAttributes(adaptor, child, attributes); adaptor.append(node, child); // // For non-self-closing tags, // For tags whose contents is PCDATA (like