481 lines
13 KiB
TypeScript
481 lines
13 KiB
TypeScript
// ==ClosureCompiler==
|
|
// @output_file_name default.js
|
|
// @compilation_level SIMPLE_OPTIMIZATIONS
|
|
// ==/ClosureCompiler==
|
|
// module.exports = {
|
|
// parse: parse,
|
|
// simplify: simplify,
|
|
// simplifyLostLess: simplifyLostLess,
|
|
// filter: filter,
|
|
// stringify: stringify,
|
|
// toContentString: toContentString,
|
|
// getElementById: getElementById,
|
|
// getElementsByClassName: getElementsByClassName,
|
|
// transformStream: transformStream,
|
|
// };
|
|
|
|
/**
|
|
* @author: Tobias Nickel
|
|
* @created: 06.04.2015
|
|
 * I needed a small XML parser that can be used in a worker.
|
|
*/
|
|
|
|
/**
 * One parsed element of the document tree produced by `parse`.
 */
interface Node {
    /** Name of the tag as written in the source (processing instructions keep their leading `?`). */
    tagName: string;
    /**
     * Attributes by name. An attribute written without a value (e.g. `<input disabled>`)
     * is stored as `null` by `parse`, and `stringify` writes it back as a bare name.
     */
    attributes: Record<string, string | null>;
    /** Child elements and text segments in document order. */
    children: (Node | string)[];
}
|
|
|
|
/**
 * Options accepted by `parse`. Every option is optional; `parse` defaults to `{}`.
 */
interface ParseOptions {
    /** Attribute name used together with `attrValue`; `parse` falls back to `'id'`. */
    attrName?: string;
    /**
     * When set, `parse` returns only the elements whose `attrName` attribute matches.
     * The text is inserted verbatim into a regular expression.
     */
    attrValue?: string;
    /** When truthy (and `attrValue` is not set), `parse` reads a single node starting at `pos`. */
    parseNode?: boolean;
    /** Index in the source at which parsing starts (default 0). */
    pos?: number;
    /** Tag names whose children are never parsed (default: img, br, input, meta, link, hr). */
    noChildNodes?: string[];
    /** When truthy, the final parser position is stored as `pos` on the result. */
    setPos?: boolean;
    /** Keep `<!-- ... -->` comments as string children. */
    keepComments?: boolean;
    /** Keep text children untrimmed. */
    keepWhitespace?: boolean;
    /** Return the `simplify`-ed form of the result. */
    simplify?: boolean;
    /**
     * Predicate handed to `filter`; it receives the node, its index among its siblings,
     * its nesting depth and the dotted path of its ancestors.
     */
    filter?: (node: Node, index: number, depth: number, path: string) => boolean;
}
|
|
|
|
/**
 * Parse XML / HTML into a DOM-like object tree. No validation is performed and
 * some malformed input is tolerated.
 *
 * @param source the XML / HTML text to parse
 * @param options optional settings:
 *  - `pos`: index in `source` at which parsing starts (default 0)
 *  - `keepComments`: keep `<!-- ... -->` comments as string children
 *  - `keepWhitespace`: keep text children untrimmed
 *  - `noChildNodes`: tag names whose children are never parsed
 *    (default: img, br, input, meta, link, hr)
 *  - `attrValue` / `attrName`: return only the elements that carry an attribute
 *    `attrName` (default `id`) whose quoted value matches `attrValue`;
 *    `attrValue` is inserted verbatim into a regular expression
 *  - `parseNode`: parse exactly one node starting at `pos` and return that node
 *  - `filter`: run the result through `filter` with this predicate
 *  - `simplify`: return the `simplify`-ed form of the result
 *  - `setPos`: store the final parser position as `pos` on the result
 * @returns the parsed children (or, depending on the options, a single node or
 *          the simplified structure)
 * @throws {Error} "Unexpected close tag" when a closing tag does not contain
 *         the name of the element currently being parsed
 */
export function parse(source: string, options: ParseOptions = {}): (Node | string)[] {
    let pos = options.pos || 0;
    const keepComments = !!options.keepComments;
    const keepWhitespace = !!options.keepWhitespace;

    const openBracket = '<';
    const closeBracket = '>';
    const openBracketCC = openBracket.charCodeAt(0);
    const closeBracketCC = closeBracket.charCodeAt(0);
    const minusCC = '-'.charCodeAt(0);
    const slashCC = '/'.charCodeAt(0);
    const exclamationCC = '!'.charCodeAt(0);
    const singleQuoteCC = "'".charCodeAt(0);
    const doubleQuoteCC = '"'.charCodeAt(0);
    const openCornerBracketCC = '['.charCodeAt(0);
    const closeCornerBracketCC = ']'.charCodeAt(0);

    // characters that end a tag name or an attribute name
    const nameSpacer = '\r\n\t>/= ';
    // tags whose children are never parsed
    const noChildNodes = options.noChildNodes || ['img', 'br', 'input', 'meta', 'link', 'hr'];

    /** true for the char codes of A-Z and a-z */
    function isLetterCode(code: number): boolean {
        return (code > 64 && code < 91) || (code > 96 && code < 123);
    }

    /**
     * Parses a list of sibling entries until the closing tag of `tagName`
     * (or the end of the source) is reached.
     */
    function parseChildren(tagName: string): (Node | string)[] {
        const children: (Node | string)[] = [];
        while (source[pos]) {
            if (source.charCodeAt(pos) == openBracketCC) {
                if (source.charCodeAt(pos + 1) === slashCC) {
                    // closing tag: ends the current list of children
                    const closeStart = pos + 2;
                    pos = source.indexOf(closeBracket, pos);

                    const closeTag = source.substring(closeStart, pos);
                    if (closeTag.indexOf(tagName) == -1) {
                        const parsedText = source.substring(0, pos).split('\n');
                        throw new Error(
                            'Unexpected close tag\nLine: ' + (parsedText.length - 1) +
                            '\nColumn: ' + (parsedText[parsedText.length - 1].length + 1) +
                            '\nChar: ' + source[pos]
                        );
                    }

                    // step over '>' unless the closing tag was unterminated (pos === -1)
                    if (pos + 1) pos += 1;

                    return children;
                } else if (source.charCodeAt(pos + 1) === exclamationCC) {
                    if (source.charCodeAt(pos + 2) == minusCC) {
                        // comment: jump from '>' to '>' until one is preceded by '--'
                        const startCommentPos = pos;
                        while (
                            pos !== -1 &&
                            !(
                                source.charCodeAt(pos) === closeBracketCC &&
                                source.charCodeAt(pos - 1) == minusCC &&
                                source.charCodeAt(pos - 2) == minusCC
                            )
                        ) {
                            pos = source.indexOf(closeBracket, pos + 1);
                        }
                        if (pos === -1) {
                            // unterminated comment: it runs to the end of the source
                            pos = source.length;
                        }
                        if (keepComments) {
                            children.push(source.substring(startCommentPos, pos + 1));
                        }
                    } else if (
                        source.charCodeAt(pos + 2) === openCornerBracketCC &&
                        source.charCodeAt(pos + 8) === openCornerBracketCC &&
                        source.slice(pos + 3, pos + 8).toLowerCase() === 'cdata'
                    ) {
                        // cdata: the raw content becomes a text child
                        const cdataEndIndex = source.indexOf(']]>', pos);
                        if (cdataEndIndex == -1) {
                            children.push(source.slice(pos + 9));
                            pos = source.length;
                        } else {
                            children.push(source.substring(pos + 9, cdataEndIndex));
                            pos = cdataEndIndex + 3;
                        }
                        continue;
                    } else {
                        // doctype: '>' inside a [...] section does not end the declaration
                        const startDoctype = pos + 1;
                        pos += 2;
                        let encapsuled = false;
                        while ((source.charCodeAt(pos) !== closeBracketCC || encapsuled === true) && source[pos]) {
                            if (source.charCodeAt(pos) === openCornerBracketCC) {
                                encapsuled = true;
                            } else if (encapsuled === true && source.charCodeAt(pos) === closeCornerBracketCC) {
                                encapsuled = false;
                            }
                            pos++;
                        }
                        children.push(source.substring(startDoctype, pos));
                    }
                    pos++;
                    continue;
                }
                const node = parseNode();
                children.push(node);
                if (node.tagName[0] === '?') {
                    // what follows a processing instruction was parsed as its children;
                    // move those entries up so they become its siblings
                    children.push(...node.children);
                    node.children = [];
                }
            } else {
                const text = parseText();
                if (keepWhitespace) {
                    if (text.length > 0) {
                        children.push(text);
                    }
                } else {
                    const trimmed = text.trim();
                    if (trimmed.length > 0) {
                        children.push(trimmed);
                    }
                }
                pos++;
            }
        }
        return children;
    }

    /**
     * Returns the text up to (not including) the next '<' and leaves `pos`
     * on the last character of that text.
     */
    function parseText(): string {
        const start = pos;
        pos = source.indexOf(openBracket, pos) - 1;
        if (pos === -2) {
            // no further '<': the text runs to the end of the source
            pos = source.length;
        }
        return source.slice(start, pos + 1);
    }

    /**
     * Returns the text from `pos` up to the first name-terminating character.
     */
    function parseName(): string {
        const start = pos;
        while (nameSpacer.indexOf(source[pos]) === -1 && source[pos]) {
            pos++;
        }
        return source.slice(start, pos);
    }

    /**
     * Parses a quoted string starting at the quote character under `pos`.
     * Leaves `pos` on the closing quote, or at -1 when the quote is never closed.
     */
    function parseString(): string {
        const startChar = source[pos];
        const startpos = pos + 1;
        pos = source.indexOf(startChar, startpos);
        return source.slice(startpos, pos);
    }

    /**
     * Reads the raw content of a `<script>` / `<style>` element up to `closingTag`
     * and moves `pos` behind that closing tag. When the closing tag is missing the
     * rest of the source is the content and `pos` moves to the end of the source,
     * so the parser never rewinds into text it has already consumed.
     */
    function parseRawText(closingTag: string): (Node | string)[] {
        const start = pos + 1;
        const end = source.indexOf(closingTag, pos);
        if (end === -1) {
            pos = source.length;
            return [source.slice(start)];
        }
        pos = end + closingTag.length;
        return [source.slice(start, end)];
    }

    /**
     * Parses one node (tag name, attributes and children) starting at the '<'
     * under `pos`. Children are parsed through parseChildren, which makes the
     * parsing recursive.
     */
    function parseNode(): Node {
        pos++; // step over '<'
        const tagName = parseName();
        const attributes: Record<string, string | null> = {};
        let children: (string | Node)[] = [];

        // parsing attributes
        while (source.charCodeAt(pos) !== closeBracketCC && source[pos]) {
            if (isLetterCode(source.charCodeAt(pos))) {
                const name = parseName();
                // search the beginning of the value (a quote), the next name or the tag end
                let code = source.charCodeAt(pos);
                while (
                    code &&
                    code !== singleQuoteCC &&
                    code !== doubleQuoteCC &&
                    !isLetterCode(code) &&
                    code !== closeBracketCC
                ) {
                    pos++;
                    code = source.charCodeAt(pos);
                }
                let value: string | null;
                if (code === singleQuoteCC || code === doubleQuoteCC) {
                    value = parseString();
                    if (pos === -1) {
                        // unterminated attribute value: give up on this node
                        return {
                            tagName,
                            attributes,
                            children,
                        };
                    }
                } else {
                    // attribute without a value
                    value = null;
                    pos--;
                }
                attributes[name] = value;
            }
            pos++;
        }

        // optional parsing of children (skipped for self-closing tags)
        if (source.charCodeAt(pos - 1) !== slashCC) {
            if (tagName == 'script') {
                children = parseRawText('</script>');
            } else if (tagName == 'style') {
                children = parseRawText('</style>');
            } else if (noChildNodes.indexOf(tagName) === -1) {
                pos++;
                children = parseChildren(tagName);
            } else {
                pos++;
            }
        } else {
            pos++;
        }
        return {
            tagName,
            attributes,
            children,
        };
    }

    // The result is a list of children, a single node or a simplified object,
    // depending on the options, hence the loose local type.
    let result: any;
    if (options.attrValue !== undefined) {
        // attribute search: locate every matching attribute and parse the element around it
        const attrName = options.attrName || 'id';
        const matcher = new RegExp('\\s' + attrName + '\\s*=[\'"]' + options.attrValue + '[\'"]');
        const found: Node[] = [];
        let match: RegExpExecArray | null;
        while ((match = matcher.exec(source)) !== null) {
            pos = source.lastIndexOf('<', match.index);
            if (pos !== -1) {
                found.push(parseNode());
            }
            // continue the search behind the element that was just parsed
            source = source.slice(pos);
            pos = 0;
        }
        // no further match: the position is reported as -1
        pos = -1;
        result = found;
    } else if (options.parseNode) {
        result = parseNode();
    } else {
        result = parseChildren('');
    }

    if (options.filter) {
        result = filter(Array.isArray(result) ? result : [result], options.filter);
    }

    if (options.simplify) {
        return simplify(Array.isArray(result) ? result : [result]);
    }

    if (options.setPos) {
        result.pos = pos;
    }

    return result;
}
|
|
|
|
/**
 * Transforms the parsed tree into an object similar to the result of PHP's
 * simplexml_load_*() functions: children are grouped by tag name, and a group
 * with a single entry is replaced by that entry.
 *
 * The conversion is lossy: the order of elements of different names is not
 * kept, string children that have element siblings are dropped, and the
 * attributes of an element whose content simplifies to a string are dropped.
 * The original XML can therefore not be reproduced from the result.
 *
 * Tag names that collide with members of Object.prototype (e.g. `constructor`,
 * `toString`, `__proto__`) are stored as ordinary own properties.
 *
 * @param children the list of children to simplify
 * @returns `''` for an empty list, the string itself for a single string
 *          child, otherwise an object keyed by tag name
 */
export function simplify(children: any[]) {
    if (!children.length) {
        return '';
    }

    if (children.length === 1 && typeof children[0] == 'string') {
        return children[0];
    }

    const out: Record<string, any> = {};
    // group the simplified children by tag name
    children.forEach(function (child: any) {
        if (typeof child !== 'object' || child === null) {
            return;
        }
        if (!Object.prototype.hasOwnProperty.call(out, child.tagName)) {
            // defineProperty creates an own property even for names such as
            // '__proto__' that a plain assignment would route to the prototype
            Object.defineProperty(out, child.tagName, {
                value: [],
                writable: true,
                enumerable: true,
                configurable: true,
            });
        }
        const kids = simplify(child.children || []);
        out[child.tagName].push(kids);
        if (Object.keys(child.attributes).length && typeof kids !== 'string') {
            kids._attributes = child.attributes;
        }
    });

    // a tag that occurs only once is represented by its single entry
    for (const tag of Object.keys(out)) {
        if (out[tag].length == 1) {
            out[tag] = out[tag][0];
        }
    }

    return out;
}
|
|
|
|
|
|
/**
 * Similar to `simplify`, but loses less information: every tag name always
 * maps to an array, and the attributes of an element with a single text child
 * are kept by returning `{ _attributes, value }` for it.
 *
 * Tag names that collide with members of Object.prototype (e.g. `constructor`,
 * `toString`, `__proto__`) are stored as ordinary own properties, and elements
 * without an `attributes` object are treated as having no attributes.
 *
 * @param children the list of children to simplify
 * @param parentAttributes attributes of the element that owns `children`
 * @returns `{}` for an empty list; for a single string child the string (or
 *          `{ _attributes, value }` when the parent has attributes);
 *          otherwise an object mapping each tag name to an array of entries
 */
export function simplifyLostLess(children: any[], parentAttributes = {}) {
    const out: Record<string, any> = {};
    if (!children.length) {
        return out;
    }

    if (children.length === 1 && typeof children[0] == 'string') {
        return Object.keys(parentAttributes).length ? {
            _attributes: parentAttributes,
            value: children[0]
        } : children[0];
    }

    // group the simplified children by tag name
    children.forEach(function (child: any) {
        if (typeof child !== 'object' || child === null) {
            return;
        }
        const attributes = child.attributes || {};
        if (!Object.prototype.hasOwnProperty.call(out, child.tagName)) {
            // defineProperty creates an own property even for names such as
            // '__proto__' that a plain assignment would route to the prototype
            Object.defineProperty(out, child.tagName, {
                value: [],
                writable: true,
                enumerable: true,
                configurable: true,
            });
        }
        const kids = simplifyLostLess(child.children || [], attributes);
        out[child.tagName].push(kids);
        if (Object.keys(attributes).length) {
            kids._attributes = attributes;
        }
    });

    return out;
}
|
|
|
|
/**
 * Collects, depth first, every node of the tree for which the predicate returns
 * a truthy value. Unlike Array.prototype.filter it also descends into the
 * children of every node, whether or not the node itself was accepted.
 *
 * @param children the list of nodes (and strings) to search
 * @param f predicate called with (node, index among its siblings, depth, path);
 *          it is only called for object entries, never for text
 * @param dept nesting depth of `children` (0 for the top level)
 * @param path dotted path of the ancestors, built from `index.tagName` segments
 * @returns a flat list of the accepted nodes in document order
 */
export function filter(children: any[], f: (arg0: any, arg1: any, arg2: number, arg3: string) => any, dept = 0, path = '') {
    let out: any[] = [];
    children.forEach(function (child: any, i: number) {
        // typeof null is 'object', so null entries are excluded explicitly
        const isNode = typeof child === 'object' && child !== null;
        if (isNode && f(child, i, dept, path)) out.push(child);
        if (child && child.children) {
            const kids = filter(child.children, f, dept + 1, (path ? path + '.' : '') + i + '.' + child.tagName);
            out = out.concat(kids);
        }
    });
    return out;
}
|
|
|
|
/**
 * Turns a previously parsed list of nodes back into markup. Useful
 * 1. to remove whitespace (text children and attribute values are trimmed),
 * 2. to recreate XML after some of the data was changed.
 *
 * Attributes whose value is `null` are written as a bare name. A value that
 * contains a double quote is wrapped in single quotes, every other value in
 * double quotes. Nodes whose tag name starts with `?` are closed with `?>` and
 * their children are not written; all other nodes get an explicit closing tag.
 *
 * @param O the list of nodes and strings to stringify
 * @returns the generated markup (an empty string when `O` is falsy)
 */
export function stringify(O: any) {
    /** markup for one attribute, including the leading space */
    function renderAttribute(name: string, value: string | null): string {
        if (value === null) {
            return ' ' + name;
        }
        const quote = value.indexOf('"') === -1 ? '"' : "'";
        return ' ' + name + '=' + quote + value.trim() + quote;
    }

    /** markup for one element including its attributes and children */
    function renderNode(node: { tagName: string; attributes: Record<string, string | null>; children?: any }): string {
        let markup = '<' + node.tagName;
        for (const name in node.attributes) {
            markup += renderAttribute(name, node.attributes[name]);
        }
        if (node.tagName[0] === '?') {
            return markup + '?>';
        }
        return markup + '>' + renderChildren(node.children) + '</' + node.tagName + '>';
    }

    /** markup for a list of entries; strings are trimmed, everything else is rendered as a node */
    function renderChildren(list: any): string {
        if (!list) {
            return '';
        }
        let markup = '';
        for (let index = 0; index < list.length; index++) {
            const entry = list[index];
            markup += typeof entry == 'string' ? entry.trim() : renderNode(entry);
        }
        return markup;
    }

    return renderChildren(O);
}
|
|
|
|
|
|
/**
 * Reads the text content of a node, a list of nodes or a plain string.
 * It is handy for mixed content like:
 * this text has some <b>big</b> text and a <a href=''>link</a>
 *
 * For a list, the pieces are joined with a space and the running result is
 * trimmed after every entry. A plain value is returned with one leading space,
 * so a text entry that follows other content is separated by two spaces.
 * `null`, `undefined` and nodes without children contribute no text.
 *
 * @param tDom a string, a node (anything with a `children` property) or a list of those
 * @returns the collected text
 */
export function toContentString(tDom: unknown): string {
    if (tDom === null || tDom === undefined) {
        return '';
    }
    if (Array.isArray(tDom)) {
        let text = '';
        for (const entry of tDom) {
            text = (text + ' ' + toContentString(entry)).trim();
        }
        return text;
    }
    if (typeof tDom === 'object') {
        return toContentString((tDom as { children?: unknown }).children);
    }
    return ' ' + String(tDom);
}
|
|
|
|
/**
 * Finds the elements whose `id` attribute equals `id` without parsing the
 * whole document.
 *
 * The id is matched literally: characters that have a special meaning in a
 * regular expression are escaped before the search pattern is built by `parse`.
 *
 * @param S the XML / HTML source to search
 * @param id the id to look for
 * @param simplified when truthy, the matches are returned in `simplify`-ed form
 * @returns the first matching node (`undefined` when there is none), or the
 *          simplified matches when `simplified` is truthy
 */
export function getElementById(S: string, id: any, simplified?: any) {
    // parse() inserts attrValue verbatim into a RegExp, so neutralise metacharacters
    const literalId = String(id).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const out = parse(S, {
        attrValue: literalId
    });
    return simplified ? simplify(out) : out[0];
}
|
|
|
|
/**
 * Finds the elements whose `class` attribute contains `classname` without
 * parsing the whole document.
 *
 * The search is done by `parse` with a regular expression over the raw source:
 * around the class name the attribute value may only contain letters, digits,
 * `-` and spaces, and the name is matched as a substring of the value.
 * The class name itself is matched literally; characters that have a special
 * meaning in a regular expression are escaped.
 *
 * @param S the XML / HTML source to search
 * @param classname the class name to look for
 * @param simplified when truthy, the matches are returned in `simplify`-ed form
 * @returns the list of matching nodes, or their simplified form
 */
export function getElementsByClassName(S: string, classname: string, simplified?: any) {
    // parse() inserts attrValue verbatim into a RegExp, so neutralise metacharacters
    const literalName = String(classname).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const out = parse(S, {
        attrName: 'class',
        attrValue: '[a-zA-Z0-9- ]*' + literalName + '[a-zA-Z0-9- ]*'
    });
    return simplified ? simplify(out) : out;
}