online-energieausweis/src/lib/helpers/txml.ts

// ==ClosureCompiler==
// @output_file_name default.js
// @compilation_level SIMPLE_OPTIMIZATIONS
// ==/ClosureCompiler==
// module.exports = {
//     parse: parse,
//     simplify: simplify,
//     simplifyLostLess: simplifyLostLess,
//     filter: filter,
//     stringify: stringify,
//     toContentString: toContentString,
//     getElementById: getElementById,
//     getElementsByClassName: getElementsByClassName,
//     transformStream: transformStream,
// };

/**
 * @author: Tobias Nickel
 * @created: 06.04.2015
 * I needed a small XML parser that can be used in a worker.
 */

/**
 * One parsed element as produced by `parse`.
 * Declared inside this module, so it shadows the global DOM `Node` type here.
 */
interface Node {
	// Element name as read from the markup; processing instructions keep their leading '?'.
	tagName: string;
	// Attribute name -> value. NOTE(review): `parse` stores `null` for attributes written
	// without a quoted value and `stringify` checks for `null`, so the declared value type
	// is narrower than what exists at runtime.
	attributes: Record<string, string>;
	// Child elements and text (plain strings) in document order.
	children: (Node | string)[];
}

/**
 * Options accepted by `parse`. Every field is optional, so `{}` is a valid value.
 */
interface ParseOptions {
	/** Attribute name used for the attribute lookup; `parse` falls back to 'id' when only `attrValue` is given. */
	attrName?: string;
	/** When set, `parse` collects the elements whose `attrName` attribute matches this value (it is used as regular-expression source). */
	attrValue?: string;
	/** When truthy, `parse` reads exactly one element starting at `pos` instead of a list of children. */
	parseNode?: boolean;
	/** Index in the source at which parsing starts (default 0). */
	pos?: number;
	/** Tag names that are never given children; defaults to img, br, input, meta, link, hr. */
	noChildNodes?: string[];
	/** When truthy, the final parser position is attached to the result as `pos`. */
	setPos?: boolean;
	/** When truthy, comments are kept in the output as strings. */
	keepComments?: boolean;
	/** When truthy, text is kept untrimmed and whitespace-only text is not dropped. */
	keepWhitespace?: boolean;
	/** When truthy, `parse` returns the result of `simplify` instead of the node tree. */
	simplify?: boolean;
	/**
	 * Predicate handed to the module's `filter` function.
	 * NOTE(review): `filter` invokes it as (node, index, depth, path), which does not match the
	 * second parameter declared here; the declaration is kept unchanged for compatibility.
	 */
	filter?: (a: Node, b: Node) => boolean;
}

/**
 * Parse XML / HTML into a DOM-like tree, with no validation and some failure tolerance.
 *
 * Elements become `{ tagName, attributes, children }` objects. Text, CDATA content,
 * doctype declarations and (with `keepComments`) comments become plain strings.
 * Text is trimmed and whitespace-only text is dropped unless `keepWhitespace` is set.
 * The content of `<script>` and `<style>` elements is kept as one raw string.
 *
 * Option handling:
 * - `pos`: index in `source` at which parsing starts (default 0).
 * - `noChildNodes`: tag names that never get children (default img, br, input, meta, link, hr).
 * - `attrValue` (with `attrName`, default 'id'): instead of parsing the whole input, collect
 *   every element that has a matching attribute. Both values are inserted unescaped into a
 *   regular expression.
 * - `parseNode`: parse exactly one element starting at `pos`; the runtime result is then that
 *   single node rather than an array.
 * - `filter`: the result is passed through `filter()`.
 * - `simplify`: the result of `simplify()` is returned instead of the tree.
 * - `setPos`: the final parser position is attached to the result as `pos`.
 *
 * @param source your XML to parse
 * @param options all other options, see above
 * @returns the parsed nodes
 * @throws {Error} "Unexpected close tag" when a closing tag does not contain the name of the
 *   element that is currently open
 */
export function parse(source: string, options: ParseOptions = {}): (Node | string)[] {
	let pos = options.pos || 0;
	const keepComments = !!options.keepComments;
	const keepWhitespace = !!options.keepWhitespace;

	const openBracket = "<";
	const openBracketCC = "<".charCodeAt(0);
	const closeBracket = ">";
	const closeBracketCC = ">".charCodeAt(0);
	const minusCC = "-".charCodeAt(0);
	const slashCC = "/".charCodeAt(0);
	const exclamationCC = '!'.charCodeAt(0);
	const singleQuoteCC = "'".charCodeAt(0);
	const doubleQuoteCC = '"'.charCodeAt(0);
	const openCornerBracketCC = '['.charCodeAt(0);
	const closeCornerBracketCC = ']'.charCodeAt(0);

	// Characters that end a tag name or an attribute name.
	const nameSpacer = '\r\n\t>/= ';
	// Tags that are never given children, even when they are not self-closed.
	const NoChildNodes = options.noChildNodes || ['img', 'br', 'input', 'meta', 'link', 'hr'];

	/**
	 * Parses a list of sibling entries until the closing tag of `tagName` or the end of input.
	 */
	function parseChildren(tagName: string): (Node | string)[] {
		const children: (Node | string)[] = [];
		while (source[pos]) {
			if (source.charCodeAt(pos) == openBracketCC) {
				if (source.charCodeAt(pos + 1) === slashCC) {
					// closing tag: verify it belongs to the open element, then step past '>'
					const closeStart = pos + 2;
					pos = source.indexOf(closeBracket, pos);

					const closeTag = source.substring(closeStart, pos);
					if (closeTag.indexOf(tagName) == -1) {
						const parsedText = source.substring(0, pos).split('\n');
						throw new Error(
							'Unexpected close tag\nLine: ' + (parsedText.length - 1) +
							'\nColumn: ' + (parsedText[parsedText.length - 1].length + 1) +
							'\nChar: ' + source[pos]
						);
					}

					// pos is -1 when no '>' was found; leave it so the callers' loops stop
					if (pos + 1) pos += 1;

					return children;
				} else if (source.charCodeAt(pos + 1) === exclamationCC) {
					if (source.charCodeAt(pos + 2) == minusCC) {
						// comment support: advance to the '>' that is preceded by "--"
						const startCommentPos = pos;
						while (pos !== -1 && !(source.charCodeAt(pos) === closeBracketCC && source.charCodeAt(pos - 1) == minusCC && source.charCodeAt(pos - 2) == minusCC && pos != -1)) {
							pos = source.indexOf(closeBracket, pos + 1);
						}
						if (pos === -1) {
							pos = source.length;
						}
						if (keepComments) {
							children.push(source.substring(startCommentPos, pos + 1));
						}
					} else if (
						source.charCodeAt(pos + 2) === openCornerBracketCC &&
						source.charCodeAt(pos + 8) === openCornerBracketCC &&
						source.substring(pos + 3, pos + 8).toLowerCase() === 'cdata'
					) {
						// cdata: the content between "<![CDATA[" and "]]>" is kept verbatim
						const cdataEndIndex = source.indexOf(']]>', pos);
						if (cdataEndIndex == -1) {
							children.push(source.slice(pos + 9));
							pos = source.length;
						} else {
							children.push(source.substring(pos + 9, cdataEndIndex));
							pos = cdataEndIndex + 3;
						}
						continue;
					} else {
						// doctype support: a '>' inside [...] does not end the declaration
						const startDoctype = pos + 1;
						pos += 2;
						let encapsuled = false;
						while ((source.charCodeAt(pos) !== closeBracketCC || encapsuled === true) && source[pos]) {
							if (source.charCodeAt(pos) === openCornerBracketCC) {
								encapsuled = true;
							} else if (encapsuled === true && source.charCodeAt(pos) === closeCornerBracketCC) {
								encapsuled = false;
							}
							pos++;
						}
						children.push(source.substring(startDoctype, pos));
					}
					pos++;
					continue;
				}
				const node = parseNode();
				children.push(node);
				if (node.tagName[0] === '?') {
					// a processing instruction has no closing tag: what was read as its
					// children are really its following siblings
					children.push(...node.children);
					node.children = [];
				}
			} else {
				const text = parseText();
				if (keepWhitespace) {
					if (text.length > 0) {
						children.push(text);
					}
				} else {
					const trimmed = text.trim();
					if (trimmed.length > 0) {
						children.push(trimmed);
					}
				}
				pos++;
			}
		}
		return children;
	}

	/**
	 * Returns the text up to the next '<' and leaves pos on the last character of that text.
	 */
	function parseText(): string {
		const start = pos;
		pos = source.indexOf(openBracket, pos) - 1;
		if (pos === -2)
			pos = source.length;
		return source.slice(start, pos + 1);
	}

	/**
	 * Returns the text up to the first character contained in nameSpacer.
	 */
	function parseName(): string {
		const start = pos;
		while (nameSpacer.indexOf(source[pos]) === -1 && source[pos]) {
			pos++;
		}
		return source.slice(start, pos);
	}

	/**
	 * Reads the raw content of a <script>/<style> element; pos is on the '>' of the opening tag.
	 * When the closing tag is missing, the rest of the input is the content and parsing ends.
	 * Continuing from the "not found" index would move pos backwards and, for a tag that is
	 * not near the start of the input, re-parse the same tag forever.
	 */
	function parseRawText(closingTag: string): string {
		const start = pos + 1;
		const end = source.indexOf(closingTag, pos);
		if (end === -1) {
			pos = source.length;
			return source.slice(start);
		}
		pos = end + closingTag.length;
		return source.slice(start, end);
	}

	/**
	 * Parses one element: tag name, attributes and children. Children are read with
	 * parseChildren, which makes the parsing recursive.
	 */
	function parseNode(): Node {
		pos++;
		const tagName = parseName();
		const attributes: Record<string, string> = {};
		let children: (string | Node)[] = [];

		// parsing attributes
		while (source.charCodeAt(pos) !== closeBracketCC && source[pos]) {
			const c = source.charCodeAt(pos);
			if ((c > 64 && c < 91) || (c > 96 && c < 123)) {
				const name = parseName();
				// search the beginning of the value: a quote, the next name, or '>'
				let code = source.charCodeAt(pos);
				while (code && code !== singleQuoteCC && code !== doubleQuoteCC && !((code > 64 && code < 91) || (code > 96 && code < 123)) && code !== closeBracketCC) {
					pos++;
					code = source.charCodeAt(pos);
				}
				if (code === singleQuoteCC || code === doubleQuoteCC) {
					const value = parseString();
					if (pos === -1) {
						// unterminated quote: give up on this element
						return {
							tagName,
							attributes,
							children,
						};
					}
					attributes[name] = value;
				} else {
					// attribute without a value is stored as null (stringify relies on this)
					attributes[name] = null;
					pos--;
				}
			}
			pos++;
		}
		// optional parsing of children (skipped for self-closed tags)
		if (source.charCodeAt(pos - 1) !== slashCC) {
			if (tagName == "script") {
				children = [parseRawText('</script>')];
			} else if (tagName == "style") {
				children = [parseRawText('</style>')];
			} else if (NoChildNodes.indexOf(tagName) === -1) {
				pos++;
				children = parseChildren(tagName);
			} else {
				pos++;
			}
		} else {
			pos++;
		}
		return {
			tagName,
			attributes,
			children,
		};
	}

	/**
	 * Parses a quoted string; pos is on the opening quote and ends on the closing quote
	 * (or at -1 when the closing quote is missing).
	 */
	function parseString(): string {
		const startChar = source[pos];
		const startpos = pos + 1;
		pos = source.indexOf(startChar, startpos);
		return source.slice(startpos, pos);
	}

	/**
	 * Returns the index of the next attribute matching attrName/attrValue, or -1.
	 */
	function findElements(): number {
		const r = new RegExp('\\s' + options.attrName + '\\s*=[\'"]' + options.attrValue + '[\'"]').exec(source);
		if (r) {
			return r.index;
		} else {
			return -1;
		}
	}

	let out: (string | Node)[] | null = null;
	if (options.attrValue !== undefined) {
		options.attrName = options.attrName || 'id';
		out = [];

		while ((pos = findElements()) !== -1) {
			// step back to the '<' of the element that owns the attribute
			pos = source.lastIndexOf('<', pos);
			if (pos !== -1) {
				out.push(parseNode());
			}
			// drop what was consumed so the next search starts behind this element
			source = source.slice(pos);
			pos = 0;
		}
	} else if (options.parseNode) {
		// NOTE: in this mode the runtime value is a single node, not an array
		out = parseNode()
	} else {
		out = parseChildren('');
	}

	if (options.filter) {
		out = filter(out, options.filter);
	}

	if (options.simplify) {
		return simplify(Array.isArray(out) ? out : [out]);
	}

	if (options.setPos) {
		out.pos = pos;
	}

	return out;
}

/**
 * Transform the parsed tree into an object similar to what PHP's simplexml_load_*() functions
 * return: one property per tag name, holding the simplified content of that element, or an
 * array of them when the tag occurs more than once.
 *
 * This format keeps a program working through small changes in the XML schema, but it is not
 * possible to reproduce the original XML from it, because the order of elements is not saved.
 *
 * - an empty list becomes ''
 * - a list holding exactly one string becomes that string
 * - otherwise strings between elements are dropped
 * - attributes are stored as `_attributes` on the simplified element, unless it is a string
 *
 * Tag names that collide with members of Object.prototype ("constructor", "toString",
 * "__proto__", ...) are stored as ordinary own properties.
 *
 * @param children the list of children
 * @returns '' | string | object, see above
 */
export function simplify(children: any[]) {
	if (!children.length) {
		return '';
	}

	if (children.length === 1 && typeof children[0] == 'string') {
		return children[0];
	}

	const out: Record<string, any> = {};
	const hasOwn = Object.prototype.hasOwnProperty;

	// map each object
	children.forEach(function (child: { tagName: string | number; children: any; attributes: {}; }) {
		if (typeof child !== 'object') {
			return;
		}
		const tag = child.tagName;
		if (!hasOwn.call(out, tag)) {
			// defineProperty instead of assignment: an inherited member with the same name
			// must neither be mistaken for an existing bucket nor be invoked as a setter
			Object.defineProperty(out, tag, { value: [], writable: true, enumerable: true, configurable: true });
		}
		const kids = simplify(child.children || []);
		out[tag].push(kids);
		if (Object.keys(child.attributes || {}).length && typeof kids !== 'string') {
			kids._attributes = child.attributes;
		}
	});

	// a tag that occurs once is stored directly instead of in a one-element array
	for (const tag of Object.keys(out)) {
		if (out[tag].length == 1) {
			out[tag] = out[tag][0];
		}
	}

	return out;
}


/**
 * Similar to simplify, but loses less information: every tag name always maps to an array
 * (also when the tag occurs only once), and the attributes of an element whose only child is
 * a string are kept by returning `{ _attributes, value }` instead of the bare string.
 *
 * Tag names that collide with members of Object.prototype ("constructor", "toString",
 * "__proto__", ...) are stored as ordinary own properties, and elements without an
 * `attributes` object are treated as having no attributes.
 *
 * @param children the list of children
 * @param parentAttributes attributes of the element that owns `children`
 * @returns {} for an empty list, a string or `{ _attributes, value }` for a single string,
 *   otherwise an object with one array per tag name
 */
export function simplifyLostLess(children: any[], parentAttributes = {}) {
	const out: Record<string, any> = {};
	if (!children.length) {
		return out;
	}

	if (children.length === 1 && typeof children[0] == 'string') {
		return Object.keys(parentAttributes).length ? {
			_attributes: parentAttributes,
			value: children[0]
		} : children[0];
	}

	const hasOwn = Object.prototype.hasOwnProperty;

	// map each object
	children.forEach(function (child: { tagName: string | number; children: any; attributes: {} | undefined; }) {
		if (typeof child !== 'object') {
			return;
		}
		const tag = child.tagName;
		const attributes = child.attributes || {};
		if (!hasOwn.call(out, tag)) {
			// defineProperty instead of assignment: an inherited member with the same name
			// must neither be mistaken for an existing bucket nor be invoked as a setter
			Object.defineProperty(out, tag, { value: [], writable: true, enumerable: true, configurable: true });
		}
		const kids = simplifyLostLess(child.children || [], attributes);
		out[tag].push(kids);
		if (Object.keys(attributes).length) {
			kids._attributes = attributes;
		}
	});

	return out;
}

/**
 * Collects, depth first, every node of the tree for which the predicate returns a truthy
 * value. Works like Array.filter, but also descends into the `children` of each entry;
 * a matching parent is listed before its matching descendants.
 *
 * The predicate is called for object entries only, as f(node, index, depth, path), where
 * `path` is the dot-separated chain of "index.tagName" segments leading to the node's parent.
 *
 * @param children the children of a node
 * @param f the filter method
 * @param dept depth of `children` in the tree (0 for the top level)
 * @param path path of the node that owns `children` ('' for the top level)
 * @returns flat list of matching nodes
 */
export function filter(children: any[], f: (arg0: any, arg1: any, arg2: number, arg3: string) => any, dept = 0, path = '') {
	const prefix = path ? path + '.' : '';
	let matches: any[] = [];

	children.forEach((node, index) => {
		if (typeof node === 'object' && f(node, index, dept, path)) {
			matches.push(node);
		}
		if (!node.children) {
			return;
		}
		const nested = filter(node.children, f, dept + 1, prefix + index + '.' + node.tagName);
		matches = matches.concat(nested);
	});

	return matches;
}

/**
 * Turns a previously parsed tree back into a markup string.
 * This is useful
 *  1. to remove whitespace (text and attribute values are trimmed), and
 *  2. to recreate XML data after some of it was changed.
 *
 * Attributes whose value is null are written as a bare name. Values are wrapped in double
 * quotes, or in single quotes when they contain a double quote. Elements whose tag name
 * starts with '?' are closed with "?>" and written without children; every other element
 * gets an explicit closing tag.
 *
 * @param O the list of nodes / strings to stringify
 * @returns the generated markup
 */
export function stringify(O: any) {
	const parts: string[] = [];

	const emitNode = (node: { tagName: string | string[]; attributes: { [x: string]: string; }; children: any; }): void => {
		parts.push("<" + node.tagName);
		for (const attr in node.attributes) {
			const raw = node.attributes[attr];
			if (raw === null) {
				parts.push(' ' + attr);
				continue;
			}
			const quote = raw.indexOf('"') === -1 ? '"' : "'";
			parts.push(' ' + attr + '=' + quote + raw.trim() + quote);
		}
		if (node.tagName[0] === '?') {
			parts.push('?>');
			return;
		}
		parts.push('>');
		emitChildren(node.children);
		parts.push('</' + node.tagName + '>');
	};

	const emitChildren = (list: string | any[]): void => {
		if (!list) {
			return;
		}
		for (let idx = 0; idx < list.length; idx++) {
			const entry = list[idx];
			if (typeof entry == 'string') {
				parts.push(entry.trim());
			} else {
				emitNode(entry);
			}
		}
	};

	emitChildren(O);
	return parts.join('');
}


/**
 * Use this method to read the text content of some node.
 * It is great if you have mixed content like:
 * this text has some <b>big</b> text and a <a href=''>link</a>
 *
 * - a list yields the content of its entries, joined and trimmed after every entry
 * - a node yields the content of its children
 * - any other value yields ' ' followed by that value
 *
 * The parameter type covers everything `parse` can produce, so results can be passed in
 * without a cast.
 *
 * @param tDom a string, a node, or a list of both
 * @return {string}
 */
export function toContentString(tDom: string | Node | (string | Node)[]): string {
	if (Array.isArray(tDom)) {
		let out = '';
		tDom.forEach(function (e) {
			out += ' ' + toContentString(e);
			out = out.trim();
		});
		return out;
	} else if (typeof tDom === 'object') {
		return toContentString(tDom.children);
	} else {
		return ' ' + tDom;
	}
}

/**
 * Finds the element whose `id` attribute equals `id`, without parsing the whole document.
 *
 * `parse` builds a regular expression from the attribute value, so the id is escaped here
 * to be matched literally. Otherwise an id such as "a.b" would also match "axb", and an id
 * such as "a(b" would throw a SyntaxError.
 *
 * @param S the XML / HTML source
 * @param id the id to look for; null / undefined are passed to `parse` unchanged
 * @param simplified when truthy, the matches are returned through `simplify`
 * @returns the first matching node (undefined when there is none), or the simplified matches
 */
export function getElementById(S: string, id: any, simplified: any) {
	const attrValue = id == null ? id : String(id).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
	const out = parse(S, {
		attrValue
	});
	return simplified ? simplify(out) : out[0];
}

/**
 * Finds the elements whose `class` attribute contains `classname`, without parsing the
 * whole document. The class attribute may hold further letters, digits, '-' and spaces
 * before and after the name.
 *
 * @param S the XML / HTML source
 * @param classname the class name to look for (used as regular-expression source by `parse`)
 * @param simplified when truthy, the matches are returned through `simplify`
 * @returns the matching nodes, or their simplified form
 */
export function getElementsByClassName(S: string, classname: string, simplified: any) {
	const otherClassChars = '[a-zA-Z0-9- ]*';
	const matches = parse(S, {
		attrName: 'class',
		attrValue: otherClassChars + classname + otherClassChars
	});
	if (simplified) {
		return simplify(matches);
	}
	return matches;
}