Files
online-energieausweis/src/lib/helpers/txml.ts
2025-01-12 14:25:35 +07:00

481 lines
13 KiB
TypeScript

// ==ClosureCompiler==
// @output_file_name default.js
// @compilation_level SIMPLE_OPTIMIZATIONS
// ==/ClosureCompiler==
// module.exports = {
// parse: parse,
// simplify: simplify,
// simplifyLostLess: simplifyLostLess,
// filter: filter,
// stringify: stringify,
// toContentString: toContentString,
// getElementById: getElementById,
// getElementsByClassName: getElementsByClassName,
// transformStream: transformStream,
// };
/**
* @author: Tobias Nickel
* @created: 06.04.2015
* I needed a small xmlparser that can be used in a worker.
*/
/**
* A parsed element as produced by `parse`.
*/
interface Node {
/** Tag name as read from the source; processing instructions keep their leading `?` (e.g. `?xml`). */
tagName: string;
/**
* Attribute name -> attribute value.
* NOTE(review): the parser stores `null` for attributes written without a value
* (and `stringify` checks for `null`), which this declared type does not reflect.
*/
attributes: Record<string, string>;
/**
* Child elements and text in document order. Kept comments, CDATA content and
* doctype declarations are stored as plain strings.
*/
children: (Node | string)[];
}
/**
 * Options accepted by `parse`. Every option is optional: `parse` itself
 * defaults its options argument to `{}`.
 */
interface ParseOptions {
    /** Attribute to search for when `attrValue` is set; `parse` falls back to `'id'`. */
    attrName?: string;
    /**
     * When set, only elements whose `attrName` attribute matches this value are
     * parsed. The value is interpolated unescaped into a regular expression.
     */
    attrValue?: string;
    /** When truthy, a single node starting at `pos` is parsed instead of a list of children. */
    parseNode?: boolean;
    /** Index in the source string at which parsing starts (default `0`). */
    pos?: number;
    /** Tag names that are parsed without children (void elements). */
    noChildNodes?: string[];
    /** When truthy, the final parser position is stored as `pos` on the returned value. */
    setPos?: boolean;
    /** Keep `<!-- ... -->` comments as string children. */
    keepComments?: boolean;
    /** Keep text nodes untrimmed, including whitespace-only text. */
    keepWhitespace?: boolean;
    /** Return the result of `simplify()` instead of the node list. */
    simplify?: boolean;
    /**
     * Predicate handed to `filter()`.
     * NOTE(review): `filter()` actually invokes it as (node, index, depth, path);
     * the declared second parameter type does not reflect that.
     */
    filter?: (a: Node, b: Node) => boolean;
}
/**
 * Parse XML / HTML into a lightweight DOM-like structure, without validation
 * and with some tolerance for malformed input.
 *
 * Supported options:
 * - `pos`: index in `source` at which parsing starts (default `0`).
 * - `keepComments`: keep `<!-- ... -->` comments as string children.
 * - `keepWhitespace`: keep text untrimmed, including whitespace-only text.
 * - `noChildNodes`: tag names parsed as void elements
 *   (default `img`, `br`, `input`, `meta`, `link`, `hr`).
 * - `attrValue` / `attrName`: only parse elements that have an attribute
 *   `attrName` (default `id`) whose quoted value matches `attrValue`. Both are
 *   interpolated verbatim into a regular expression, so regex syntax in them
 *   is interpreted.
 * - `parseNode`: parse exactly one node starting at `pos` and return that
 *   node instead of a list.
 * - `filter`: run the result through `filter()`.
 * - `simplify`: return `simplify()` of the result.
 * - `setPos`: store the final parser position as `pos` on the returned value.
 *
 * An unterminated `<script>` or `<style>` element consumes the rest of the
 * input as its text content.
 *
 * @param source the XML to parse
 * @param options parser options, see above
 * @returns the parsed top-level children (or, depending on the options, a
 *          single node or a simplified object)
 * @throws Error when a closing tag does not contain the name of the element
 *         that is currently being parsed
 */
export function parse(source: string, options: ParseOptions = {}): (Node | string)[] {
    let pos = options.pos || 0;
    const keepComments = !!options.keepComments;
    const keepWhitespace = !!options.keepWhitespace;
    const noChildNodes = options.noChildNodes || ['img', 'br', 'input', 'meta', 'link', 'hr'];
    // local copy so the caller's options object is not modified
    const attrName = options.attrName || 'id';

    const openBracket = '<';
    const openBracketCC = '<'.charCodeAt(0);
    const closeBracket = '>';
    const closeBracketCC = '>'.charCodeAt(0);
    const minusCC = '-'.charCodeAt(0);
    const slashCC = '/'.charCodeAt(0);
    const exclamationCC = '!'.charCodeAt(0);
    const singleQuoteCC = "'".charCodeAt(0);
    const doubleQuoteCC = '"'.charCodeAt(0);
    const openCornerBracketCC = '['.charCodeAt(0);
    const closeCornerBracketCC = ']'.charCodeAt(0);
    // characters that terminate a tag or attribute name
    const nameSpacer = '\r\n\t>/= ';

    /** true for the char codes of A-Z and a-z */
    function isAsciiLetter(code: number): boolean {
        return (code > 64 && code < 91) || (code > 96 && code < 123);
    }

    /**
     * parses a list of entries until the closing tag of `tagName` or the end of the input
     */
    function parseChildren(tagName: string): (Node | string)[] {
        const children: (Node | string)[] = [];
        while (source[pos]) {
            if (source.charCodeAt(pos) === openBracketCC) {
                const next = source.charCodeAt(pos + 1);
                if (next === slashCC) {
                    // closing tag: ends the current list of children
                    const closeStart = pos + 2;
                    pos = source.indexOf(closeBracket, pos);
                    const closeTag = source.substring(closeStart, pos);
                    if (closeTag.indexOf(tagName) == -1) {
                        const parsedText = source.substring(0, pos).split('\n');
                        throw new Error(
                            'Unexpected close tag\nLine: ' + (parsedText.length - 1) +
                            '\nColumn: ' + (parsedText[parsedText.length - 1].length + 1) +
                            '\nChar: ' + source[pos]
                        );
                    }
                    // pos is -1 when the '>' is missing; it is left at -1 so every enclosing loop stops
                    if (pos + 1) pos += 1;
                    return children;
                }
                if (next === exclamationCC) {
                    if (source.charCodeAt(pos + 2) === minusCC) {
                        // comment: skip forward from '>' to '>' until one is preceded by '--'
                        const startCommentPos = pos;
                        while (
                            pos !== -1 &&
                            !(
                                source.charCodeAt(pos) === closeBracketCC &&
                                source.charCodeAt(pos - 1) === minusCC &&
                                source.charCodeAt(pos - 2) === minusCC
                            )
                        ) {
                            pos = source.indexOf(closeBracket, pos + 1);
                        }
                        if (pos === -1) {
                            pos = source.length;
                        }
                        if (keepComments) {
                            children.push(source.substring(startCommentPos, pos + 1));
                        }
                    } else if (
                        source.charCodeAt(pos + 2) === openCornerBracketCC &&
                        source.charCodeAt(pos + 8) === openCornerBracketCC &&
                        source.slice(pos + 3, pos + 8).toLowerCase() === 'cdata'
                    ) {
                        // cdata: content is taken verbatim up to ']]>' (or the end of the input)
                        const cdataEndIndex = source.indexOf(']]>', pos);
                        if (cdataEndIndex == -1) {
                            children.push(source.slice(pos + 9));
                            pos = source.length;
                        } else {
                            children.push(source.substring(pos + 9, cdataEndIndex));
                            pos = cdataEndIndex + 3;
                        }
                        continue;
                    } else {
                        // doctype: a '>' inside [...] does not end the declaration
                        const startDoctype = pos + 1;
                        pos += 2;
                        let encapsuled = false;
                        while ((source.charCodeAt(pos) !== closeBracketCC || encapsuled) && source[pos]) {
                            if (source.charCodeAt(pos) === openCornerBracketCC) {
                                encapsuled = true;
                            } else if (encapsuled && source.charCodeAt(pos) === closeCornerBracketCC) {
                                encapsuled = false;
                            }
                            pos++;
                        }
                        children.push(source.substring(startDoctype, pos));
                    }
                    pos++;
                    continue;
                }
                const node = parseNode();
                children.push(node);
                if (node.tagName[0] === '?') {
                    // a processing instruction has no closing tag: what was parsed as
                    // its children really are its following siblings
                    children.push(...node.children);
                    node.children = [];
                }
            } else {
                const text = parseText();
                if (keepWhitespace) {
                    if (text.length > 0) {
                        children.push(text);
                    }
                } else {
                    const trimmed = text.trim();
                    if (trimmed.length > 0) {
                        children.push(trimmed);
                    }
                }
                pos++;
            }
        }
        return children;
    }

    /**
     * returns the text up to the next '<'; leaves pos on the last character of the text
     */
    function parseText(): string {
        const start = pos;
        pos = source.indexOf(openBracket, pos) - 1;
        if (pos === -2) {
            // no further '<': the text runs to the end of the input
            pos = source.length;
        }
        return source.slice(start, pos + 1);
    }

    /**
     * returns the text up to the first character that terminates a name
     */
    function parseName(): string {
        const start = pos;
        while (source[pos] && nameSpacer.indexOf(source[pos]) === -1) {
            pos++;
        }
        return source.slice(start, pos);
    }

    /**
     * returns the content of a quoted string; pos is left on the closing quote,
     * or at -1 when the string is not terminated
     */
    function parseString(): string {
        const startChar = source[pos];
        const startpos = pos + 1;
        pos = source.indexOf(startChar, startpos);
        return source.slice(startpos, pos);
    }

    /**
     * returns the raw text of a script / style element and moves pos behind
     * `closingTag`; without a closing tag the rest of the input is consumed
     */
    function parseRawText(closingTag: string): string {
        const start = pos + 1;
        const end = source.indexOf(closingTag, pos);
        if (end === -1) {
            // continuing from "-1 + closingTag.length" would re-parse earlier input
            pos = source.length;
            return source.slice(start);
        }
        pos = end + closingTag.length;
        return source.slice(start, end);
    }

    /**
     * parses a node including tagName, attributes and its children;
     * children are parsed with parseChildren, which makes the parsing recursive
     */
    function parseNode(): Node {
        pos++;
        const tagName = parseName();
        const attributes: Record<string, string> = {};
        let children: (string | Node)[] = [];

        // parsing attributes
        while (source.charCodeAt(pos) !== closeBracketCC && source[pos]) {
            if (isAsciiLetter(source.charCodeAt(pos))) {
                const name = parseName();
                // search the beginning of the value: a quote, or the next name / '>' when there is no value
                let code = source.charCodeAt(pos);
                while (
                    code &&
                    code !== singleQuoteCC &&
                    code !== doubleQuoteCC &&
                    !isAsciiLetter(code) &&
                    code !== closeBracketCC
                ) {
                    pos++;
                    code = source.charCodeAt(pos);
                }
                let value: string | null;
                if (code === singleQuoteCC || code === doubleQuoteCC) {
                    value = parseString();
                    if (pos === -1) {
                        // unterminated attribute value: give up on this node
                        return { tagName, attributes, children };
                    }
                } else {
                    value = null;
                    pos--;
                }
                // attributes without a value are stored as null (stringify relies on that)
                attributes[name] = value as string;
            }
            pos++;
        }

        // optional parsing of children, skipped for self-closing tags
        if (source.charCodeAt(pos - 1) !== slashCC) {
            if (tagName == 'script') {
                children = [parseRawText('</script>')];
            } else if (tagName == 'style') {
                children = [parseRawText('</style>')];
            } else if (noChildNodes.indexOf(tagName) === -1) {
                pos++;
                children = parseChildren(tagName);
            } else {
                pos++;
            }
        } else {
            pos++;
        }
        return { tagName, attributes, children };
    }

    /**
     * returns the index of the next attribute matching attrName / attrValue, or -1
     */
    function findElements(): number {
        const match = new RegExp('\\s' + attrName + '\\s*=[\'"]' + options.attrValue + '[\'"]').exec(source);
        return match ? match.index : -1;
    }

    let out: (string | Node)[] | Node;
    if (options.attrValue !== undefined) {
        const matches: (string | Node)[] = [];
        while ((pos = findElements()) !== -1) {
            // step back to the '<' of the element that owns the matched attribute
            pos = source.lastIndexOf('<', pos);
            if (pos !== -1) {
                matches.push(parseNode());
            }
            // drop the consumed input so the next search starts behind it
            source = source.slice(pos);
            pos = 0;
        }
        out = matches;
    } else if (options.parseNode) {
        out = parseNode();
    } else {
        out = parseChildren('');
    }

    if (options.filter) {
        out = filter(Array.isArray(out) ? out : [out], options.filter);
    }
    if (options.simplify) {
        return simplify(Array.isArray(out) ? out : [out]);
    }
    if (options.setPos) {
        Object.assign(out, { pos });
    }
    // with the parseNode option a single node is returned despite the declared array type
    return out as (Node | string)[];
}
/**
 * Transform the DOM object into an object similar to the result of PHP's
 * simplexml_load_*() functions.
 * This format helps you to write code that is more likely to keep working even
 * if there are small changes in the XML schema.
 * Be aware that the original XML cannot be reproduced from a simplified
 * version: the order of elements is not saved, text that sits next to
 * elements is dropped, and attributes of text-only or empty elements are lost.
 *
 * - an empty list becomes `''`
 * - a list holding a single string becomes that string
 * - otherwise an object keyed by tag name is returned; a tag that occurs once
 *   maps directly to its simplified content, a repeated tag maps to an array
 *
 * @param children the children list
 */
export function simplify(children: any[]) {
    if (!children.length) {
        return '';
    }
    if (children.length === 1 && typeof children[0] == 'string') {
        return children[0];
    }

    const out: Record<string, any> = {};
    for (const child of children) {
        // text next to elements is ignored
        if (typeof child !== 'object' || child === null) {
            continue;
        }
        const key = child.tagName;
        // own-property check: tags named like Object.prototype members
        // ("constructor", "toString", ...) must not hit the inherited value
        if (!Object.prototype.hasOwnProperty.call(out, key)) {
            out[key] = [];
        }
        const kids = simplify(child.children || []);
        out[key].push(kids);
        if (typeof kids !== 'string' && Object.keys(child.attributes || {}).length) {
            kids._attributes = child.attributes;
        }
    }

    // collapse tags that occurred only once
    for (const key of Object.keys(out)) {
        if (out[key].length == 1) {
            out[key] = out[key][0];
        }
    }
    return out;
}
/**
 * Similar to simplify, but loses less information: every tag always maps to
 * an array, and the attributes of text-only elements are kept.
 *
 * - an empty list becomes `{}`
 * - a list holding a single string becomes that string, or
 *   `{ _attributes, value }` when the parent element has attributes
 * - otherwise an object mapping each tag name to the array of its simplified
 *   occurrences is returned
 *
 * @param children the children list
 * @param parentAttributes attributes of the element that owns `children`
 */
export function simplifyLostLess(children: any[], parentAttributes = {}) {
    const out: Record<string, any> = {};
    if (!children.length) {
        return out;
    }
    if (children.length === 1 && typeof children[0] == 'string') {
        return Object.keys(parentAttributes).length
            ? { _attributes: parentAttributes, value: children[0] }
            : children[0];
    }

    for (const child of children) {
        // text next to elements is ignored
        if (typeof child !== 'object' || child === null) {
            continue;
        }
        const key = child.tagName;
        // own-property check: tags named like Object.prototype members
        // ("constructor", "toString", ...) must not hit the inherited value
        if (!Object.prototype.hasOwnProperty.call(out, key)) {
            out[key] = [];
        }
        // nodes without an attributes object are treated as having none
        const attributes = child.attributes || {};
        const kids = simplifyLostLess(child.children || [], attributes);
        out[key].push(kids);
        if (Object.keys(attributes).length) {
            // kids is always an object here: a text-only child with attributes
            // has been wrapped into { _attributes, value } above
            kids._attributes = attributes;
        }
    }
    return out;
}
/**
 * Behaves like Array.filter applied to the whole tree: every element node for
 * which the filter method returns a truthy value is part of the flat result
 * list. Parents are visited before their children; text children are never
 * passed to the filter method.
 *
 * @param children the children of a node
 * @param f the filter method, called as f(node, index, depth, path) where
 *          index is the position in the parent's children and path is the
 *          dot separated chain of "index.tagName" of all ancestors
 * @param dept depth of `children` in the tree, 0 for the top level
 * @param path path of the parent of `children`, '' for the top level
 * @returns the matching nodes in document order
 */
export function filter(children: any[], f: (arg0: any, arg1: any, arg2: number, arg3: string) => any, dept = 0, path = '') {
    let out: any[] = [];
    children.forEach(function (child: any, i: number) {
        if (child !== null && typeof child === 'object' && f(child, i, dept, path)) {
            out.push(child);
        }
        if (child && child.children) {
            const kids = filter(child.children, f, dept + 1, (path ? path + '.' : '') + i + '.' + child.tagName);
            out = out.concat(kids);
        }
    });
    return out;
}
/**
 * Turn a previously parsed list of nodes back into an XML string.
 * This is useful
 * 1. to remove whitespace (text is trimmed), and
 * 2. to recreate xml data after some of it has been changed.
 *
 * Attributes whose value is `null` are written without a value; values that
 * contain a double quote are wrapped in single quotes. Nodes whose tag name
 * starts with `?` are closed with `?>` and get neither children nor a
 * closing tag.
 *
 * @param O the list of nodes / strings to stringify
 */
export function stringify(O: any) {
    // serializes the attribute map of one node, each attribute with a leading space
    const renderAttributes = (attributes: { [x: string]: string }): string => {
        let text = '';
        for (const key in attributes) {
            const value = attributes[key];
            if (value === null) {
                text += ' ' + key;
                continue;
            }
            const quote = value.indexOf('"') === -1 ? '"' : "'";
            text += ' ' + key + '=' + quote + value.trim() + quote;
        }
        return text;
    };

    // serializes one element including its children
    const renderNode = (node: { tagName: string | string[]; attributes: { [x: string]: string }; children: any }): string => {
        const head = '<' + node.tagName + renderAttributes(node.attributes);
        if (node.tagName[0] === '?') {
            return head + '?>';
        }
        return head + '>' + renderChildren(node.children) + '</' + node.tagName + '>';
    };

    // serializes a list of children: strings are trimmed, everything else is treated as a node
    const renderChildren = (list: string | any[]): string => {
        if (!list) {
            return '';
        }
        let text = '';
        for (let idx = 0; idx < list.length; idx++) {
            const entry = list[idx];
            text += typeof entry == 'string' ? entry.trim() : renderNode(entry);
        }
        return text;
    };

    return renderChildren(O);
}
/** Anything toContentString can walk: a text, an element-like object or a list of those. */
type ContentInput = string | { children: ContentInput[] } | ContentInput[];

/**
 * Use this method to read the text content of some node.
 * It is great if you have mixed content like:
 * this text has some <b>big</b> text and a <a href=''>link</a>
 *
 * Lists are joined and trimmed, elements contribute the content of their
 * children. A plain string is returned with one leading space, which is why
 * texts inside a list end up separated by two spaces from what precedes them.
 *
 * @param tDom a text, a parsed node or a list of texts / parsed nodes
 * @return {string}
 */
export function toContentString(tDom: ContentInput): string {
    if (Array.isArray(tDom)) {
        let out = '';
        tDom.forEach(function (e) {
            out += ' ' + toContentString(e);
            out = out.trim();
        });
        return out;
    } else if (typeof tDom === 'object') {
        return toContentString(tDom.children);
    } else {
        return ' ' + tDom;
    }
}
/**
 * Find the first element whose `id` attribute equals `id`, without parsing
 * the rest of the document.
 *
 * @param S the XML source
 * @param id the id to look for; it is matched literally
 * @param simplified when truthy, the `simplify()`-ed form of all matches is
 *        returned instead of the first matching node
 * @returns the first matching node (`undefined` when there is none), or the
 *          simplified matches
 */
export function getElementById(S: string, id: any, simplified: any) {
    const out = parse(S, {
        // parse() interpolates attrValue into a RegExp, so regex metacharacters
        // in the id have to be escaped to be matched literally
        attrValue: id === undefined ? undefined : String(id).replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    });
    return simplified ? simplify(out) : out[0];
}
/**
 * Find all elements whose `class` attribute contains `classname`, without
 * parsing the rest of the document. The class name is matched literally and
 * as a substring of the attribute value (`btn` also matches `btn-primary`).
 *
 * @param S the XML source
 * @param classname the class name to look for
 * @param simplified when truthy, the `simplify()`-ed form of the matches is returned
 * @returns the matching nodes, or their simplified form
 */
export function getElementsByClassName(S: string, classname: string, simplified: any) {
    // parse() interpolates attrValue into a RegExp: escape the class name so it is matched literally
    const escaped = String(classname).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const out = parse(S, {
        attrName: 'class',
        // allow any other characters inside the quotes, so that neighbouring
        // classes such as "my_class" or "md:flex" do not prevent a match
        attrValue: '[^\'"]*' + escaped + '[^\'"]*'
    });
    return simplified ? simplify(out) : out;
}