3 changed files with 382 additions and 133 deletions
@ -0,0 +1,374 @@
@@ -0,0 +1,374 @@
|
||||
/* Copyright 2018 Mozilla Foundation |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
// The code for XMLParserBase was copied from
// https://github.com/mozilla/shumway/blob/16451d8836fa85f4b16eeda8b4bda2fa9e2b22b0/src/avm2/natives/xml.ts
|
||||
|
||||
/**
 * Numeric codes handed to the parser's `onError` callback.
 *
 * `NoError` is 0 and every failure is a distinct negative integer. The
 * object is frozen so the shared table cannot be modified by accident at
 * runtime; all names and values are otherwise unchanged.
 */
const XMLParserErrorCode = Object.freeze({
  NoError: 0,
  EndOfDocument: -1,
  // `<![CDATA[` without a closing `]]>`. The historical spelling (no
  // trailing "a") is kept because the name is part of the interface.
  UnterminatedCdat: -2,
  // `<?` without a matching `?>`.
  UnterminatedXmlDeclaration: -3,
  // `<!DOCTYPE` without its closing `>` (or `]>` for an internal subset).
  UnterminatedDoctypeDeclaration: -4,
  // `<!--` without a closing `-->`.
  UnterminatedComment: -5,
  // Invalid attribute syntax in a start tag, or an unknown `<!...` construct.
  MalformedElement: -6,
  OutOfMemory: -7,
  UnterminatedAttributeValue: -8,
  // A tag that is not closed by `>` / `/>`.
  UnterminatedElement: -9,
  ElementNeverBegun: -10,
});
||||
|
||||
/**
 * Tells whether the character of `s` at position `index` is one of the four
 * whitespace characters the parser recognizes: space, line feed, carriage
 * return or tab.
 *
 * @param {string} s - The text being scanned.
 * @param {number} index - Position of the character to test.
 * @returns {boolean} `true` for whitespace; `false` otherwise, including
 *   when `index` lies outside the string.
 */
function isWhitespace(s, index) {
  switch (s[index]) {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
      return true;
    default:
      return false;
  }
}
||||
|
||||
/**
 * Tells whether `s` consists solely of whitespace characters, as judged by
 * `isWhitespace`. The empty string counts as whitespace-only.
 *
 * @param {string} s - The text to inspect.
 * @returns {boolean} `false` as soon as a non-whitespace character is found,
 *   `true` otherwise.
 */
function isWhitespaceString(s) {
  const length = s.length;
  let offset = 0;
  while (offset < length) {
    if (!isWhitespace(s, offset)) {
      return false;
    }
    offset++;
  }
  return true;
}
||||
|
||||
/**
 * Small callback-driven XML tokenizer.
 *
 * `parseXml` walks a string once and reports what it finds through the
 * `on*` hook methods (processing instructions, comments, CDATA, doctype,
 * text, start/end tags and errors). All hooks are no-ops here, apart from
 * `onResolveEntity`, which echoes the entity back; subclasses override the
 * ones they need.
 */
class XMLParserBase {
  /**
   * Replaces `&...;` references in `s`.
   *
   * Handles numeric references (`&#NN;`, `&#xHH;`) and the built-in names
   * `lt`, `gt`, `amp` and `quot`; every other name is delegated to
   * `onResolveEntity`.
   *
   * @param {string} s - Raw text or attribute value.
   * @returns {string} The text with references substituted.
   */
  _resolveEntities(s) {
    // Numeric references are decoded with `String.fromCodePoint` so that
    // characters above U+FFFF are not truncated to 16 bits. A reference that
    // does not denote a valid code point is left in the output untouched.
    const decodeCodePoint = (reference, digits, radix) => {
      const codePoint = parseInt(digits, radix);
      if (!(codePoint >= 0 && codePoint <= 0x10FFFF)) {
        return reference;
      }
      return String.fromCodePoint(codePoint);
    };

    // The callback has to be an arrow function: `this.onResolveEntity` is
    // looked up on the parser, and a plain `function` callback would run
    // with `this === undefined` in module (strict) code.
    return s.replace(/&([^;]+);/g, (all, entity) => {
      if (entity.substring(0, 2) === '#x') {
        return decodeCodePoint(all, entity.substring(2), 16);
      } else if (entity.substring(0, 1) === '#') {
        return decodeCodePoint(all, entity.substring(1), 10);
      }
      switch (entity) {
        case 'lt':
          return '<';
        case 'gt':
          return '>';
        case 'amp':
          return '&';
        case 'quot':
          return '"';
      }
      return this.onResolveEntity(entity);
    });
  }

  /**
   * Parses the inside of a start tag: the element name followed by its
   * attributes.
   *
   * @param {string} s - The whole document.
   * @param {number} start - Index of the first character after `<`.
   * @returns {?{name: string, attributes: Array<{name: string, value: string}>,
   *   parsed: number}} `null` when an attribute is malformed (missing `=`,
   *   missing or unterminated quote). Otherwise `parsed` is the number of
   *   characters consumed, so `start + parsed` is where scanning stopped
   *   (at `>`, `/`, `?` or the end of the input).
   */
  _parseContent(s, start) {
    const attributes = [];
    let pos = start;

    function skipWs() {
      while (pos < s.length && isWhitespace(s, pos)) {
        ++pos;
      }
    }

    // Element name: everything up to whitespace, `>` or `/`.
    while (pos < s.length && !isWhitespace(s, pos) &&
           s[pos] !== '>' && s[pos] !== '/') {
      ++pos;
    }
    const name = s.substring(start, pos);
    skipWs();

    while (pos < s.length && s[pos] !== '>' &&
           s[pos] !== '/' && s[pos] !== '?') {
      // Attribute name: everything up to whitespace or `=`.
      const attrNameStart = pos;
      while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== '=') {
        ++pos;
      }
      const attrName = s.substring(attrNameStart, pos);
      skipWs();
      if (s[pos] !== '=') {
        return null;
      }
      ++pos;
      skipWs();

      // The value must be quoted; it ends at the next matching quote.
      const attrEndChar = s[pos];
      if (attrEndChar !== '"' && attrEndChar !== '\'') {
        return null;
      }
      const attrEndIndex = s.indexOf(attrEndChar, ++pos);
      if (attrEndIndex < 0) {
        return null;
      }
      const attrValue = s.substring(pos, attrEndIndex);
      attributes.push({
        name: attrName,
        value: this._resolveEntities(attrValue),
      });
      pos = attrEndIndex + 1;
      skipWs();
    }
    return {
      name,
      attributes,
      parsed: pos - start,
    };
  }

  /**
   * Parses the inside of a processing instruction: the target name and the
   * raw text that follows it.
   *
   * @param {string} s - The whole document.
   * @param {number} start - Index of the first character after `<?`.
   * @returns {{name: string, value: string, parsed: number}} `parsed` is the
   *   number of characters consumed; `start + parsed` is the position of the
   *   closing `?>`, or the end of the input when there is none.
   */
  _parseProcessingInstruction(s, start) {
    let pos = start;

    function skipWs() {
      while (pos < s.length && isWhitespace(s, pos)) {
        ++pos;
      }
    }

    // The target name also ends at `?`, so that an instruction without any
    // content (e.g. `<?xml?>`) does not swallow its own terminator.
    while (pos < s.length && !isWhitespace(s, pos) &&
           s[pos] !== '>' && s[pos] !== '?' && s[pos] !== '/') {
      ++pos;
    }
    const name = s.substring(start, pos);
    skipWs();

    const attrStart = pos;
    while (pos < s.length && (s[pos] !== '?' || s[pos + 1] !== '>')) {
      ++pos;
    }
    const value = s.substring(attrStart, pos);
    return {
      name,
      value,
      parsed: pos - start,
    };
  }

  /**
   * Scans `s` from start to end and invokes the matching `on*` hook for each
   * construct found. On the first syntax error `onError` is called with an
   * `XMLParserErrorCode` value and scanning stops.
   *
   * @param {string} s - The XML text.
   */
  parseXml(s) {
    let i = 0;
    while (i < s.length) {
      const ch = s[i];
      let j = i;
      if (ch === '<') {
        ++j;
        const ch2 = s[j];
        let q;
        switch (ch2) {
          case '/': {
            // End tag: `</name>`.
            ++j;
            q = s.indexOf('>', j);
            if (q < 0) {
              this.onError(XMLParserErrorCode.UnterminatedElement);
              return;
            }
            this.onEndElement(s.substring(j, q));
            j = q + 1;
            break;
          }
          case '?': {
            // Processing instruction: `<?name value?>`.
            ++j;
            const pi = this._parseProcessingInstruction(s, j);
            if (s.substring(j + pi.parsed, j + pi.parsed + 2) !== '?>') {
              this.onError(XMLParserErrorCode.UnterminatedXmlDeclaration);
              return;
            }
            this.onPi(pi.name, pi.value);
            j += pi.parsed + 2;
            break;
          }
          case '!': {
            if (s.substring(j + 1, j + 3) === '--') {
              // Comment: `<!-- ... -->`.
              q = s.indexOf('-->', j + 3);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedComment);
                return;
              }
              this.onComment(s.substring(j + 3, q));
              j = q + 3;
            } else if (s.substring(j + 1, j + 8) === '[CDATA[') {
              // CDATA section: `<![CDATA[ ... ]]>`.
              q = s.indexOf(']]>', j + 8);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedCdat);
                return;
              }
              this.onCdata(s.substring(j + 8, q));
              j = q + 3;
            } else if (s.substring(j + 1, j + 8) === 'DOCTYPE') {
              const q2 = s.indexOf('[', j + 8);
              let complexDoctype = false;
              q = s.indexOf('>', j + 8);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedDoctypeDeclaration);
                return;
              }
              // A `[` before the first `>` means the doctype carries a
              // bracketed section, which ends at `]>` instead.
              if (q2 > 0 && q > q2) {
                q = s.indexOf(']>', j + 8);
                if (q < 0) {
                  this.onError(
                    XMLParserErrorCode.UnterminatedDoctypeDeclaration);
                  return;
                }
                complexDoctype = true;
              }
              const doctypeContent =
                s.substring(j + 8, q + (complexDoctype ? 1 : 0));
              this.onDoctype(doctypeContent);
              j = q + (complexDoctype ? 2 : 1);
            } else {
              this.onError(XMLParserErrorCode.MalformedElement);
              return;
            }
            break;
          }
          default: {
            // Start tag, optionally self-closing: `<name ...>` / `<name .../>`.
            const content = this._parseContent(s, j);
            if (content === null) {
              this.onError(XMLParserErrorCode.MalformedElement);
              return;
            }
            let isClosed = false;
            if (s.substring(j + content.parsed,
                            j + content.parsed + 2) === '/>') {
              isClosed = true;
            } else if (s.substring(j + content.parsed,
                                   j + content.parsed + 1) !== '>') {
              this.onError(XMLParserErrorCode.UnterminatedElement);
              return;
            }
            this.onBeginElement(content.name, content.attributes, isClosed);
            j += content.parsed + (isClosed ? 2 : 1);
            break;
          }
        }
      } else {
        // Character data up to the next `<` (or the end of the input).
        while (j < s.length && s[j] !== '<') {
          j++;
        }
        const text = s.substring(i, j);
        this.onText(this._resolveEntities(text));
      }
      i = j;
    }
  }

  /**
   * Hook for entity names `_resolveEntities` does not know.
   * The default puts the reference back unchanged.
   *
   * @param {string} name - Entity name without `&` and `;`.
   * @returns {string} Replacement text.
   */
  onResolveEntity(name) {
    return `&${name};`;
  }

  /** Hook: processing instruction with its target `name` and raw `value`. */
  onPi(name, value) { }

  /** Hook: comment; `text` is the content between `<!--` and `-->`. */
  onComment(text) { }

  /** Hook: CDATA section; `text` is the unprocessed content. */
  onCdata(text) { }

  /** Hook: doctype; `doctypeContent` is the text following `<!DOCTYPE`. */
  onDoctype(doctypeContent) { }

  /** Hook: character data, with entity references already resolved. */
  onText(text) { }

  /**
   * Hook: start tag. `attributes` is an array of `{ name, value }` objects
   * and `isEmpty` is `true` for a self-closing tag (`<name/>`).
   */
  onBeginElement(name, attributes, isEmpty) { }

  /** Hook: end tag; `name` is the text between `</` and `>`. */
  onEndElement(name) { }

  /** Hook: syntax error; `code` is an `XMLParserErrorCode` value. */
  onError(code) { }
}
||||
|
||||
/**
 * Bare-bones stand-in for a DOM node.
 *
 * A node has a `nodeName`, an optional `nodeValue` and a `parentNode` link
 * that starts out as `null`. The constructor does not create `childNodes`;
 * whoever builds the tree assigns that array on nodes that can have
 * children, so every accessor below tolerates it being absent.
 */
class SimpleDOMNode {
  /**
   * @param {string} nodeName - Name of the node.
   * @param {string} [nodeValue] - Text carried by the node, if any.
   */
  constructor(nodeName, nodeValue) {
    this.nodeName = nodeName;
    this.nodeValue = nodeValue;

    // Defined through a descriptor so the back-reference is writable but
    // not enumerable.
    Object.defineProperty(this, 'parentNode', { value: null, writable: true, });
  }

  /**
   * First child, or `undefined` when the node has no `childNodes` array or
   * the array is empty.
   */
  get firstChild() {
    return this.childNodes ? this.childNodes[0] : undefined;
  }

  /**
   * Node following this one in the parent's `childNodes`.
   * `undefined` when there is no parent, the parent has no children list,
   * this node is not in that list, or it is the last entry.
   */
  get nextSibling() {
    const siblings = this.parentNode && this.parentNode.childNodes;
    if (!siblings) {
      return undefined;
    }
    const index = siblings.indexOf(this);
    if (index < 0) {
      // Not found: without this check `index + 1` would select the parent's
      // first child.
      return undefined;
    }
    return siblings[index + 1];
  }

  /**
   * For a node without `childNodes`: its `nodeValue` (or `''`). Otherwise
   * the concatenated `textContent` of all children, in order.
   */
  get textContent() {
    if (!this.childNodes) {
      return this.nodeValue || '';
    }
    return this.childNodes.map((child) => child.textContent).join('');
  }

  /**
   * @returns {boolean} `true` only when `childNodes` exists and is not empty.
   */
  hasChildNodes() {
    return Boolean(this.childNodes) && this.childNodes.length > 0;
  }
}
||||
|
||||
/**
 * Builds a tree of `SimpleDOMNode` objects from XML text.
 *
 * While parsing, `_currentFragment` is the child list new nodes are appended
 * to and `_stack` holds the child lists of the enclosing, still-open
 * elements. Comments, processing instructions, doctypes and attributes are
 * not stored in the tree.
 */
class SimpleXMLParser extends XMLParserBase {
  constructor() {
    super();
    this._currentFragment = null;
    this._stack = null;
    this._errorCode = XMLParserErrorCode.NoError;
  }

  /**
   * Parses `data` and returns a minimal document object.
   *
   * @param {string} data - The XML text.
   * @returns {{documentElement: SimpleDOMNode}|undefined} An object whose
   *   `documentElement` is the first top-level node, or `undefined` when the
   *   input is not well-formed (syntax error, end tag without a start tag,
   *   or elements still open at the end of the input).
   */
  parseFromString(data) {
    this._currentFragment = [];
    this._stack = [];
    this._errorCode = XMLParserErrorCode.NoError;

    this.parseXml(data);

    // Anything left on the stack is an element that was never closed; in
    // that state `_currentFragment` is the child list of the innermost open
    // element rather than the top level, so there is no valid root to return.
    if (this._errorCode === XMLParserErrorCode.NoError &&
        this._stack.length > 0) {
      this.onError(XMLParserErrorCode.UnterminatedElement);
    }

    if (this._errorCode !== XMLParserErrorCode.NoError) {
      return undefined; // return undefined on error
    }

    // We should only have one root.
    const [documentElement] = this._currentFragment;
    return { documentElement, };
  }

  /**
   * Adds `&apos;` to the entities handled by the base class.
   */
  onResolveEntity(name) {
    switch (name) {
      case 'apos':
        return '\'';
    }
    return super.onResolveEntity(name);
  }

  /**
   * Appends a `#text` node for character data; whitespace-only runs are
   * dropped.
   */
  onText(text) {
    if (isWhitespaceString(text)) {
      return;
    }
    const node = new SimpleDOMNode('#text', text);
    this._currentFragment.push(node);
  }

  /**
   * Appends a `#text` node for a CDATA section (kept even when it contains
   * only whitespace).
   */
  onCdata(text) {
    const node = new SimpleDOMNode('#text', text);
    this._currentFragment.push(node);
  }

  /**
   * Appends an element node and, unless the tag is self-closing, makes its
   * child list the current insertion point. `attributes` is not stored.
   */
  onBeginElement(name, attributes, isEmpty) {
    const node = new SimpleDOMNode(name);
    node.childNodes = [];
    this._currentFragment.push(node);
    if (isEmpty) {
      return;
    }
    this._stack.push(this._currentFragment);
    this._currentFragment = node.childNodes;
  }

  /**
   * Closes the innermost open element: restores the enclosing child list and
   * sets `parentNode` on the children of the element that just ended.
   * `name` is not compared with the name of the element being closed.
   */
  onEndElement(name) {
    if (this._stack.length === 0) {
      // End tag without a matching start tag. Record the error and keep the
      // current state, instead of failing on the `undefined` that popping an
      // empty stack would yield.
      this.onError(XMLParserErrorCode.ElementNeverBegun);
      return;
    }
    this._currentFragment = this._stack.pop();
    // The element being closed is the last node that was appended to the
    // enclosing child list.
    const lastElement = this._currentFragment[this._currentFragment.length - 1];
    for (let i = 0, ii = lastElement.childNodes.length; i < ii; i++) {
      lastElement.childNodes[i].parentNode = lastElement;
    }
  }

  /**
   * Remembers the error code so `parseFromString` can report the failure.
   */
  onError(code) {
    this._errorCode = code;
  }
}
||||
|
||||
export { |
||||
SimpleXMLParser, |
||||
}; |
Loading…
Reference in new issue