package/package.json000644 000765 000024 0000001657 12463447617013044 0ustar00000000 000000 { "name": "domutils", "version": "1.5.1", "description": "utilities for working with htmlparser2's dom", "main": "index.js", "directories": { "test": "tests" }, "scripts": { "test": "mocha test/tests/**.js && jshint index.js test/**/*.js lib/*.js" }, "repository": { "type": "git", "url": "git://github.com/FB55/domutils.git" }, "keywords": [ "dom", "htmlparser2" ], "dependencies": { "dom-serializer": "0", "domelementtype": "1" }, "devDependencies": { "htmlparser2": "~3.3.0", "domhandler": "2", "jshint": "~2.3.0", "mocha": "~1.15.1" }, "author": "Felix Boehm ", "jshintConfig": { "proto": true, "unused": true, "eqnull": true, "undef": true, "quotmark": "double", "eqeqeq": true, "trailing": true, "node": true, "globals": { "describe": true, "it": true, "beforeEach": true } } } package/.npmignore000644 000765 000024 0000000015 12463447602012532 0ustar00000000 000000 node_modules package/LICENSE000644 000765 000024 0000002354 12463447602011550 0ustar00000000 000000 Copyright (c) Felix Böhm All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package/index.js000644 000765 000024 0000000502 12463447602012201 0ustar00000000 000000 var DomUtils = module.exports; [ require("./lib/stringify"), require("./lib/traversal"), require("./lib/manipulation"), require("./lib/querying"), require("./lib/legacy"), require("./lib/helpers") ].forEach(function(ext){ Object.keys(ext).forEach(function(key){ DomUtils[key] = ext[key].bind(DomUtils); }); }); package/lib/helpers.js000644 000765 000024 0000007532 12463447602013314 0ustar00000000 000000 // removeSubsets // Given an array of nodes, remove any member that is contained by another. exports.removeSubsets = function(nodes) { var idx = nodes.length, node, ancestor, replace; // Check if each node (or one of its ancestors) is already contained in the // array. while (--idx > -1) { node = ancestor = nodes[idx]; // Temporarily remove the node under consideration nodes[idx] = null; replace = true; while (ancestor) { if (nodes.indexOf(ancestor) > -1) { replace = false; nodes.splice(idx, 1); break; } ancestor = ancestor.parent; } // If the node has been found to be unique, re-insert it. if (replace) { nodes[idx] = node; } } return nodes; }; // Source: http://dom.spec.whatwg.org/#dom-node-comparedocumentposition var POSITION = { DISCONNECTED: 1, PRECEDING: 2, FOLLOWING: 4, CONTAINS: 8, CONTAINED_BY: 16 }; // Compare the position of one node against another node in any other document. 
// The return value is a bitmask with the following values: // // document order: // > There is an ordering, document order, defined on all the nodes in the // > document corresponding to the order in which the first character of the // > XML representation of each node occurs in the XML representation of the // > document after expansion of general entities. Thus, the document element // > node will be the first node. Element nodes occur before their children. // > Thus, document order orders element nodes in order of the occurrence of // > their start-tag in the XML (after expansion of entities). The attribute // > nodes of an element occur after the element and before its children. The // > relative order of attribute nodes is implementation-dependent. // Source: // http://www.w3.org/TR/DOM-Level-3-Core/glossary.html#dt-document-order // // @argument {Node} nodeA The first node to use in the comparison // @argument {Node} nodeB The second node to use in the comparison // // @return {Number} A bitmask describing the input nodes' relative position. // See http://dom.spec.whatwg.org/#dom-node-comparedocumentposition for // a description of these values. 
// Collect `node` followed by every node reachable through `.parent`, then
// flip the list so it reads from the outermost ancestor down to `node`.
function ancestryOf(node) {
	var chain = [];
	for (var cursor = node; cursor; cursor = cursor.parent) {
		chain.push(cursor);
	}
	return chain.reverse();
}

var comparePos = exports.compareDocumentPosition = function(nodeA, nodeB) {
	// Identical nodes have no relative position.
	if (nodeA === nodeB) {
		return 0;
	}

	var pathA = ancestryOf(nodeA);
	var pathB = ancestryOf(nodeB);

	// Step through both root-to-node paths together until they diverge.
	var depth = 0;
	while (pathA[depth] === pathB[depth]) {
		depth += 1;
	}

	// Diverging at the very first entry means no ancestor is shared.
	if (depth === 0) {
		return POSITION.DISCONNECTED;
	}

	// The last common entry is the shared parent; the entries right after it
	// are the children of that parent leading to each input node. When one
	// input is itself the shared parent, its entry is undefined and
	// `indexOf` yields -1 for it.
	var branchPoint = pathA[depth - 1];
	var children = branchPoint.children;
	var indexA = children.indexOf(pathA[depth]);
	var indexB = children.indexOf(pathB[depth]);

	if (indexA > indexB) {
		return branchPoint === nodeB ?
			POSITION.FOLLOWING | POSITION.CONTAINED_BY :
			POSITION.FOLLOWING;
	}

	return branchPoint === nodeA ?
		POSITION.PRECEDING | POSITION.CONTAINS :
		POSITION.PRECEDING;
};

// Sort an array of nodes based on their relative position in the document and
// remove any duplicate nodes. If the array contains nodes that do not belong
// to the same document, sort order is unspecified.
// // @argument {Array} nodes Array of DOM nodes // // @returns {Array} collection of unique nodes, sorted in document order exports.uniqueSort = function(nodes) { var idx = nodes.length, node, position; nodes = nodes.slice(); while (--idx > -1) { node = nodes[idx]; position = nodes.indexOf(node); if (position > -1 && position < idx) { nodes.splice(idx, 1); } } nodes.sort(function(a, b) { var relative = comparePos(a, b); if (relative & POSITION.PRECEDING) { return -1; } else if (relative & POSITION.FOLLOWING) { return 1; } return 0; }); return nodes; }; package/lib/legacy.js000644 000765 000024 0000004671 12463447602013117 0ustar00000000 000000 var ElementType = require("domelementtype"); var isTag = exports.isTag = ElementType.isTag; exports.testElement = function(options, element){ for(var key in options){ if(!options.hasOwnProperty(key)); else if(key === "tag_name"){ if(!isTag(element) || !options.tag_name(element.name)){ return false; } } else if(key === "tag_type"){ if(!options.tag_type(element.type)) return false; } else if(key === "tag_contains"){ if(isTag(element) || !options.tag_contains(element.data)){ return false; } } else if(!element.attribs || !options[key](element.attribs[key])){ return false; } } return true; }; var Checks = { tag_name: function(name){ if(typeof name === "function"){ return function(elem){ return isTag(elem) && name(elem.name); }; } else if(name === "*"){ return isTag; } else { return function(elem){ return isTag(elem) && elem.name === name; }; } }, tag_type: function(type){ if(typeof type === "function"){ return function(elem){ return type(elem.type); }; } else { return function(elem){ return elem.type === type; }; } }, tag_contains: function(data){ if(typeof data === "function"){ return function(elem){ return !isTag(elem) && data(elem.data); }; } else { return function(elem){ return !isTag(elem) && elem.data === data; }; } } }; function getAttribCheck(attrib, value){ if(typeof value === "function"){ return function(elem){ return 
elem.attribs && value(elem.attribs[attrib]); }; } else { return function(elem){ return elem.attribs && elem.attribs[attrib] === value; }; } } function combineFuncs(a, b){ return function(elem){ return a(elem) || b(elem); }; } exports.getElements = function(options, element, recurse, limit){ var funcs = Object.keys(options).map(function(key){ var value = options[key]; return key in Checks ? Checks[key](value) : getAttribCheck(key, value); }); return funcs.length === 0 ? [] : this.filter( funcs.reduce(combineFuncs), element, recurse, limit ); }; exports.getElementById = function(id, element, recurse){ if(!Array.isArray(element)) element = [element]; return this.findOne(getAttribCheck("id", id), element, recurse !== false); }; exports.getElementsByTagName = function(name, element, recurse, limit){ return this.filter(Checks.tag_name(name), element, recurse, limit); }; exports.getElementsByTagType = function(type, element, recurse, limit){ return this.filter(Checks.tag_type(type), element, recurse, limit); }; package/lib/manipulation.js000644 000765 000024 0000002772 12463447602014353 0ustar00000000 000000 exports.removeElement = function(elem){ if(elem.prev) elem.prev.next = elem.next; if(elem.next) elem.next.prev = elem.prev; if(elem.parent){ var childs = elem.parent.children; childs.splice(childs.lastIndexOf(elem), 1); } }; exports.replaceElement = function(elem, replacement){ var prev = replacement.prev = elem.prev; if(prev){ prev.next = replacement; } var next = replacement.next = elem.next; if(next){ next.prev = replacement; } var parent = replacement.parent = elem.parent; if(parent){ var childs = parent.children; childs[childs.lastIndexOf(elem)] = replacement; } }; exports.appendChild = function(elem, child){ child.parent = elem; if(elem.children.push(child) !== 1){ var sibling = elem.children[elem.children.length - 2]; sibling.next = child; child.prev = sibling; child.next = null; } }; exports.append = function(elem, next){ var parent = elem.parent, currNext = 
elem.next; next.next = currNext; next.prev = elem; elem.next = next; next.parent = parent; if(currNext){ currNext.prev = next; if(parent){ var childs = parent.children; childs.splice(childs.lastIndexOf(currNext), 0, next); } } else if(parent){ parent.children.push(next); } }; exports.prepend = function(elem, prev){ var parent = elem.parent; if(parent){ var childs = parent.children; childs.splice(childs.lastIndexOf(elem), 0, prev); } if(elem.prev){ elem.prev.next = prev; } prev.parent = parent; prev.prev = elem.prev; prev.next = elem; elem.prev = prev; }; package/lib/querying.js000644 000765 000024 0000003513 12463447602013510 0ustar00000000 000000 var isTag = require("domelementtype").isTag; module.exports = { filter: filter, find: find, findOneChild: findOneChild, findOne: findOne, existsOne: existsOne, findAll: findAll }; function filter(test, element, recurse, limit){ if(!Array.isArray(element)) element = [element]; if(typeof limit !== "number" || !isFinite(limit)){ limit = Infinity; } return find(test, element, recurse !== false, limit); } function find(test, elems, recurse, limit){ var result = [], childs; for(var i = 0, j = elems.length; i < j; i++){ if(test(elems[i])){ result.push(elems[i]); if(--limit <= 0) break; } childs = elems[i].children; if(recurse && childs && childs.length > 0){ childs = find(test, childs, recurse, limit); result = result.concat(childs); limit -= childs.length; if(limit <= 0) break; } } return result; } function findOneChild(test, elems){ for(var i = 0, l = elems.length; i < l; i++){ if(test(elems[i])) return elems[i]; } return null; } function findOne(test, elems){ var elem = null; for(var i = 0, l = elems.length; i < l && !elem; i++){ if(!isTag(elems[i])){ continue; } else if(test(elems[i])){ elem = elems[i]; } else if(elems[i].children.length > 0){ elem = findOne(test, elems[i].children); } } return elem; } function existsOne(test, elems){ for(var i = 0, l = elems.length; i < l; i++){ if( isTag(elems[i]) && ( test(elems[i]) || ( 
elems[i].children.length > 0 && existsOne(test, elems[i].children) ) ) ){ return true; } } return false; } function findAll(test, elems){ var result = []; for(var i = 0, j = elems.length; i < j; i++){ if(!isTag(elems[i])) continue; if(test(elems[i])) result.push(elems[i]); if(elems[i].children.length > 0){ result = result.concat(findAll(test, elems[i].children)); } } return result; } package/lib/stringify.js000644 000765 000024 0000001137 12463447602013663 0ustar00000000 000000 var ElementType = require("domelementtype"), getOuterHTML = require("dom-serializer"), isTag = ElementType.isTag; module.exports = { getInnerHTML: getInnerHTML, getOuterHTML: getOuterHTML, getText: getText }; function getInnerHTML(elem, opts){ return elem.children ? elem.children.map(function(elem){ return getOuterHTML(elem, opts); }).join("") : ""; } function getText(elem){ if(Array.isArray(elem)) return elem.map(getText).join(""); if(isTag(elem) || elem.type === ElementType.CDATA) return getText(elem.children); if(elem.type === ElementType.Text) return elem.data; return ""; } package/lib/traversal.js000644 000765 000024 0000001043 12463447602013644 0ustar00000000 000000 var getChildren = exports.getChildren = function(elem){ return elem.children; }; var getParent = exports.getParent = function(elem){ return elem.parent; }; exports.getSiblings = function(elem){ var parent = getParent(elem); return parent ? 
getChildren(parent) : [elem]; }; exports.getAttributeValue = function(elem, name){ return elem.attribs && elem.attribs[name]; }; exports.hasAttrib = function(elem, name){ return !!elem.attribs && hasOwnProperty.call(elem.attribs, name); }; exports.getName = function(elem){ return elem.name; }; package/readme.md000644 000765 000024 0000000055 12463447602012316 0ustar00000000 000000 utilities for working with htmlparser2's dom package/test/fixture.js000644 000765 000024 0000000302 12463447602013535 0ustar00000000 000000 var makeDom = require("./utils").makeDom; var markup = Array(21).join( " text " ); module.exports = makeDom(markup); package/test/utils.js000644 000765 000024 0000000345 12463447602013216 0ustar00000000 000000 var htmlparser = require("htmlparser2"); exports.makeDom = function(markup) { var handler = new htmlparser.DomHandler(), parser = new htmlparser.Parser(handler); parser.write(markup); parser.done(); return handler.dom; }; package/test/tests/helpers.js000644 000765 000024 0000005205 12463447602014662 0ustar00000000 000000 var makeDom = require("../utils").makeDom; var helpers = require("../.."); var assert = require("assert"); describe("helpers", function() { describe("removeSubsets", function() { var removeSubsets = helpers.removeSubsets; var dom = makeDom("

")[0]; it("removes identical trees", function() { var matches = removeSubsets([dom, dom]); assert.equal(matches.length, 1); }); it("Removes subsets found first", function() { var matches = removeSubsets([dom, dom.children[0].children[0]]); assert.equal(matches.length, 1); }); it("Removes subsets found last", function() { var matches = removeSubsets([dom.children[0], dom]); assert.equal(matches.length, 1); }); it("Does not remove unique trees", function() { var matches = removeSubsets([dom.children[0], dom.children[1]]); assert.equal(matches.length, 2); }); }); describe("compareDocumentPosition", function() { var compareDocumentPosition = helpers.compareDocumentPosition; var markup = "

"; var dom = makeDom(markup)[0]; var p = dom.children[0]; var span = p.children[0]; var a = dom.children[1]; it("reports when the first node occurs before the second indirectly", function() { assert.equal(compareDocumentPosition(span, a), 2); }); it("reports when the first node contains the second", function() { assert.equal(compareDocumentPosition(p, span), 10); }); it("reports when the first node occurs after the second indirectly", function() { assert.equal(compareDocumentPosition(a, span), 4); }); it("reports when the first node is contained by the second", function() { assert.equal(compareDocumentPosition(span, p), 20); }); it("reports when the nodes belong to separate documents", function() { var other = makeDom(markup)[0].children[0].children[0]; assert.equal(compareDocumentPosition(span, other), 1); }); it("reports when the nodes are identical", function() { assert.equal(compareDocumentPosition(span, span), 0); }); }); describe("uniqueSort", function() { var uniqueSort = helpers.uniqueSort; var dom, p, span, a; beforeEach(function() { dom = makeDom("

")[0]; p = dom.children[0]; span = p.children[0]; a = dom.children[1]; }); it("leaves unique elements untouched", function() { assert.deepEqual(uniqueSort([p, a]), [p, a]); }); it("removes duplicate elements", function() { assert.deepEqual(uniqueSort([p, a, p]), [p, a]); }); it("sorts nodes in document order", function() { assert.deepEqual(uniqueSort([a, dom, span, p]), [dom, p, span, a]); }); }); }); package/test/tests/legacy.js000644 000765 000024 0000006175 12463447602014473 0ustar00000000 000000 var DomUtils = require("../.."); var fixture = require("../fixture"); var assert = require("assert"); // Set up expected structures var expected = { idAsdf: fixture[1], tag2: [], typeScript: [] }; for (var idx = 0; idx < 20; ++idx) { expected.tag2.push(fixture[idx*2 + 1].children[5]); expected.typeScript.push(fixture[idx*2 + 1].children[1]); } describe("legacy", function() { describe("getElements", function() { var getElements = DomUtils.getElements; it("returns the node with the specified ID", function() { assert.deepEqual( getElements({ id: "asdf" }, fixture, true, 1), [expected.idAsdf] ); }); it("returns empty array for unknown IDs", function() { assert.deepEqual(getElements({ id: "asdfs" }, fixture, true), []); }); it("returns the nodes with the specified tag name", function() { assert.deepEqual( getElements({ tag_name:"tag2" }, fixture, true), expected.tag2 ); }); it("returns empty array for unknown tag names", function() { assert.deepEqual( getElements({ tag_name : "asdfs" }, fixture, true), [] ); }); it("returns the nodes with the specified tag type", function() { assert.deepEqual( getElements({ tag_type: "script" }, fixture, true), expected.typeScript ); }); it("returns empty array for unknown tag types", function() { assert.deepEqual( getElements({ tag_type: "video" }, fixture, true), [] ); }); }); describe("getElementById", function() { var getElementById = DomUtils.getElementById; it("returns the specified node", function() { assert.equal( expected.idAsdf, 
getElementById("asdf", fixture, true) ); }); it("returns `null` for unknown IDs", function() { assert.equal(null, getElementById("asdfs", fixture, true)); }); }); describe("getElementsByTagName", function() { var getElementsByTagName = DomUtils.getElementsByTagName; it("returns the specified nodes", function() { assert.deepEqual( getElementsByTagName("tag2", fixture, true), expected.tag2 ); }); it("returns empty array for unknown tag names", function() { assert.deepEqual( getElementsByTagName("tag23", fixture, true), [] ); }); }); describe("getElementsByTagType", function() { var getElementsByTagType = DomUtils.getElementsByTagType; it("returns the specified nodes", function() { assert.deepEqual( getElementsByTagType("script", fixture, true), expected.typeScript ); }); it("returns empty array for unknown tag types", function() { assert.deepEqual( getElementsByTagType("video", fixture, true), [] ); }); }); describe("getOuterHTML", function() { var getOuterHTML = DomUtils.getOuterHTML; it("Correctly renders the outer HTML", function() { assert.equal( getOuterHTML(fixture[1]), " text " ); }); }); describe("getInnerHTML", function() { var getInnerHTML = DomUtils.getInnerHTML; it("Correctly renders the inner HTML", function() { assert.equal( getInnerHTML(fixture[1]), " text " ); }); }); }); package/test/tests/traversal.js000644 000765 000024 0000000646 12463447602015227 0ustar00000000 000000 var makeDom = require("../utils").makeDom; var traversal = require("../.."); var assert = require("assert"); describe("traversal", function() { describe("hasAttrib", function() { var hasAttrib = traversal.hasAttrib; it("doesn't throw on text nodes", function() { var dom = makeDom("textnode"); assert.doesNotThrow(function() { hasAttrib(dom[0], "some-attrib"); }); }); }); });