xmldiff-0.6.10/0000755000201200020120000000000011441141234012134 5ustar alainalainxmldiff-0.6.10/setup.py0000644000201200020120000001414611434502003013651 0ustar alainalain#!/usr/bin/env python # pylint: disable-msg=W0142, W0403,W0404, W0613,W0622,W0622, W0704, R0904 # # Copyright (c) 2003-2010 LOGILAB S.A. (Paris, FRANCE). # http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
""" Generic Setup script, takes package info from __pkginfo__.py file """ import os import sys import shutil from distutils.core import setup from distutils.command import install_lib from os.path import isdir, exists, join, walk # import required features from __pkginfo__ import modname, version, license, description, \ web, author, author_email # import optional features try: from __pkginfo__ import distname except ImportError: distname = modname try: from __pkginfo__ import scripts except ImportError: scripts = [] try: from __pkginfo__ import data_files except ImportError: data_files = None try: from __pkginfo__ import subpackage_of except ImportError: subpackage_of = None try: from __pkginfo__ import include_dirs except ImportError: include_dirs = [] try: from __pkginfo__ import ext_modules except ImportError: ext_modules = None try: from __pkginfo__ import long_description except ImportError: long_description = file('README').read() BASE_BLACKLIST = ('CVS', '.hg', 'debian', 'dist', 'build', '__buildlog') IGNORED_EXTENSIONS = ('.pyc', '.pyo', '.elc') def ensure_scripts(linux_scripts): """ Creates the proper script names required for each platform (taken from 4Suite) """ from distutils import util if util.get_platform()[:3] == 'win': scripts_ = [script + '.bat' for script in linux_scripts] else: scripts_ = linux_scripts return scripts_ def get_packages(directory, prefix): """return a list of subpackages for the given directory """ result = [] for package in os.listdir(directory): absfile = join(directory, package) if isdir(absfile): if exists(join(absfile, '__init__.py')) or \ package in ('test', 'tests'): if prefix: result.append('%s.%s' % (prefix, package)) else: result.append(package) result += get_packages(absfile, result[-1]) return result def export(from_dir, to_dir, blacklist=BASE_BLACKLIST, ignore_ext=IGNORED_EXTENSIONS): """make a mirror of from_dir in to_dir, omitting directories and files listed in the black list """ def make_mirror(arg, directory, 
fnames): """walk handler""" for norecurs in blacklist: try: fnames.remove(norecurs) except ValueError: pass for filename in fnames: # don't include binary files if filename[-4:] in ignore_ext: continue if filename[-1] == '~': continue src = '%s/%s' % (directory, filename) dest = to_dir + src[len(from_dir):] print >> sys.stderr, src, '->', dest if os.path.isdir(src): if not exists(dest): os.mkdir(dest) else: if exists(dest): os.remove(dest) shutil.copy2(src, dest) try: os.mkdir(to_dir) except OSError, ex: # file exists ? import errno if ex.errno != errno.EEXIST: raise walk(from_dir, make_mirror, None) EMPTY_FILE = '"""generated file, don\'t modify or your data will be lost"""\n' class MyInstallLib(install_lib.install_lib): """extend install_lib command to handle package __init__.py and include_dirs variable if necessary """ def run(self): """overridden from install_lib class""" install_lib.install_lib.run(self) # create Products.__init__.py if needed if subpackage_of: product_init = join(self.install_dir, subpackage_of, '__init__.py') if not exists(product_init): self.announce('creating %s' % product_init) stream = open(product_init, 'w') stream.write(EMPTY_FILE) stream.close() # manually install included directories if any if include_dirs: if subpackage_of: base = join(subpackage_of, modname) else: base = modname for directory in include_dirs: dest = join(self.install_dir, base, directory) export(directory, dest) def install(**kwargs): """setup entry point""" if subpackage_of: package = subpackage_of + '.' 
+ modname kwargs['package_dir'] = {package : '.'} packages = [package] + get_packages(os.getcwd(), package) else: kwargs['package_dir'] = {modname : '.'} packages = [modname] + get_packages(os.getcwd(), modname) kwargs['packages'] = packages return setup(name = distname, version = version, license = license, description = description, long_description = long_description, author = author, author_email = author_email, url = web, scripts = ensure_scripts(scripts), data_files=data_files, ext_modules=ext_modules, cmdclass={'install_lib': MyInstallLib}, **kwargs ) if __name__ == '__main__' : install() xmldiff-0.6.10/ezs.py0000644000201200020120000003764511434467463013347 0ustar alainalain# Copyright (c) 2000 LOGILAB S.A. (Paris, FRANCE). # http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
# """ this file provides the Zhang and Shasha tree to tree correction algorithm extended by Barnard, Clark and Duncan """ from xmldiff.objects import * from xmldiff.misc import init_matrix ####### Actions used by ezs algorithm ####### EZS_A_TYPE = 0 EZS_A_COST = 1 EZS_A_FCOST = 2 EZS_A_N1 = 3 EZS_A_N2 = 4 EZS_A_DEL = 5 # node's attributes for ezs algorithm N_KEYROOT = NSIZE # 1 if node is a keyroot, either 0 N_LEFTMOST = N_KEYROOT+1 # index of leftmost child (see tree2tree) def _nodes_equal(n1, n2): """ compare name and value of 2 xml nodes n1 and n2 """ if n1 is None: return n2 is None elif n2 is None: return FALSE return n1[N_VALUE] == n2[N_VALUE] def trees_equal(n1, n2): """ return true if the node n1 and n2 are equivalent subtrees """ if not _nodes_equal(n1, n2): return FALSE elif n1 is not None: # recursion on each n1 and n2's child if n1[N_ISSUE] != n2[N_ISSUE]: return FALSE else: for child1, child2 in zip(n1[N_CHILDS], n2[N_CHILDS]): if not trees_equal(child1, child2): return FALSE return TRUE def choose(f_actions, desc_list): """ return the best action (min forest distance) in the description list desc_list : [index1, index2, Action] """ best_action = [C_INFINI] for i, j, action in desc_list: fcost = f_actions[i][j][-1][EZS_A_FCOST] + action[EZS_A_COST] if fcost < best_action[0]: best_action = [fcost, i, j, action] actions_stack = f_actions[best_action[1]][best_action[2]][:] best_action[3][EZS_A_FCOST] = best_action[0] add_action(actions_stack, best_action[3]) return actions_stack def add_action(actions_list, action): """ Test action and add it to the list if it's a real action """ if action[EZS_A_COST] > 0: actions_list.append(action) ######## COST CALCUL ######## C_INFINI = 9999999 C_SWAP = 1 C_APPEND = 1 C_REMOVE = 1 def gamma(ni, nj): """ return a cost which represents the differents betwen ni and nj today, return 0 if ni.nodeName equal nj.nodeName, 1 else """ if ni == nj : return 0 elif ni is not None and nj is not None and ni[N_VALUE] == nj[N_VALUE]: 
return 0 else: return C_INFINI def swap_trees(ni, sib_o_fi, nj, sib_o_fj): """ return the cost to swap subtree ni et nj (sib_o_fi and sib_o_fi are the next sibbling node respectively for n1 and nj) """ if trees_equal(ni, sib_o_fj) and trees_equal(sib_o_fi, nj) \ and ni[N_NAME] != '/' and nj[N_NAME] != '/': return C_SWAP else: return C_INFINI def remove_tree(ni): """ return the cost to remove subtree ni """ return (ni[N_ISSUE] + 1) * C_REMOVE def append_tree(ni): """ return the cost to append subtree ni """ return (ni[N_ISSUE] + 1) * C_APPEND ##### TREE 2 TREE ALGORITHMs ### class EzsCorrector: """ this class uses the Zhang and Shasha algorithm extended by Barnard, Clark and Duncan """ ## * x, y -> postordered number of nodes being processed ## * nl1, nl2: node[MAXNODES] -> nodes list ordered in the post-ordered ## number extracted respectively from tree1 and tree2 (size1 and size2 elmts) ## * actions[MAXNODES][MAXNODES] -> actions table working as tree distances ## table (consideres only descendants). 
actions[i][j] finally contain a ## list of actions which represents the best way to transform node ## post-numbered i (from source tree) into node post-numbered j (from ## destination tree) ## * f_actions[FDSIZE][FDSIZE] -> actions table working as forest distances ## table (forest distance is the distance between 2 nodes in their left ## siblings context) ## since nodes are post numbered, nl[nl[i]->leftmost]-1] = root of the previous ## subtree for nl[i] def __init__(self, formatter): self._formatter = formatter def process_trees(self, tree1, tree2): """ the Extended Zhang and Shasha tree 2 tree correction algorithm (Barnard, Clarke, Duncan) """ ### initialisations ### nl1, nl2 = [], [] self._formatter.init() # add attributes to trees self._post_order(tree1, nl1, TRUE) self._post_order(tree2, nl2, TRUE) # numbered tree with required attributes size1, size2 = len(nl1), len(nl2) # actions tables init f_actions = init_matrix(size1+1, size2+1, [[0, 0, C_INFINI, None]]) actions = init_matrix(size1+1, size2+1, None) # insert None elmt to have index from 1 to size instead of 0,size-1 nl1.insert(0, None) nl2.insert(0, None) ## after that, let's go !! 
### for x in range(1, size1+1): if nl1[x][N_KEYROOT]: for y in range(1, size2+1): if nl2[y][N_KEYROOT]: # all the job is in function below self._process_nodes(x, y, nl1, nl2, f_actions, actions) self._mainformat(actions[size1][size2]) self._formatter.end() #### private functions #### def _process_nodes(self, x, y, nl1, nl2, f_actions, actions): """ job for each keyroot nodes after round for nodes (nl1[x], nl2[y]), actions[x][y] will contain the best list of actions to transform nl1[x] into nl2[y] (f_actions[x][y] too but it may be override in the next round) """ lx = nl1[x][N_LEFTMOST] ly = nl2[y][N_LEFTMOST] f_actions[lx - 1][ly - 1] = [[0, 0, 0, None]] # init forrest distance array by the cost of removing and appending # each subtree on a cumulative basis for i in range(lx, x+1): f_actions[i][ly - 1] = f_actions[nl1[i][N_LEFTMOST] - 1][ly - 1][:] cost = remove_tree(nl1[i]) add_action(f_actions[i][ly - 1], [AT_REMOVE, cost, f_actions[i][ly - 1][-1][EZS_A_FCOST]+cost, nl1[i] ]) for j in range(ly, y+1): f_actions[lx - 1][j] = f_actions[lx - 1][nl2[j][N_LEFTMOST] - 1][:] cost = append_tree(nl2[j]) add_action(f_actions[lx - 1][j], [AT_APPEND, cost, f_actions[lx - 1][j][-1][EZS_A_FCOST]+cost, nl2[j], nl1[x] ]) # look for the shortest way for i in range(lx, x+1): for j in range(ly, y+1): li = nl1[i][N_LEFTMOST] lj = nl2[j][N_LEFTMOST] # min cost between gamma, remove(nl1[i]), append(nl2[j], nl1[i]) f_actions[i][j] = choose(f_actions, [ [i-1, j, [AT_REMOVE, gamma(nl1[i], None), 0, nl1[i]]], [i, j-1, [AT_APPEND, gamma(None, nl2[j]), 0, nl2[j], nl1[x]]], [li-1, j, [AT_REMOVE, remove_tree(nl1[i]), 0, nl1[i]]], [i, lj-1, [AT_APPEND, append_tree(nl2[j]), 0, nl2[j], nl1[x]]] ]) if li == lx and lj == ly: # min between just calculed and last loop + change f_actions[i][j] = choose(f_actions, [ [i, j, [0, 0, 0]], [i-1, j-1, [0, gamma(nl1[i], nl2[j]), 0, nl1[i], nl2[j]]] ]) # now we got the best way from nl1[i] to nl2[j, save it actions[i][j] = f_actions[i][j][:] else: if 
nl1[i][N_KEYROOT] and nl2[j][N_KEYROOT] \ and nl1[i][N_NAME] != '/' and nl2[j][N_NAME] != '/': # min between just calculed and swap f_actions[i][j] = choose(f_actions, [ [i, j, [0, 0, 0]], [nl1[li-1][N_LEFTMOST] - 1, nl2[lj-1][N_LEFTMOST] - 1, [AT_SWAP, swap_trees(nl1[i], nl1[li-1], nl2[j], nl2[lj-1]), 0, nl1[i], nl1[li-1] ] ] ]) # min between just calculed and last forest distance val = f_actions[li-1][lj-1][-1][EZS_A_FCOST] + \ actions[i][j][-1][EZS_A_FCOST] if f_actions[i][j][-1][EZS_A_FCOST] > val: # concat the 2 actions list f_actions[i][j] = actions[i][j][:] sibl_cost = f_actions[i][j][-1][EZS_A_FCOST] if li-1 > 0 and lj-1 > 0: for action in f_actions[li-1][lj-1]: action = action[:] action[EZS_A_FCOST] = action[EZS_A_FCOST] +\ sibl_cost f_actions[i][j].append(action) def _mainformat(self, action_list): """ transform ezs output in standard format """ # remove actions with cost = 0 action_list = filter(lambda x: x[EZS_A_COST]!=0, action_list) for action in action_list: n_action = action #None # step1: transform the 3 operations SWAP, REMOVE, APPEND # from ezs output to SWAP, REMOVE, APPEND, UPDATE according # to the node and action type # print '-'*80 # print 'action',action ## # if the action main node have been added ## if action[EZS_A_N1][N_TYPE] == NT_SYST: ## if action[EZS_A_TYPE] in (AT_APPEND, AT_SWAP): ## node2 = action[EZS_A_N2][N_CHILDS][0] ## else: ## try: ## node2 = action[EZS_A_N2] ## except: node2 = None ## n_action = [action[EZS_A_TYPE], 1, 0, action[EZS_A_N1][N_CHILDS][0], node2, ## node2] ## # action main node is from the original document ## else: # action[EZS_A_N1][N_TYPE] != NT_SYST # those nodes should only be remove + append (= update) ## if action[EZS_A_TYPE] == AT_APPEND: ## if action[EZS_A_N2][N_VALUE] in ['N','T','C']: ## delete = action[EZS_A_N2] ## elif action[EZS_A_N2][N_PARENT]: ## delete = action[EZS_A_N2][N_CHILDS][get_pos(action[EZS_A_N1][N_PARENT])] ## ## delete = 
action[EZS_A_N2][N_PARENT][N_CHILDS][get_pos(action[EZS_A_N1][N_PARENT])][N_CHILDS][0] ## else: ## # the root has changed ## delete = action[EZS_A_N2] ## # attribute node ## ## if action[EZS_A_N1][N_TYPE] in (NT_ATTN, NT_ATTV): ## ## node2 = action[EZS_A_N2][N_PARENT][N_CHILDS][0] ## if action[EZS_A_N1][N_TYPE] == NT_ATTN: ## node2 = action[EZS_A_N2][N_PARENT] ## elif action[EZS_A_N1][N_TYPE] == NT_ATTV: ## node2 = action[EZS_A_N2][N_PARENT][N_PARENT] ## # element node ## elif action[EZS_A_N1][N_TYPE] == NT_NODE: ## node2 = action[EZS_A_N2]#[N_CHILDS][0] ## # comment or textnode ## else: #if action[EZS_A_N1][EZS_A_TYPE] == NT_TEXT: ## node2 = action[EZS_A_N2] ## n_action = [AT_UPDATE, 1, 0, action[EZS_A_N1], node2, delete] ## # step2: transform the 4 operations SWAP, REMOVE, APPEND, UPDATE from step1 ## # output to SWAP, REMOVE, APPEND, UPDATE, INSERT_AFTER, INSERT_BEFORE and ## # RENAME according to the nodes and action type, and convert it to list ## if n_action: if n_action[EZS_A_TYPE] == AT_UPDATE: if n_action[EZS_A_N1][N_TYPE] in (NT_NODE, NT_ATTN) : action_l = ['rename', f_xpath(n_action[EZS_A_DEL]), n_action[EZS_A_N1][N_VALUE]] else: action_l = ['update', f_xpath(n_action[EZS_A_DEL]), n_action[EZS_A_N1][N_VALUE]] elif n_action[EZS_A_TYPE] == AT_SWAP: action_l = ['swap', n_action[EZS_A_N1], n_action[EZS_A_N2]] elif n_action[EZS_A_TYPE] == AT_REMOVE: action_l = ['remove', f_xpath(n_action[EZS_A_N1])] elif n_action[EZS_A_TYPE] == AT_APPEND: if n_action[EZS_A_N1][N_TYPE] == NT_ATTN: action_l = ['append', f_xpath(n_action[EZS_A_N2]), n_action[EZS_A_N1]] elif n_action[EZS_A_N2][N_PARENT] and \ nb_childs(n_action[EZS_A_N2][N_PARENT]) > 1: index = get_pos(n_action[EZS_A_N1][N_PARENT]) if index == 1 and \ nb_childs(n_action[EZS_A_DEL][N_PARENT]) > 1: action_l = ['append-first', f_xpath(n_action[EZS_A_N1][N_PARENT][N_PARENT][N_CHILDS][0]), n_action[EZS_A_N1]] elif index > 1: action_l = ['insert-after', 
f_xpath(n_action[EZS_A_N1][N_PARENT][N_PARENT][N_CHILDS][index-1]), n_action[EZS_A_N1]] else: action_l = ['append-last', f_xpath(n_action[EZS_A_DEL][N_PARENT]), n_action[EZS_A_N1]] else: action_l = ['append', f_xpath(n_action[EZS_A_N2]), n_action[EZS_A_N1]] # fully format this action self._formatter.add_action(action_l) def _post_order(self, node, nodes_list, keyroot, nodes=1): ''' recursivre function which add following attributes to the tree: * "number", the post ordered number of the node (integer), * "left most", the post ordered number of the left most child (or itself if none) * "keyroot", a boolean (all nodes are keyroot except each leftmost nodes) each element node is post ordered numbered return the current number (equal the number of nodes when all the tree has been processed) ''' if node is not None: # add keyroot and leftmost attributes node.append(keyroot) node.append(nodes) for child in node[N_CHILDS]: nodes = self._post_order(child, nodes_list, child is not node[N_CHILDS][0], nodes) nodes_list.append(node) return nodes + 1 return nodes xmldiff-0.6.10/__pkginfo__.py0000644000201200020120000000421111435670414014747 0ustar alainalain# -*- coding: utf-8 -*- # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ Copyright (c) 2001-2010 LOGILAB S.A. (Paris, FRANCE). 
http://www.logilab.fr/ -- mailto:contact@logilab.fr """ modname = distname = 'xmldiff' numversion = (0, 6, 10) version = '.'.join(map(str, numversion)) description = "tree 2 tree correction between xml documents" author = "Sylvain Thénault" author_email = "sylvain.thenault@logilab.fr" web = "http://www.logilab.org/project/%s" % modname ftp = "ftp://ftp.logilab.org/pub/%s" % modname license = 'GPL' copyright = '''Copyright © 2001-2010 LOGILAB S.A. (Paris, FRANCE), all rights reserved. http://www.logilab.fr/ -- mailto:contact@logilab.fr''' long_description = """Xmldiff is a utility for extracting differences between two xml files. It returns a set of primitives to apply on source tree to obtain the destination tree. . The implementation is based on _Change detection in hierarchically structured - information_, by S. Chawathe, A. Rajaraman, H. Garcia-Molina and J. Widom, - Stanford University, 1996""" mailinglist = 'xml-projects@logilab.org' from os.path import join scripts = [join('bin', 'xmldiff'), join('bin', 'xmlrev')] include_dirs = [join('test', 'data')] try: from distutils.core import Extension ext_modules = [Extension('xmldiff.maplookup', ['extensions/maplookup.c'])] except: pass data_files = [("share/sgml/stylesheet/xmldiff", ['xsl/docbook_rev.xsl', 'xsl/xmlrev.xslt']) ] pyversions = ['2.3'] xmldiff-0.6.10/objects.py0000644000201200020120000003701411434467463014165 0ustar alainalain# Copyright (c) 2000 LOGILAB S.A. (Paris, FRANCE). # http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ provides constantes for using node and action (list) and some functions for these objects use /!\ /!\ do not call index, remove or compare two node with == since a node is a recursive list """ from xmldiff.misc import TRUE, FALSE from sys import stdout, stderr XUPD_URI = 'http://www.xmldb.org/xupdate' XUPD_PREFIX = 'xupdate' ################ ACTIONS ####################################################### A_DESC = 0 # string describes the action A_N1 = 1 # node on which the action applies A_N2 = 2 # optionnal second action argument, maybe node or value # action types (for internal use in ezs algorithm) AT_APPEND = 1 AT_REMOVE = 2 AT_SWAP = 3 AT_UPDATE = 4 AT_RENAME = 5 def actp(act): """ print an internal action (debugging purpose) """ if len(act) > 2: if act[A_DESC][0]=='m': print >> stderr, act[A_DESC], caract(act[A_N1]) print >> stderr, ' ', caract(act[A_N2]) print >> stderr, ' ', caract(act[-2]), act[-3], get_pos(act[-1]) else: print >> stderr, act[A_DESC], caract(act[A_N1]),\ caract(act[A_N2]),\ act[A_N2][N_VALUE] else: print >> stderr, act[A_DESC], caract(act[A_N1]) ################## NODES CONSTANTES ############################################ N_TYPE = 0 # node's type N_NAME = 1 # node's label (to process xpath) N_VALUE = 2 # node's value N_CHILDS = 3 # nodes's childs list N_PARENT = 4 # node's parent N_ISSUE = 5 # node's total issue number N_XNUM = 6 # to compute node's xpath NSIZE = 7 #number of items in a list which represent a node # NODE TYPES #NT_SYST = 0 # SYSTEM node (added by parser) /!\ deprecated NT_NODE = 1 # ELEMENT node NT_ATTN = 2 # ATTRIBUTE NAME node NT_ATTV = 3 # ATTRIBUTE VALUE node NT_TEXT = 4 # TEXT node NT_COMM = 5 # COMMENT node NT_ROOT = 6 # root node NODES_TYPES = 
('NT','NN','AN','AV','T','C', 'R') # for printing ################## OPERATIONS EDITING NODES #################################### def link_node(parent, child): """ link child to his parent """ if child: parent[N_CHILDS].append(child) child[N_PARENT] = parent def insert_node(node, new, pos): """ insert child new on node at position pos (integer) """ node[N_CHILDS].insert(pos, new) new[N_PARENT] = node i, j = 0, 1 while i < len(node[N_CHILDS]): n = node[N_CHILDS][i] if n[N_NAME] == new[N_NAME] and n[N_TYPE] == new[N_TYPE]: n[N_XNUM] = j j += 1 i += 1 def delete_node(node): """ delete a node from its tree """ siblings = node[N_PARENT][N_CHILDS] i = get_pos(node) siblings.pop(i) node[N_PARENT] = None while i < len(siblings): n = siblings[i] if n[N_NAME] == node[N_NAME] and n[N_TYPE] == node[N_TYPE]: n[N_XNUM] -= 1 i += 1 def rename_node(node, new_name): """ rename a node this is necessary for xpath """ siblings = node[N_PARENT][N_CHILDS] pos = get_pos(node) xnum = 1 for i in range(len(siblings)): n = siblings[i] if i < pos: if n[N_NAME] == new_name and n[N_TYPE] == node[N_TYPE]: xnum += 1 elif i != pos: if n[N_NAME] == node[N_NAME] and n[N_TYPE] == node[N_TYPE]: n[N_XNUM] -= 1 elif n[N_NAME] == new_name and n[N_TYPE] == node[N_TYPE]: n[N_XNUM] += 1 node[N_NAME] = new_name node[N_XNUM] = xnum ################## OPERATIONS FORMATING NODES ################################## def caract(node): """ return a string which represent the node """ return '%s:%s (%s) %s %s' % (NODES_TYPES[node[N_TYPE]], node[N_VALUE], f_xpath(node), id(node), node[N_ISSUE]) def f_xpath(node, x=''): """ compute node's xpath """ if node[N_NAME] != '/': if node[N_TYPE] == NT_ATTN: return f_xpath(node[N_PARENT], '/%s'%node[N_NAME][:len(node[N_NAME])-4]) if node[N_TYPE] == NT_ATTV: return f_xpath(node[N_PARENT]) #[N_PARENT], '/%s'%node[N_NAME]) return f_xpath(node[N_PARENT], '/%s[%d]%s'%( node[N_NAME], node[N_XNUM], x)) elif not x: return '/' return x def node_repr(node): """ return a string which 
represents the given node """ s = '%s\n' % caract(node) for child in node[N_CHILDS]: s = '%s%s' % (s, _indent(child, ' ')) return s def _indent(node, indent_str): s = '%s\-%s\n' % (indent_str, caract(node)) if next_sibling(node) is not None: indent_str = '%s| ' % indent_str else: indent_str = '%s ' % indent_str for child in node[N_CHILDS]: s = '%s%s' % (s, _indent(child, indent_str)) return s def xml_print(node, indent='', xupdate=0, stream=stdout): """ recursive function which write the node in an xml form without the added nodes """ if xupdate: _xml_print_xupdate(node, indent, stream) else: _xml_print_internal_format(node, indent, stream) def _xml_print_internal_format(node, indent, stream): if node[N_TYPE] == NT_NODE: attrs_s = '' i = 0 while i < len(node[N_CHILDS]): n = node[N_CHILDS][i] if n[N_TYPE] == NT_ATTN: i += 1 attrs_s = '%s %s="%s"' % (attrs_s, n[N_VALUE], n[N_CHILDS][0][N_VALUE]) else: break if len(node[N_CHILDS]) > i: stream.write('%s<%s%s>\n' % (indent, node[N_VALUE], attrs_s)) for _curr_node in node[N_CHILDS][i:]: _xml_print_internal_format(_curr_node, indent + ' ', stream=stream) stream.write('%s\n' % (indent, node[N_VALUE])) else: stream.write('%s<%s%s/>\n' % (indent, node[N_VALUE], attrs_s)) elif node[N_TYPE] == NT_ATTN: stream.write('%s<@%s>\n' % (indent, node[N_VALUE])) stream.write(node[N_CHILDS][0][N_VALUE] + '\n') stream.write('%s\n' % (indent, node[N_VALUE])) elif node[N_TYPE] == NT_COMM: stream.write('%s\n' % (indent, node[N_VALUE])) elif node[N_TYPE] == NT_TEXT: stream.write(node[N_VALUE] + '\n') else: stream.write('unknown node type',`node[N_TYPE]`) def _xml_print_xupdate(node, indent, stream): # if suffix -> xupdate attrs_s = ' name="%s"' % node[N_VALUE] if node[N_TYPE] == NT_NODE: stream.write('%s<%s:element%s>' % (indent, XUPD_PREFIX, attrs_s)) i = 0 while i < len(node[N_CHILDS]): n = node[N_CHILDS][i] if n[N_TYPE] == NT_ATTN: stream.write('%s <%s:attribute name="%s">' % (indent, XUPD_PREFIX, n[N_VALUE])) stream.write('%s' % 
n[N_CHILDS][0][N_VALUE]) stream.write('\n' % XUPD_PREFIX) else: xml_print(n, indent = indent + ' ', stream = stream) i += 1 stream.write('%s\n' % (indent, XUPD_PREFIX)) elif node[N_TYPE] == NT_ATTN: stream.write('%s<%s:attribute%s>' % (indent, XUPD_PREFIX, attrs_s)) stream.write(node[N_CHILDS][0][N_VALUE]) stream.write('\n' % XUPD_PREFIX) elif node[N_TYPE] == NT_ATTV: stream.write('%s<%s:attribute name="%s">' % (indent, XUPD_PREFIX, node[N_PARENT][N_VALUE])) stream.write(node[N_VALUE]) stream.write('\n' % XUPD_PREFIX) elif node[N_TYPE] == NT_COMM: stream.write('%s<%s:comment>' % (indent, XUPD_PREFIX)) stream.write(node[N_VALUE]) stream.write('\n' % XUPD_PREFIX) elif node[N_TYPE] == NT_TEXT: stream.write('%s<%s:text>' % (indent, XUPD_PREFIX)) stream.write(node[N_VALUE]) stream.write('\n' % XUPD_PREFIX) def to_dom(node, doc, uri=None, prefix=None): """ recursive function to convert internal tree in an xml dom tree without the added nodes """ if node[N_TYPE] == NT_NODE: dom_n = doc.createElementNS(uri, '%selement'%prefix) dom_n.setAttributeNS(None, 'name', node[N_VALUE]) for n in node[N_CHILDS]: if n[N_TYPE] == NT_ATTN: dom_n = doc.createElementNS(uri, '%sattribute'%prefix) v = unicode(n[N_CHILDS][0][N_VALUE], 'UTF-8') dom_n.setAttributeNS(None, 'name', n[N_VALUE]) dom_n.appendChild(doc.createTextNode(v)) else: dom_n.appendChild(to_dom(n, doc, uri)) elif node[N_TYPE] == NT_ATTN: dom_n = doc.createElementNS(uri, '%sattribute'%prefix) dom_n.setAttributeNS(None, 'name', node[N_VALUE]) v = unicode(node[N_CHILDS][0][N_VALUE], 'UTF-8') dom_n.appendChild(doc.createTextNode(v)) elif node[N_TYPE] == NT_COMM: dom_n = doc.createElementNS(uri, '%scomment'%prefix) v = unicode(node[N_VALUE], 'UTF-8') dom_n.appendChild(doc.createTextNode(v)) elif node[N_TYPE] == NT_TEXT: dom_n = doc.createElementNS(uri, '%stext'%prefix) v = unicode(node[N_VALUE], 'UTF-8') dom_n.appendChild(doc.createTextNode(v)) return dom_n ################## OPERATIONS GIVING INFOS ON NODES 
############################ def get_pos(node): """ return the index of a node in its parent's children list /!\ /!\ do not call index, remove or compare two node with == since a node is a recursive list """ try: childs = node[N_PARENT][N_CHILDS] for i in xrange(len(childs)): if childs[i] is node: return i except TypeError, e: return -1 except ValueError, e: return -1 def nb_childs(node): """ return the number of childs (without attribute childs) of the given node """ return len(filter(lambda n: n[N_CHILDS][0][N_TYPE] != NT_ATTN, node[N_CHILDS])) def nb_attrs(node): """ return the number of attributes of the given node """ for i in xrange(len(node[N_CHILDS])): if node[N_CHILDS][i][N_TYPE] != NT_ATTN: break else: try: i += 1 except UnboundLocalError: i = 0 return i ################## MISCELLANEOUS OPERATIONS ON NODES ########################### def next_sibling(node): """ return the node's right sibling """ if node[N_PARENT] is None: return None myindex = get_pos(node) if len(node[N_PARENT][N_CHILDS]) > myindex+1: return node[N_PARENT][N_CHILDS][myindex+1] return None def previous_sibling(node): """ return the node's left sibling """ myindex = get_pos(node) if node[N_PARENT] and myindex > 0: return node[N_PARENT][N_CHILDS][myindex-1] return None def get_ancestors(node, l): """ append to l all the ancestors from node """ while node[N_PARENT]: l.append(node) node = node[N_PARENT] return l def get_labels(tree, labels, leaf_labels): """ Chain all nodes with a given label l in tree T together, from left to right, by filling dictionnaries labels and leaf_labels (for leaf nodes). Label are keys pointing to a list of nodes with this type. Node x occurs after y in the list if x appears before y in the in-order traversal of T. /!\ /!\ since this isn't binary tree, post order traversal (?) 
""" if tree and tree[N_CHILDS]: for node in tree[N_CHILDS]: get_labels(node, labels, leaf_labels) labels.setdefault(NODES_TYPES[tree[N_TYPE]], []).append(tree) elif tree: leaf_labels.setdefault(NODES_TYPES[tree[N_TYPE]], []).append(tree) def make_bfo_list(tree): """ create a list with tree nodes in breadth first order """ l, queue = [], [] if tree: l.append(tree) if tree[N_CHILDS]: node = tree[N_CHILDS][0] while node: l.append(node) if node[N_CHILDS]: queue.append(node) node = next_sibling(node) if not node and queue: node = queue.pop(0)[N_CHILDS][0] return l def make_bfo_list(tree): """ create a list with tree nodes in breadth first order """ queue = [ tree ] lst = [ tree ] while queue: node = queue.pop(0) lst.extend( node[N_CHILDS] ) queue.extend( [ n for n in node[N_CHILDS] if n[N_CHILDS] ] ) return lst ### no more used ## def make_po_list(tree): ## """ create a list with tree nodes in post order """ ## l, stack, poped = [], [], 0 ## if tree: ## if tree[N_CHILDS]: ## node = tree[N_CHILDS][0] ## while node: ## if node[N_CHILDS] and not poped: ## stack.append(node) ## node = node[N_CHILDS][0] ## else: ## l.append(node) ## node = next_sibling(node) ## poped = 0 ## if not node and stack: ## node = stack.pop() ## poped = 1 ## l.append(tree) ## return l ## def make_preo_list(tree): ## """ create a list with tree nodes in pre order """ ## l, stack, poped = [], [], 0 ## if tree: ## l.append(tree) ## if tree[N_CHILDS]: ## node = tree[N_CHILDS][0] ## xl.append(node) ## while node: ## if node[N_CHILDS] and not poped: ## stack.append(node) ## node = node[N_CHILDS][0] ## else: ## node = next_sibling(node) ## l.append(node) ## poped = 0 ## if not node and stack: ## node = stack.pop() ## poped = 1 ## return l ## def get_leafs(tree, l): ## """ return a list with all leaf nodes from left to right """ ## if tree and tree[N_CHILDS]: ## node = tree[N_CHILDS][0] ## while node: ## get_leafs(node, l) ## node = next_sibling(node) ## elif tree: ## l.append(tree) ## def get_issue(node, 
import getopt
import os
import sys


def usage(pgm):
    """Print the command line help for the program named *pgm*."""
    print('USAGE:')
    print('\t%s [OPTIONS] from_file to_file' % pgm)
    print('\t%s [OPTIONS] [-r] from_directory to_directory' % pgm)
    print("""
Extract differences between two xml files. It returns a set of
primitives to apply on source tree to obtain the destination tree.

OPTIONS:
  -h, --help
     display this help message and exit.
  -V, --version
     display version number and exit

  -H, --html
     input files are HTML instead of XML
  -r, --recursive
     when comparing directories, recursively compare any
     subdirectories found.

  -x, --xupdate
     display output following the Xupdate xml specification
     (see http://www.xmldb.org/xupdate/xupdate-wd.html#N19b1de).
  -e encoding, --encoding=encoding
     specify the encoding to use for output. Default is UTF-8

  -n, --not-normalize-spaces
     do not normalize spaces and new lines in text and comment nodes.
  -c, --exclude-comments
     do not process comment nodes
  -g, --ext-ges
     include all external general (text) entities.
  -p, --ext-pes
     include all external parameter entities, including the external DTD
     subset.

  --profile=file
     display an execution profile (run slower with this option),
     profile saved to file (binarie form).
""")


def process_files(file1, file2, norm_sp, xupd, ezs, verbose,
                  ext_ges, ext_pes, include_comment, encoding, html):
    """Compute the diff between two files.

    Both files are parsed into trees, the selected tree-to-tree algorithm is
    run and the edit script is emitted through the selected formatter.

    Returns the number of edit actions (``len(formatter.edit_s)``), or -1 when
    a file cannot be opened or parsed.
    """
    from xml.sax import SAXParseException
    # open the files one at a time so that a failure on the second one does
    # not leak the handle of the first one
    try:
        fh1 = open(file1, 'r')
    except IOError as msg:
        sys.stderr.write(str(msg) + '\n')
        return -1
    try:
        fh2 = open(file2, 'r')
    except IOError as msg:
        fh1.close()
        sys.stderr.write(str(msg) + '\n')
        return -1
    # convert xml files to tree; the handles are closed on every path,
    # including a parse error
    try:
        from xmldiff.input import tree_from_stream
        tree1 = tree_from_stream(fh1, norm_sp, ext_ges,
                                 ext_pes, include_comment,
                                 encoding, html)
        tree2 = tree_from_stream(fh2, norm_sp, ext_ges,
                                 ext_pes, include_comment,
                                 encoding, html)
    except SAXParseException as msg:
        print(msg)
        return -1
    finally:
        fh1.close()
        fh2.close()

    if verbose:
        from xmldiff.objects import node_repr, N_ISSUE
        print("Source tree\n%s" % node_repr(tree1))
        print("Destination tree\n%s" % node_repr(tree2))
        print('Source tree has %s nodes' % tree1[N_ISSUE])
        print('Destination tree has %s nodes' % tree2[N_ISSUE])
    # output formatter
    if xupd:
        from xmldiff.format import XUpdatePrinter
        formatter = XUpdatePrinter()
    else:
        from xmldiff.format import InternalPrinter
        formatter = InternalPrinter()
    # choose and apply tree to tree algorithm
    if ezs:
        from xmldiff.ezs import EzsCorrector
        strategy = EzsCorrector(formatter)
    else:
        from xmldiff.fmes import FmesCorrector
        strategy = FmesCorrector(formatter)
    strategy.process_trees(tree1, tree2)
    return len(formatter.edit_s)


def run(args=None):
    """Main entry point.

    To be called with the list of command-line arguments (if provided, *args*
    should not contain the executable as first item).  The function always
    terminates through ``sys.exit``: 0 when no difference was found, a
    positive number of differences, or a negative value on error.

    FIXME: use optparse and remove usage() ?
    """
    if args is None:
        pgm = sys.argv[0]
        args = sys.argv[1:]
    else:
        pgm = 'xmldiff'
    s_opt = 'Hrncgpe:xzhvV'
    # NOTE: every entry must be followed by a comma; a missing one used to
    # fuse 'ext-pes' and 'encoding=' into a single bogus long option.
    # 'not-normalize-spaces' is the spelling documented by usage() and the
    # man page, 'not-normalize-space' is kept for backward compatibility.
    l_opt = ['html', 'recursive',
             'not-normalize-space', 'not-normalize-spaces',
             'exclude-comments', 'ext-ges', 'ext-pes',
             'encoding=', 'xupdate',
             'ezs',  # DEPRECATED
             'help', 'verbose', 'version', 'profile=']
    # process command line options
    try:
        (opt, args) = getopt.getopt(args, s_opt, l_opt)
    except getopt.error as exc:
        sys.stderr.write('Invalid command line: %s\n' % exc)
        sys.exit(-1)
    recursive, html = 0, 0
    xupd, ezs, verbose = 0, 0, 0
    norm_sp, include_comment, ext_ges, ext_pes = 1, 1, 0, 0
    encoding = 'UTF-8'
    prof = ''
    for name, value in opt:
        if name in ('-r', '--recursive'):
            recursive = 1
        elif name in ('-H', '--html'):
            html = 1
        elif name in ('-n', '--not-normalize-space',
                      '--not-normalize-spaces'):
            norm_sp = 0
        elif name in ('-c', '--exclude-comments'):
            include_comment = 0
        elif name in ('-g', '--ext-ges'):
            ext_ges = 1
        elif name in ('-p', '--ext-pes'):
            ext_pes = 1
        elif name in ('-e', '--encoding'):
            encoding = value
        elif name in ('-x', '--xupdate'):
            xupd = 1
        elif name in ('-z', '--ezs'):
            ezs = 1
        elif name in ('-v', '--verbose'):
            verbose = 1
        elif name == '--profile':
            # long option only: '-p' is already taken by --ext-pes
            prof = value
        elif name in ('-h', '--help'):
            usage(pgm)
            sys.exit(0)
        elif name in ('-V', '--version'):
            from xmldiff.__pkginfo__ import modname, version
            print('%s version %s' % (modname, version))
            sys.exit(0)
    if len(args) != 2:
        usage(pgm)
        sys.exit(-2)
    fpath1, fpath2 = args[0], args[1]
    exit_status = 0
    # if args are directory
    if os.path.isdir(fpath1) and os.path.isdir(fpath2):
        from xmldiff.misc import process_dirs, list_print
        common, deleted, added = process_dirs(fpath1, fpath2, recursive)

        list_print(deleted[0], 'FILE:', 'deleted')
        list_print(deleted[1], 'DIRECTORY:', 'deleted')
        list_print(added[0], 'FILE:', 'added')
        list_print(added[1], 'DIRECTORY:', 'added')
        exit_status += sum((len(deleted[0]), len(deleted[1]),
                            len(added[0]), len(added[1])))
        for filename in common[0]:
            print('-' * 80)
            print('FILE: %s' % filename)
            diffs = process_files(os.path.join(fpath1, filename),
                                  os.path.join(fpath2, filename),
                                  norm_sp, xupd, ezs, verbose,
                                  ext_ges, ext_pes, include_comment,
                                  encoding, html)
            if diffs:
                exit_status += diffs
    # if args are files
    elif os.path.isfile(fpath1) and os.path.isfile(fpath2):
        if prof:
            import profile
            import pstats
            import time
            from maplookup import fmes_end, fmes_init, fmes_node_equal, \
                 has_couple, match_end, partner, lcs2
            import maplookup
            # replaces cfunction in maplookup by python wrappers so that they
            # appear in the profile information
            def fmes_end_w(*args):
                return fmes_end(*args)
            maplookup.fmes_end = fmes_end_w
            def fmes_init_w(*args):
                return fmes_init(*args)
            maplookup.fmes_init = fmes_init_w
            def fmes_node_equal_w(*args):
                return fmes_node_equal(*args)
            maplookup.fmes_node_equal = fmes_node_equal_w
            def has_couple_w(*args):
                return has_couple(*args)
            maplookup.has_couple = has_couple_w
            def match_end_w(*args):
                return match_end(*args)
            maplookup.match_end = match_end_w
            def partner_w(*args):
                return partner(*args)
            maplookup.partner = partner_w
            def lcs2_w(*args):
                return lcs2(*args)
            maplookup.lcs2 = lcs2_w
            # time.clock does not exist on recent interpreters
            clock = getattr(time, 'clock', None) or time.perf_counter
            t = clock()
            profiler = profile.Profile()
            profiler.runctx('process_files(%r,%r,%r,%r,%r,%r,%r,%r,%r,%r,%r)' % (
                fpath1, fpath2, norm_sp, xupd, ezs, verbose,
                ext_ges, ext_pes, include_comment, encoding, html),
                            globals(), locals())
            profiler.dump_stats(prof)
            print('Time: %s' % repr(clock() - t))
            p = pstats.Stats(prof)
            p.sort_stats('time', 'calls').print_stats(.25)
            p.sort_stats('cum', 'calls').print_stats(.25)
        else:
            exit_status = process_files(fpath1, fpath2, norm_sp, xupd, ezs,
                                        verbose, ext_ges, ext_pes,
                                        include_comment, encoding, html)
    else:
        exit_status = -1
        print('%s and %s are not comparable, or not directory nor regular files'
              % (fpath1, fpath2))
    sys.exit(exit_status)


if __name__ == '__main__':
    run()
xmldiff-0.6.10/PKG-INFO0000644000201200020120000000122311441141234013227 0ustar alainalainMetadata-Version: 1.0 Name: xmldiff Version: 0.6.10 Summary: tree 2 tree correction between xml documents Home-page: http://www.logilab.org/project/xmldiff Author: Sylvain Thénault Author-email: sylvain.thenault@logilab.fr License: GPL Description: Xmldiff is a utility for extracting differences between two xml files. It returns a set of primitives to apply on source tree to obtain the destination tree. . The implementation is based on _Change detection in hierarchically structured - information_, by S. Chawathe, A. Rajaraman, H. Garcia-Molina and J. Widom, - Stanford University, 1996 Platform: UNKNOWN xmldiff-0.6.10/ChangeLog0000644000201200020120000000604411435672775013740 0ustar alainalainChange log for xmldiff ====================== 2010-08-27 -- 0.6.10 * apply Daiki Ueno patch: fails when comparing minimal trees on i386 2009-04-02 -- 0.6.9 * Fixed xmldiff-xmlrev compilation error 2006-06-15 -- 0.6.8 * Fixed 64bit cleanness issues 2005-05-04 -- 0.6.7 * WARNING: xmldiff is no longer a logilab subpackage. Users may have to manually remove the old logilab/xmldiff directory. * fixed debian bug #275750, also reported by Christopher R Newman on the xml-projects mailing list * fixed --profile option, wrap function from maplookup when profiling so that they appear in the profile information * fixed setup.py to ignore the xmlrev shell script under windows platforms * small improvements (remove recursion in object.py, minor enhancement in mydifflib.py, rewrite of lcs4 in C) 2004-12-23 -- 0.6.6 * Applied patch by Bastian Kleineidam which - corrects the typo in ML_DIR - fixes the TMPFILE_XSLT/TMPFILE_XSL typo - makes sure the files are XML or SGML files, else prints an error - adds various missing quotes around filenames which could have spaces or begin with a hyphen - fixes typos in the usage() function Thanks a lot, Bastian. 
* Fixed some problems in the xmlrev.xslt stylesheet * Fixed problems in xmlrev caused by the exit status of xmldiff when successful * Added a man page for xmldiff and xmlrev 2004-09-02 -- 0.6.5 * xmlrev bugfixes * Fixed packaging problems (missing xsl stylesheets and MANIFEST file) 2003-10-02 -- 0.6.4 * fix recursive mode * rewrite regression test, add test for the recursive mode * add --help option to xlmrev * packaging fixes * turn API.txt and HELP.txt to correct ReST 2002-11-06 -- 0.6.3 * fix wrong xpath for attributes * fix bug with temporary duplicate attribute node * fix for xupdate * fix ext_pes option bug * update changelog to new format 2002-09-23 -- 0.6.2: * return number of differences on command line * reintroduce misc.list_print which caused recursive mode to fail * use psyco if available (http://psyco.sf.net) * little changes in C extension 2002-08-29 -- 0.6.1: * fix packaging problems 2002-08-23 -- 0.6.0: * change of the internal representation * remove support for the EZS algorithm (no more maintened for the moment) * add command line options to parse html and to control entities inclusion and output encoding * fixing coalescing text nodes bug * many other bugs fixes * great speed improvement 2002-01-31 -- 0.5.3: * add __init__.py in "logilab" directory 2001-10-29 -- 0.5.2: * bug fixes in xupdate formatting and in the dom interface. 2001-09-07 -- 0.5.1: * Fast Match / Edit Scritp algorithm, now fully usable * fixes Unicode problem 2001-08-10 -- 0.2.1: * bug fixes, optimizations for ezs algorithm 2001-08-04 -- 0.1.1: * original revision xmldiff-0.6.10/fmes.py0000644000201200020120000004473111434467463013472 0ustar alainalain# Copyright (c) 2000 LOGILAB S.A. (Paris, FRANCE). 
def _init_tree(tree, map_attr=None):
    """Append the fmes-specific attributes to every node of *tree*.

    Each node gets a FALSE slot appended (its N_INORDER attribute).  When
    *map_attr* is not None a second FALSE slot is appended (the N_MAPPED
    attribute, used for nodes of tree 1).

    The traversal is iterative so that very deep documents cannot hit the
    interpreter recursion limit.
    """
    add_mapped = map_attr is not None
    stack = [tree]
    while stack:
        node = stack.pop()
        node.append(FALSE)
        if add_mapped:
            node.append(FALSE)
        # slots are appended at the end, so the children slot is unaffected
        stack.extend(node[N_CHILDS])
FmesCorrector: """ Fast Match / Edit Script implementation See [CRGMW95] for reference. """ def __init__(self, formatter, f=0.6, t=0.5): # f=0,59 # algorithm parameters if f>1 or f<0 or t>1 or t<0.5: raise Exception('Invalid parameters: 1 > f > 0 and 1 > t > 0.5') self.F = f self.T = t self._formatter = formatter def process_trees(self, tree1, tree2): """ Process the two trees """ # add needed attribute (INORDER) _init_tree(tree1, map_attr=1) _init_tree(tree2) ## print '**** TREE 2' ## print node_repr(tree2) ## print '**** TREE 1' ## print node_repr(tree1) # attributes initialisation self._mapping = [] # empty mapping self.add_action = self._formatter.add_action self._d1, self._d2 = {}, {} # give references to the C extensions specific to fmes fmes_init(self._mapping, self._d1, self._d2, self.T) self._dict = {} self._tmp_attrs_dict = {} self._pending = [] self._formatter.init() # step 0: mapping self._fast_match(tree1, tree2) # free matching variables match_end() del self._d1 del self._d2 # step 1: breadth first search tree2 self._fmes_step1(tree2, tree1) # step 2: post order traversal tree1 self._fmes_step2(tree1, tree2) # step 3: rename tmp attributes for tmp_name, real_name in self._tmp_attrs_dict.items(): self.add_action(['rename','//%s'%tmp_name, real_name]) # free mapping ref in C extensions fmes_end() self._formatter.end() ## Private functions ###################################################### def _fast_match(self, tree1, tree2): """ the fast match algorithm try to resolve the 'good matching problem' """ labl1, labl2 = {}, {} leaf_labl1, leaf_labl2 = {}, {} # chain all nodes with a given label l in tree T together get_labels(tree1, labl1, leaf_labl1) get_labels(tree2, labl2, leaf_labl2) # do the matching job self._match(leaf_labl1, leaf_labl2, self._l_equal) # remove roots ('/') from labels del labl1['R'] del labl2['R'] # append roots to mapping self._mapping.append((tree1, tree2)) # mark node as mapped tree1[N_MAPPED] = TRUE self._match(labl1, labl2, 
def _match(self, lab_l1, lab_l2, equal):
    """Match the nodes of both trees label by label.

    lab_l1, lab_l2 -- dictionaries mapping a label to the chain of nodes
                      carrying it, for tree 1 and tree 2
    equal -- comparison function handed to lcs2

    Every pair found is appended to the mapping, the tree 1 node is flagged
    as mapped and the ancestor caches are filled.
    """
    pairs = self._mapping
    cache1, cache2 = self._d1, self._d2

    def remember_ancestors(cache, node):
        # fill the mapping cache with one (ancestor, node) key per ancestor
        for ancestor in get_ancestors(node, []):
            cache[(id(ancestor), id(node))] = 1

    # labels present in both trees, sorted to avoid differences between
    # python versions
    shared_labels = intersection(lab_l1.keys(), lab_l2.keys())
    shared_labels.sort()
    for label in shared_labels:
        chain1 = lab_l1[label]
        chain2 = lab_l2[label]
        # the longest common subsequence gives the matched pairs
        for x, y in lcs2(chain1, chain2, equal):
            pairs.append((x, y))
            # mark node from tree 1 as mapped
            x[N_MAPPED] = TRUE
            remember_ancestors(cache1, x)
            remember_ancestors(cache2, y)
def _fmes_step2(self, tree1, tree2):
    """Delete phase of the edit script algorithm.

    Instead of the standard algorithm, walk tree1 in pre order and add a
    remove action for every node not marked as mapped.  Not descending into
    such a node means whole subtrees are removed rather than individual
    leaves.

    Siblings are reached by index rather than with next_sibling, for
    performance.  tree2 is not used.
    """
    stack = []
    i = 0  # index of the current node in its parent's children
    node = tree1
    while node is not None:
        if node[N_MAPPED] != TRUE:
            if node[N_PARENT] and len(node[N_PARENT][N_CHILDS]) > i+1:
                next_node = node[N_PARENT][N_CHILDS][i+1]
                # if next node is a text node to remove, switch actions
                if next_node[N_TYPE] == NT_TEXT and \
                   next_node[N_MAPPED] != TRUE:
                    self.add_action(['remove', f_xpath(next_node)])
                    delete_node(next_node)
                    try:
                        next_node = node[N_PARENT][N_CHILDS][i+1]
                    except IndexError:
                        # the removed text node was the last sibling
                        next_node = None
            else:
                next_node = None
            self.add_action(['remove', f_xpath(node)])
            delete_node(node)
            # i is left unchanged: next_node now sits at the freed index
            node = next_node
        elif node[N_CHILDS]:
            # push next sibling on the stack
            if node[N_PARENT] and len(node[N_PARENT][N_CHILDS]) > i+1:
                stack.append((node[N_PARENT][N_CHILDS][i+1], i+1))
            node = node[N_CHILDS][0]
            i = 0
        elif node[N_PARENT] and len(node[N_PARENT][N_CHILDS]) > i+1:
            i += 1
            node = node[N_PARENT][N_CHILDS][i]
        else:
            node = None
        if node is None and stack:
            node, i = stack.pop()
def _find_pos(self, x):
    """Return the position where the partner of x belongs in tree 1.

    x is a node of the destination tree (tree2).  Returns 0 when x is the
    first child of its parent flagged N_INORDER; otherwise returns the
    position following the partner of the closest N_INORDER left sibling
    of x.  Falls through (implicitly returning None) when that sibling has
    no partner.

    Siblings are scanned by index rather than with previous_sibling, for
    performance.
    """
    y = x[N_PARENT]
    # if x is the leftmost child of y in order, return 1
    for v in y[N_CHILDS]:
        if v[N_INORDER]:
            if v is x:
                # return 0 instead of 1 here since the first element of a
                # list have index 0
                return 0
            # the first in-order child is not x: stop scanning
            break
    # looking for rightmost left sibling of y INORDER
    i = get_pos(x) - 1
    while i >= 0:
        v = y[N_CHILDS][i]
        if v[N_INORDER]:
            break
        i -= 1
    # NOTE(review): when the loop above does not run, v is still the value
    # left by the for loop -- this relies on x itself being flagged in order
    # by the caller; confirm before restructuring
    u = partner(1, v)
    if not u is None:
        return get_pos(u)+1
def _l_equal(self, n1, n2):
    """Tell whether two leaves are similar enough to be matched.

    Returns TRUE when the quick_ratio of both node values is strictly
    greater than the F threshold, FALSE otherwise.
    """
    similar = quick_ratio(n1[N_VALUE], n2[N_VALUE]) > self.F
    return TRUE if similar else FALSE
def lcs2(X, Y, equal):
    """Apply the greedy lcs/ses algorithm between the sequences X and Y.

    X, Y -- any Python sequences (elements need not be hashable)
    equal -- function comparing an element of X with an element of Y, which
             must return a false value if they differ and a true value if
             they are identical

    Returns the list of matched pairs as ``(X[i], Y[j])`` tuples, in
    sequence order; an empty list when either sequence is empty.
    """
    N, M = len(X), len(Y)
    if not X or not Y:
        return []
    max_d = N + M  # largest possible edit distance
    size = 2 * max_d + 1
    # v and common are indexed by diagonal k in [-max_d, max_d]; negative
    # diagonals deliberately use Python's negative indexing
    v = [0] * size
    common = [[] for _ in range(size)]
    for D in range(max_d + 1):
        for k in range(-D, D + 1, 2):
            if k == -D or (k != D and v[k - 1] < v[k + 1]):
                x = v[k + 1]
                common[k] = common[k + 1][:]
            else:
                x = v[k - 1] + 1
                common[k] = common[k - 1][:]
            y = x - k
            # follow the snake: consume matching elements along the diagonal
            while x < N and y < M and equal(X[x], Y[y]):
                common[k].append((x, y))
                x += 1
                y += 1
            v[k] = x
            if x >= N and y >= M:
                return [(X[i], Y[j]) for i, j in common[k]]
def quick_ratio(a, b):
    """Return an upper bound on the similarity ratio of a and b, quickly.

    Optimized version of the standard difflib.py quick_ratio (without junk
    and class).  a and b are viewed as multisets and the number of matches
    is the cardinality of their intersection; this counts matches without
    regard to order, so it is an upper bound.

    Returns 1 when both sequences are empty, else a float in [0, 1].
    """
    if not a and not b:
        return 1
    # avail[x] is the number of times x appears in b, less the number of
    # times it has been seen in a so far
    avail = {}
    for elt in b:
        avail[elt] = avail.get(elt, 0) + 1
    matches = 0
    for elt in a:
        numb = avail.get(elt, 0)
        avail[elt] = numb - 1
        if numb > 0:
            matches += 1
    return 2.0 * matches / (len(a) + len(b))
2,5,_alpha ) print S1, S2 R1 = lcs2( S1, S2, _cmp ) print "lcs2:", "".join( [ x[0] for x in R1 ] ) R2 = lcs4( S1, S2, _cmp ) print "lcs4", "".join( [ x[0] for x in R2 ] ) R3 = lcsm( S1, S2, _cmp ) print "lcsm", "".join( [ x[0] for x in R3 ] ) print assert R1==R2 assert R1==R3 if __name__ == '__main__': main() xmldiff-0.6.10/man/0000755000201200020120000000000011441141233012706 5ustar alainalainxmldiff-0.6.10/man/xmlrev.10000644000201200020120000000154011435670471014323 0ustar alainalain.TH XMLREV 1 "December 22, 2004" xmlrev "User's Manual" .SH NAME xmlrev \- Tree 2 tree correction between xml documents .SH SYNOPSIS .B xmlrev [ .I options .B ] .I original_file modified_file .SH DESCRIPTION .B xmlrev can be used to display the differences between two XML documents computed by .B xmldiff as an HTML document. .SH OPTIONS .IP "-h, --help" display this help message and exit. .IP "--revision" show difference between revisions as an HTML file (default) .IP "--diff" show difference between revisions as XUpdate .SH REQUIRES python, xmldiff, libxml2-utils, xsltproc, sp .SH "SEE ALSO" .B xmldiff(1) .I http://www.logilab.org/project/xmldiff/ .I http://www.xmldb.org/xupdate/xupdate-wd.html#N19b1de .SH AUTHOR Sylvain Thénault This manpage was written by Alexandre Fayolle . xmldiff-0.6.10/man/xmldiff.10000644000201200020120000000467211435670511014443 0ustar alainalain.TH XMLDIFF 1 "December 22, 2004" xmldiff "User's Manual" .SH NAME xmldiff \- Tree 2 tree correction between xml documents .SH SYNOPSIS .B xmldiff [ .I options .B ] .I original_file modified_file .B xmldiff [ .I options .B ] .B -r .I original_directoty modified_directory .SH DESCRIPTION .B xmldiff is a utility for extracting differences between two xml files. It returns a set of primitives to apply on source tree to obtain the destination tree. The implementation is based on .I Change detection in hierarchically structured information, by S. Chawathe, A. Rajaraman, H. Garcia-Molina and J. 
Widom, Stanford University, 1996 .SH OPTIONS .IP "-h, --help" display this help message and exit. .IP "-V, --version" display version number and exit .IP "-H, --html" input files are HTML instead of XML .IP "-r, --recursive" when comparing directories, recursively compare any subdirectories found. .IP "-x, --xupdate" display output following the Xupdate xml specification (see http://www.xmldb.org/xupdate/xupdate-wd.html#N19b1de). .IP "-e encoding, --encoding=encoding" specify the encoding to use for output. Default is UTF-8 .IP "-n, --not-normalize-spaces" do not normalize spaces and new lines in text and comment nodes. .IP "-c, --exclude-comments" do not process comment nodes .IP "-g, --ext-ges" include all external general (text) entities. .IP "-p, --ext-pes" include all external parameter entities, including the external DTD subset. .IP "--profile=file" display an execution profile (runs slower with this option), profile saved to file (binary form). .SH EXIT STATUS 0 means no differences were found. A positive number means some differences were found. A negative number means an error occurred. .SH KNOWN PROBLEMS .B xmldiff uses an algorithm with a (too) high algorithmic complexity, which makes it unsuitable to process large XML documents. If your document has more than about 100 nodes, you should probably look for an alternative solution. Any suggestion regarding this issue is welcomed by Logilab. Please tell us if you are aware of a simpler algorithm to solve this problem. .SH REQUIRES python, pyxml .SH "SEE ALSO" .B xmlrev(1) .I http://www.logilab.org/project/xmldiff/ .I http://www.xmldb.org/xupdate/xupdate-wd.html#N19b1de .SH AUTHOR Sylvain Thénault This manpage was written by Alexandre Fayolle . 
xmldiff-0.6.10/extensions/0000755000201200020120000000000011441141233014332 5ustar alainalainxmldiff-0.6.10/extensions/maplookup.c0000644000201200020120000002446011434467463016535 0ustar alainalain#include #include #include char * __revision__ = "$Id: maplookup.c,v 1.12 2005-06-29 06:49:12 alf Exp $"; /* PYTHON EQUIVALENCES # def _has_couple(couple, mapping): # for a,b in mapping: # if b is couple[1] and a is couple[0]: # return TRUE # return FALSE # def _partner(index, node, mapping): # for i in mapping: # if i[index] is node: # return i[1-index] # return None # def fmes_node_equal(self, n1, n2): # """ function to compare subtree during mapping """ # hk1, hk2 = self._d1.has_key, self._d2.has_key # mapping = self._mapping # # factor 2.5 for tree expansion compensation # length = 0 # i = 0 # for a,b in mapping: # i += 1 # if hk1((id(n1), id(a))): # if hk2((id(n2), id(b))): # length += 1 ## length = len([a for a,b in mapping ## if hk1((id(n1), id(a))) and hk2((id(n2), id(b)))]) # fact = 2.5*length/float(max(n1[N_ISSUE], n2[N_ISSUE])) # if fact >= self.T: # return TRUE # return FALSE */ /******************* functions specific to the fmes algorithm *****************/ static short N_ISSUE = 5 ; /* function to init objects for the next functions * * arguments are (*mapping, *cache_dict1, *cache_dict2, T) */ static PyObject *_mapping, *_dict1, *_dict2 ; static double _T_treshold ; static void free_dicts(void) { Py_XDECREF(_dict1) ; _dict1 = NULL ; Py_XDECREF(_dict2) ; _dict2 = NULL ; } static void free_global(void) { Py_XDECREF(_mapping) ; _mapping = NULL ; free_dicts() ; } static PyObject *fmes_init(PyObject *self, PyObject *args) { free_global() ; if (!PyArg_ParseTuple(args, "OOOd", &_mapping, &_dict1, &_dict2, &_T_treshold)) return NULL ; Py_INCREF(_mapping) ; Py_INCREF(_dict1) ; Py_INCREF(_dict2) ; Py_INCREF(Py_None) ; return Py_None ; } static PyObject *fmes_end(PyObject *self, PyObject *args) { free_global() ; Py_INCREF(Py_None) ; return Py_None ; } static PyObject 
*match_end(PyObject *self, PyObject *args) { free_dicts() ; Py_INCREF(Py_None) ; return Py_None ; } /* look in mapping's couples for an occurence of couple * return 1 if found, None either */ static PyObject *has_couple(PyObject *self, PyObject *args) { PyObject *object1, *object2, *couple; int i; if (!PyArg_ParseTuple(args, "OO", &object1, &object2)) return NULL; for (i=0; i= _T_treshold) { return Py_BuildValue("i", 1) ; } else { Py_INCREF(Py_None); return Py_None; } } static int is_equal( PyObject* equal, PyObject* X, int x, PyObject* Y, int y ) { PyObject *ob1, *ob2, *args, *res; ob1 = PySequence_GetItem( X, x ); ob2 = PySequence_GetItem( Y, y ); args = Py_BuildValue( "NN", ob1, ob2 ); res = PyObject_CallObject( equal, args ); Py_DECREF( args ); return PyObject_IsTrue( res ); } static int get_v( int* V, int d, int k ) { /* accessor function for V[d][K] which is stored as a triangle matrix */ return V[d+k+1]; } static void set_v( int* V, int d, int k, int v ) { /* accessor function for V[d][K] which is stored as a triangle matrix */ V[d+k+1] = v; } static PyObject* lcs2( PyObject* self, PyObject* args ) { PyObject *X, *Y, *equal, *result, *ox, *oy; int N, M, max, D, k, x, y, nmax; int **V, *res, *resp; int xs, idx, v_up, v_down; if (!PyArg_ParseTuple( args, "OOO", &X,&Y,&equal )) return NULL; if (!PySequence_Check(X) || !PySequence_Check(Y)) { PyErr_SetString( PyExc_TypeError, "First two args must be sequences" ); return NULL; } N = PySequence_Length(X); M = PySequence_Length(Y); max = N + M; V = (int**)malloc( (2*max+1)*sizeof(int*) ); memset( V, 0, (2*max+1)*sizeof(int*) ); V[0] = (int*)malloc( 3*sizeof(int) ); memset( V[0], 0, 3*sizeof(int) ); nmax = (N>M ? 
N : M ); res = (int*)malloc( nmax*sizeof(int)*2 ); for(D=0;D<=max;++D) { V[D+1] = (int*)malloc( (2*D+5)*sizeof(int) ); memset( V[D+1], 0, (2*D+5)*sizeof(int) ); for(k=-D;k<=D;k+=2) { v_up = get_v( V[D], D, k+1 ); v_down = get_v( V[D], D, k-1 ); if ( k==-D || ( k != D && v_down < v_up) ) { x = v_up; } else { x = v_down + 1; } y = x - k; while ( x < N && y < M && is_equal( equal, X, x, Y, y ) ) { x += 1; y += 1; /*common = 0;*/ } set_v(V[D+1],D+1,k,x); if ( x >= N && y >= M ) { /* build the maximal path */ k = N-M; resp = res; x = N; y = M; while(D>=0) { v_up = get_v( V[D], D, k+1 ); v_down = get_v( V[D], D, k-1 ); if ( k==-D || ( k != D && v_down < v_up) ) { xs = v_up; k = k + 1; } else { xs = v_down + 1; k = k - 1; } while( x>xs ) { *resp++ = --x; *resp++ = --y; } x = get_v(V[D],D,k); y = x - k; D -= 1; } /* now builds the python list from res */ result = PyList_New( (resp-res)/2 ); idx = 0; resp-=2; while(resp>=res) { ox = PySequence_GetItem( X, resp[0] ); oy = PySequence_GetItem( Y, resp[1] ); PyList_SetItem( result, idx, Py_BuildValue("NN",ox,oy) ); resp-=2; idx+=1; } for(idx=0;idx. 
xmldiff-0.6.10/test/0000755000201200020120000000000011441141233013112 5ustar alainalainxmldiff-0.6.10/test/regrtest.py0000644000201200020120000000626511434467465015360 0ustar alainalain""" xmldiff non regression test """ from os.path import join, basename from cStringIO import StringIO import sys import os import unittest import glob from xmldiff import main DATA_DIR = 'data' class BaseTest(unittest.TestCase): def check_output(self, options, expected): try: output = os.popen('%s %s %s' % (sys.executable, main.__file__, ' '.join(options))) except SystemExit: pass data = output.read().strip() output.close() self.assertEqual(data, expected, '%s:\n%r != %r' % (self.name, data, expected) ) class DiffTest(BaseTest): def test_known(self): old = self.data['old'] new = self.data['new'] for options, res_file in self.data['result']: options = options + [old, new] f = open(res_file) expected = f.read().strip() f.close() self.check_output(options, expected) class RecursiveDiffTest(BaseTest): name = 'RecursiveDiffTest' def test(self): options = ['-r', join(DATA_DIR, 'dir1'), join(DATA_DIR, 'dir2')] expected = """-------------------------------------------------------------------------------- FILE: onlyindir1.xml deleted -------------------------------------------------------------------------------- FILE: onlyindir2.xml added -------------------------------------------------------------------------------- FILE: inbothdir.xml""" self.check_output(options, expected) def make_tests(): """generate tests classes from test info return the list of generated test classes """ tests_files = glob.glob(join(DATA_DIR, '*.xml')) + glob.glob(join(DATA_DIR, '*_result')) + glob.glob(join(DATA_DIR, '*_result_xupdate')) tests = {} # regroup test files for filename in tests_files: base = basename(filename) name = base[:6] filetype = base[7:] if filetype == '1.xml': tests.setdefault(name, {})['old'] = filename elif filetype == '2.xml': tests.setdefault(name, {})['new'] = filename else: options = 
filetype.split('_')[:-1] tests.setdefault(name, {}).setdefault('result', []).append( [options, filename]) result = [] for t_name, t_dict in tests.items(): try: old = t_dict['old'] new = t_dict['new'] res_data = t_dict['result'] except KeyError, e: msg = '** missing files in %s (%s)' % (t_name, e) print >>sys.stderr, msg continue class DiffTestSubclass(DiffTest): name = t_name data = t_dict result.append(DiffTestSubclass) return result def suite(): return unittest.TestSuite([unittest.makeSuite(test) for test in make_tests() + [RecursiveDiffTest]]) def Run(runner=None): testsuite = suite() if runner is None: runner = unittest.TextTestRunner() return runner.run(testsuite) if __name__ == '__main__': Run() xmldiff-0.6.10/test/runtests.py0000644000201200020120000000020511434467465015374 0ustar alainalainfrom logilab.common.testlib import main if __name__ == '__main__': import sys, os main(os.path.dirname(sys.argv[0]) or '.') xmldiff-0.6.10/test/unittest_options.py0000644000201200020120000000061611434467465017145 0ustar alainalain import unittest import os import xmldiff.main _xmlpath = xmldiff.main.__file__ class TestProfile(unittest.TestCase): def test_profiler(self): res = os.system("python %s --profile=test.prof data/test00_1.xml data/test00_2.xml" % _xmlpath ) self.assert_( res == 0 ) self.assert_( os.access( "test.prof", os.R_OK ) ) if __name__ == "__main__": unittest.main() xmldiff-0.6.10/test/unittest_difflib.py0000644000201200020120000000126611434467465017053 0ustar alainalain import unittest from xmldiff.mydifflib import quick_ratio, lcs2 def _cmp( a, b ): return a==b class TestLcs2(unittest.TestCase): def help_test(self, seq1, seq2, res ): seq = lcs2( seq1, seq2, _cmp ) self.assertEqual( seq, zip( res, res ) ) def test_lcs_1(self): self.help_test( "abcdefghijkl", "bcdeghijk", "bcdeghijk" ) def test_lcs_2(self): self.help_test( "abdefghijkl", "bcdeghijk", "bdeghijk" ) def test_lcs_3(self): self.help_test( "abdefghijkl", "bxcydzewgzhijk", "bdeghijk" ) def 
test_lcs_4(self): self.help_test( "abdefghijkl", "zzzbcdeghijk", "bdeghijk" ) if __name__ == "__main__": unittest.main() xmldiff-0.6.10/test/unittest_parser.py0000644000201200020120000001442011434467465016744 0ustar alainalain# Copyright (c) 2000 LOGILAB S.A. (Paris, FRANCE). # http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ unit tests for xmldiff. 
""" import unittest import cStringIO from copy import deepcopy #import xmldiff.ezs from xmldiff.objects import * from xmldiff.ezs import trees_equal import sys class Tree2TreeKnownValues(unittest.TestCase): """ This class check that source_vector functions give known result with known input """ DATA = cStringIO.StringIO(""" syt@logilab.org /home/syt/.netscape/bookmarks.html """) from xmldiff.parser import SaxHandler from xml.sax import make_parser dh = SaxHandler(1, 1) parser = make_parser() parser.setContentHandler(dh) parser.parse(DATA) xmltree1 = dh.get_tree() DATA = cStringIO.StringIO(""" syt@logab.org /home/syt/.netscape/bookmarks.html """) dh.__init__(1, 1) parser.parse(DATA) xmltree2 = dh.get_tree() # those variables may change if the "DATA" string change # this is the post order numbers HKNOWN_VALUES = { 'N/a[0]': 14, 'a/a[0]': 1, 'N/a[0]/b[0]': 3, 'b/a[0]/b[0]': 2, 'N/a[0]/c[0]': 5, 'c/a[0]/c[0]': 4, 'N/a[0]/d[0]': 13, 'd/a[0]/d[0]': 6, 'N/a[0]/d[0]/e[0]': 10, 'e/a[0]/d[0]/e[0]': 7, 'N/a[0]/d[0]/e[0]/h[0]': 9, 'h/a[0]/d[0]/e[0]/h[0]': 8, 'N/a[0]/d[0]/f[0]': 12, 'f/a[0]/d[0]/f[0]': 11 } dh.__init__(1, 1) parser.parse(cStringIO.StringIO(""" """)) tree1 = dh.get_tree() dh.__init__(1, 1) parser.parse(cStringIO.StringIO(""" """)) tree2 = dh.get_tree() def setUp (self): """ called before each test from this class """ self.nl1, self.nl2 = [], [] ## def test_po_known_values(self): ## """ ## post_order should give known result with known input ## """ ## ezsc = ezs.EzsCorrector() ## ezsc._post_order(self.tree1, self.nl1, TRUE) ## num = 1 ## for node in self.nl1[:-1]: ## self.assert_(num == self.HKNOWN_VALUES[node[N_VALUE] + f_xpath(node)], ## 'Post order Error for '+f_xpath(node)) ## num = num + 1 ## def test_st_known_values(self): ## """ ## swap_trees should give known result with known input ## """ ## ezsc = ezs.EzsCorrector() ## ezsc._post_order(self.tree1, self.nl1, TRUE) ## n1 = self.tree1[N_CHILDS][0][N_CHILDS][1] ## s1 = self.tree1[N_CHILDS][0][N_CHILDS][2] 
## self.assert_(ezs.swap_trees(n1, s1, n1, s1) == ezs.C_INFINI, 'SWAP') ## self.assert_(ezs.swap_trees(n1, s1, s1, n1) == ezs.C_SWAP, 'SWAP') ## def test_ezs_known_values(self): ## """ ## ezs should give known result with known input ## """ ## ezsc = ezs.EzsCorrector() ## actions = ezsc.process_trees(self.tree1, self.tree2) ## action_list1= ['append', 'swap'] ## for i in range(len(actions)): ## self.assert_(actions[i][A_DESC] == action_list1[i]) ## actions = ezsc.process_trees(self.xmltree1, self.xmltree2) ## #import format ## #format.factions_print(actions) ## action_list1= ['insert-after', 'remove', 'update', 'update', 'swap'] ## for i in range(len(actions)): ## self.assert_(actions[i][0] == action_list1[i]) def test_sanity(self): """ global sanity check for do() function """ self.assert_(trees_equal(self.tree1, self.tree1), 'Tree unequal error') self.assert_(not trees_equal(self.tree1, self.tree2), 'Tree equal error') # print "SOURCE TREE" # xml_print(self.tree1) # print "DESTINATION TREE" # xml_print(self.tree2) def suite(): """return the unitest suite""" loader = unittest.TestLoader() module = sys.modules[__name__] if __name__ == '__main__' and len(sys.argv) > 1: return loader.loadTestsFromNames(sys.argv[1:], module) return loader.loadTestsFromModule(module) def Run(runner=None): """run tests""" testsuite = suite() if runner is None: runner = unittest.TextTestRunner() # uncomment next line to write tests results in a file #runner.__init__(open('tests.log','w+')) return runner.run(testsuite) if __name__ == '__main__': Run() xmldiff-0.6.10/test/README0000644000201200020120000000077411434467463014024 0ustar alainalainData files can be added in the data/ subdirectory, with the following naming conventions: * files belonging to the same test share the first 6 characters of their name * the 7th character is an underscore * original file name ends with 1.xml * modified file name ends with 2.xml * result file names end with _result, and can contain command line 
options to be passed to xmldiff separated with underscores Example: test02_1.xml test02_2.xml test02_result test02_--xupdate_result -- Alexandre Fayolle xmldiff-0.6.10/test/data/0000755000201200020120000000000011441141234014024 5ustar alainalainxmldiff-0.6.10/test/data/test03_1.xml0000644000201200020120000000004611434467464016132 0ustar alainalainmoretexthehe xmldiff-0.6.10/test/data/test08_2.xml0000644000201200020120000000176111434467464016145 0ustar alainalain 1 2 2.1 3 1 3 2 1 3 This WAS the fourth sentence. This is the fifth sentence. This is the and improved sixth sentence. This is the seventh sentence. This is the Eighth sentence. This is (changed) the Ninth sentence. This is now the Tenth sentence. xmldiff-0.6.10/test/data/test04_1.xml0000644000201200020120000000077711434467464016146 0ustar alainalain almastlogilab.org xmldiff-0.6.10/test/data/test07_--xupdate_result0000644000201200020120000000032711434467464020403 0ustar alainalain xmldiff-0.6.10/test/data/test03_2.xml0000644000201200020120000000007311434467464016133 0ustar alainalainiihehe moretext xmldiff-0.6.10/test/data/test01_1.xml0000644000201200020120000000006611434467464016132 0ustar alainalain xmldiff-0.6.10/test/data/test02_2.xml0000644000201200020120000000003211434467464016125 0ustar alainalainiimoretext xmldiff-0.6.10/test/data/test00_--xupdate_result0000644000201200020120000000251611434467464020376 0ustar alainalain box hoye! 
syt@logilab.org val new attribute it italian 7797 xmldiff-0.6.10/test/data/test05_1.xml0000644000201200020120000000162511434467464016140 0ustar alainalain xmldiff-0.6.10/test/data/test00_1.xml0000644000201200020120000000065311434467464016133 0ustar alainalain almaster@logilab.org xmldiff-0.6.10/test/data/test07_2.xml0000644000201200020120000000001511434467464016133 0ustar alainalaintexte xmldiff-0.6.10/test/data/test03_--xupdate_result0000644000201200020120000000122611434467464020376 0ustar alainalain moretext branch ii hehe ii xmldiff-0.6.10/test/data/test00_result0000644000201200020120000000120211434467464016501 0ustar alainalain[rename, /memory[1]/mailbox[1], box] [insert-after, /memory[1]/spoken-languages[1], hoye! ] [update, /memory[1]/email_addr[1]/text()[1], syt@logilab.org] [rename, /memory[1]/junkbuster-method[1]/@value, val] [append-first, /memory[1]/junkbuster-method[1], ] [append, /memory[1]/spoken-languages[1], <@new> new attribute ] [insert-after, /memory[1]/spoken-languages[1]/language[2], ] [update, /memory[1]/server-socket[2]/@port, 7797] [remove, /memory[1]/spoken-languages[1]/language[1]] [remove, /memory[1]/spoken-languages[1]/language[3]] xmldiff-0.6.10/test/data/test08_result0000644000201200020120000000316211434467465016521 0ustar alainalain[insert-after, /Tests[1]/Test[8], This is the seventh sentence. ] [insert-after, /Tests[1]/Test[9], This is the Eighth sentence. 
] [insert-after, /Tests[1]/Test[1]/One[2], 2.1 ] [insert-after, /Tests[1]/Test[3]/Three[2], 1 ] [remove, /Tests[1]/Test[5]/@type] [append, /Tests[1]/Test[7], <@LogilabXmldiffTmpAttrtype> Insert mixed element ] [rename, /Tests[1]/Test[7]/Seven[1], Five] [remove, /Tests[1]/Test[6]/@type] [append, /Tests[1]/Test[8], <@LogilabXmldiffTmpAttrtype> Insert mixed element with text ] [rename, /Tests[1]/Test[8]/Eight[1], Six] [move-first, /Tests[1]/Test[7]/@type, /Tests[1]/Test[9]] [move-first, /Tests[1]/Test[8]/@type, /Tests[1]/Test[10]] [update, /Tests[1]/Test[4]/Four[1]/text()[1], This WAS the fourth sentence.] [update, /Tests[1]/Test[7]/Five[1]/text()[1], This is the] [insert-after, /Tests[1]/Test[7]/Five[1]/text()[1], ] [update, /Tests[1]/Test[7]/Five[1]/text()[1], fifth sentence.] [update, /Tests[1]/Test[8]/Six[1]/text()[1], This is the] [insert-after, /Tests[1]/Test[8]/Six[1]/text()[1], and improved ] [update, /Tests[1]/Test[8]/Six[1]/text()[1], sixth sentence.] [insert-after, /Tests[1]/Test[11]/Nine[1]/text()[1], (changed) ] [remove, /Tests[1]/Test[2]/Two[2]] [remove, /Tests[1]/Test[3]/Three[1]] [remove, /Tests[1]/Test[5]] [remove, /Tests[1]/Test[5]] [remove, /Tests[1]/Test[5]/Five[1]/b[1]] [remove, /Tests[1]/Test[6]/Six[1]/b[1]] [remove, /Tests[1]/Test[9]/Nine[1]/b[2]] [rename, //LogilabXmldiffTmpAttrtype, type] xmldiff-0.6.10/test/data/test06_result0000644000201200020120000000025011434467464016511 0ustar alainalain[insert-after, /a[1]/text()[1], ] [move-after, /a[1]/b[1], /a[1]/text()[2]] [remove, /a[1]/text()[1]] [remove, /a[1]/LogilabXMLDIFFFAKETag[1]] xmldiff-0.6.10/test/data/test05_result0000644000201200020120000000134711434467464016520 0ustar alainalain[insert-after, /bean[1]/add[1]/bean[1]/property[2], ] [move-first, /bean[1]/add[1]/bean[1]/add[2]/bean[1]/@class, /bean[1]/add[1]/bean[1]/add[1]/bean[1]] [move-after, /bean[1]/add[1]/bean[1]/add[2]/bean[1]/event-binding[1], /bean[1]/add[1]/bean[1]/add[1]/bean[1]/property[3]] [update, 
/bean[1]/add[1]/bean[1]/add[1]/bean[1]/@class, java.awt.Scrollbar] [update, /bean[1]/add[1]/bean[1]/add[1]/bean[1]/event-binding[1]/@name, adjustment] [update, /bean[1]/add[1]/bean[1]/add[1]/bean[1]/event-binding[1]/@targetObject, adjustmenthandler] [remove, /bean[1]/add[1]/bean[1]/add[2]] xmldiff-0.6.10/test/data/test07_1.xml0000644000201200020120000000003411434467464016133 0ustar alainalaintexteautre texte xmldiff-0.6.10/test/data/test06_1.xml0000644000201200020120000000003211434467464016130 0ustar alainalainiimoretext xmldiff-0.6.10/test/data/test03_result0000644000201200020120000000025711434467464016515 0ustar alainalain[append-first, /, moretext ] [rename, /a[2], branch] [move-first, /branch[1], /a[1]] [append-first, /a[1]/branch[1], ii ] [remove, /a[1]/branch[1]/b[1]] xmldiff-0.6.10/test/data/test08_--xupdate_result0000644000201200020120000000610411434467464020403 0ustar alainalain This is the seventh sentence. This is the Eighth sentence. 2.1 1 Insert mixed element Five Insert mixed element with text Six Delete mixed element Delete mixed element with text This WAS the fourth sentence. This is the fifth sentence. This is the and improved sixth sentence. (changed) type xmldiff-0.6.10/test/data/test06_2.xml0000644000201200020120000000002711434467464016135 0ustar alainalainmoretext xmldiff-0.6.10/test/data/test04_result0000644000201200020120000000222611434467464016514 0ustar alainalain[move-first, /memory[1]/comment()[1], /memory[1]] [update, /memory[1]/comment()[1], new comment] [insert-after, /memory[1]/comment()[1], and new text ] [rename, /memory[1]/mailbox[1], box] [insert-after, /memory[1]/box[1], ] [insert-after, /memory[1]/email_addr[1], insert test ] [insert-after, /memory[1]/junkbuster-method[1], ] [insert-after, /memory[1]/spoken-languages[1], hoye! 
] [insert-after, /memory[1]/test[1], and some new text ] [rename, /memory[1]/box[1]/@path, pathe] [update, /memory[1]/email_addr[1]/text()[1], syt@logilab.org] [rename, /memory[1]/junkbuster-method[1]/@value, val] [append-first, /memory[1]/junkbuster-method[1], ] [append, /memory[1]/spoken-languages[1], <@new> new attribute ] [insert-after, /memory[1]/spoken-languages[1]/language[2], ] [update, /memory[1]/server-socket[1]/@port, 7rm -rf tm776] [update, /memory[1]/server-socket[2]/@port, 7797] [remove, /memory[1]/@attr] [remove, /memory[1]/spoken-languages[1]/language[1]] [remove, /memory[1]/spoken-languages[1]/language[3]] xmldiff-0.6.10/test/data/test07_result0000644000201200020120000000005711434467464016517 0ustar alainalain[remove, /a[1]/text()[2]] [remove, /a[1]/b[1]] xmldiff-0.6.10/test/data/test02_--xupdate_result0000644000201200020120000000077511434467464020405 0ustar alainalain ii b moretext xmldiff-0.6.10/test/data/test00_2.xml0000644000201200020120000000075511434467464016137 0ustar alainalain syt@logilab.org hoye! xmldiff-0.6.10/test/data/test06_--xupdate_result0000644000201200020120000000103311434467464020375 0ustar alainalain xmldiff-0.6.10/test/data/test04_--xupdate_result0000644000201200020120000000501411434467464020376 0ustar alainalain new comment new comment and new text box insert test update comment hoye! and some new text pathe syt@logilab.org val bip 7776 new attribute it italian 7rm -rf tm776 7797 xmldiff-0.6.10/test/data/test08_1.xml0000644000201200020120000000172711434467464016146 0ustar alainalain 1 2 3 1 2 3 1 2 3 This is the fourth sentence. This is the fifth sentence. This is the sixth sentence. This is the seventh sentence. This is now the Eighth sentence. This is now the Ninth sentence. This is now the Tenth sentence. 
xmldiff-0.6.10/test/data/test01_result0000644000201200020120000000006011434467464016503 0ustar alainalain[append-first, /, ] [remove, /oopoyy[1]] xmldiff-0.6.10/test/data/test05_--xupdate_result0000644000201200020120000000272111434467464020401 0ustar alainalain java.awt.Scrollbar adjustment adjustmenthandler java.awt.Scrollbar adjustment adjustmenthandler xmldiff-0.6.10/test/data/test02_1.xml0000644000201200020120000000002711434467464016130 0ustar alainalainmoretext xmldiff-0.6.10/test/data/dir1/0000755000201200020120000000000011441141234014663 5ustar alainalainxmldiff-0.6.10/test/data/dir1/onlyindir1.xml0000644000201200020120000000001011434467464017506 0ustar alainalain xmldiff-0.6.10/test/data/dir1/inbothdir.xml0000644000201200020120000000001011434467464017400 0ustar alainalain xmldiff-0.6.10/test/data/test02_result0000644000201200020120000000016211434467464016507 0ustar alainalain[append-first, /, ii ] [rename, /a[2], b] [move-after, /b[1], /a[1]/text()[1]] [remove, /a[1]/b[1]/b[1]] xmldiff-0.6.10/test/data/test05_2.xml0000644000201200020120000000172611434467464016143 0ustar alainalain xmldiff-0.6.10/test/data/test04_2.xml0000644000201200020120000000124111434467464016132 0ustar alainalain and new text syt@logilab.org insert test hoye! 
and some new text xmldiff-0.6.10/test/data/test01_--xupdate_result0000644000201200020120000000043511434467464020375 0ustar alainalain xmldiff-0.6.10/test/data/test01_2.xml0000644000201200020120000000003511434467464016127 0ustar alainalain xmldiff-0.6.10/test/data/dir2/0000755000201200020120000000000011441141234014664 5ustar alainalainxmldiff-0.6.10/test/data/dir2/onlyindir2.xml0000644000201200020120000000001011434467464017510 0ustar alainalain xmldiff-0.6.10/test/data/dir2/inbothdir.xml0000644000201200020120000000001011434467464017401 0ustar alainalain xmldiff-0.6.10/xsl/0000755000201200020120000000000011441141234012742 5ustar alainalainxmldiff-0.6.10/xsl/docbook_rev.xsl0000644000201200020120000000223111434467465016007 0ustar alainalain
Revisionflag on unexpected element: (Assuming block)
xmldiff-0.6.10/xsl/xmlrev.xslt0000644000201200020120000000761111434467465015223 0ustar alainalain 1.0 xml yes node() *|@*|text() @*|text() . revisionflag changed revisionflag removed FIXME/TODO: xupdate:insert-before is not implemented yet. Care to help ? FIXME/TODO: xupdate:insert-after is not implemented yet. Care to help ? revisionflag removed @*|*|text() FIXME/TODO: xupdate:append is not implemented yet. Care to help ? xmldiff-0.6.10/format.py0000644000201200020120000002641111434467463014023 0ustar alainalain# Copyright (c) 2000 LOGILAB S.A. (Paris, FRANCE). # http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
""" this module provides classes to format the native tree2tree output """ import types try: from xml.dom import EMPTY_NAMESPACE as NO_NS except: NO_NS = None from xmldiff.objects import A_N1, A_N2, A_DESC, N_PARENT, caract, \ xml_print, f_xpath, XUPD_PREFIX, XUPD_URI, to_dom from sys import stdout def get_attrs_string(attrs): """ extract and return a string corresponding to an attributes list """ attr_s = [] for attr_n, attr_v in attrs: attr_s.append('%s="%s" '%(attr_n, attr_v)) return ' '.join(attr_s) ## XUPDATE FUNCTIONS ########################################################## def open_xupdate_node(type, attrs, indent='', stream=stdout): """print opening xupdate tag""" stream.write('<%s:%s %s>' % (XUPD_PREFIX, type, get_attrs_string(attrs))) def close_xupdate_node(action, indent='', stream=stdout): """print closing xupdate tag""" stream.write('\n' % (XUPD_PREFIX, action)) def write_xupdate_node(type, attrs, indent='', stream=stdout): """print single xupdate tag""" stream.write('<%s:%s %s/>\n' % (XUPD_PREFIX, type, get_attrs_string(attrs))) ## Formatter interface ######################################################## class AbstractFormatter: """ Formatter interface """ def init(self, stream=stdout): """ method called before the begining of the tree 2 tree correction """ self.edit_s = [] self._stream = stream def add_action(self, action): """ method called when an action is added to the edit script """ self.edit_s.append(action) def format_action(self, action): """ method called by end() to format each action in the edit script at least this method should be overridden """ raise NotImplementedError() def end(self): """ method called at the end of the tree 2 tree correction """ for action in self.edit_s: self.format_action(action) ## Internal Formatter ########################################################## class InternalPrinter(AbstractFormatter): """ print actions in the internal format """ def add_action(self, action): """ See AbstractFormatter interface 
""" if len(action) > 2 and type(action[A_N2]) == types.ListType: if type(action[A_N1]) == types.ListType: #swap or move node action[A_N1] = f_xpath(action[A_N1]) action[A_N2] = f_xpath(action[A_N2]) AbstractFormatter.add_action(self, action) def format_action(self, action): """ See AbstractFormatter interface """ if len(action) > 2 and type(action[A_N2]) == types.ListType: self._stream.write('[%s, %s,\n' % (action[A_DESC], action[A_N1])) xml_print(action[A_N2]) self._stream.write("]\n") elif len(action) > 2: self._stream.write('[%s, %s, %s]\n' % (action[A_DESC], action[A_N1], action[A_N2])) else: self._stream.write('[%s, %s]\n' % (action[A_DESC], action[A_N1])) ## XUpdate Formatters (text / DOM) ############################################# class XUpdateMixIn: """ XUpdate mixin to preprocess added actions """ def add_action(self, action): """ See AbstractFormatter interface """ if action[A_DESC] == 'move-first': # replace move-first with remove and insert (sibling nodes) self.edit_s.append(('remove', f_xpath(action[A_N1]))) self.edit_s.append(('append', f_xpath(action[A_N2]), action[A_N1])) elif action[A_DESC] == 'move-after': # replace move-after with remove and insert (sibling nodes) self.edit_s.append(('remove', f_xpath(action[A_N1]))) self.edit_s.append(('insert-after', f_xpath(action[A_N2]), action[A_N1])) elif action[A_DESC] == 'move-and-rename': # replace move-and-rename with remove and append (attribute nodes) self.edit_s.append(('remove', f_xpath(action[A_N1]))) self.edit_s.append(('append', f_xpath(action[A_N2][N_PARENT]), action[A_N2])) elif action[A_DESC] == 'swap': # replace swap with remove and insert (sibling nodes) self.edit_s.append(('remove', f_xpath(action[A_N2]))) self.edit_s.append(('insert-after', f_xpath(action[A_N1]), action[A_N2])) else: self.edit_s.append(action) class XUpdatePrinter(XUpdateMixIn, AbstractFormatter): """ take the actions list in standard format and output it following Xupdate xml specification """ def init(self, stream = 
stdout): """ See AbstractFormatter interface """ AbstractFormatter.init(self, stream) self._stream.write(''' \n''' % (XUPD_PREFIX, XUPD_URI)) def format_action(self, action, indent=' '): """ See AbstractFormatter interface """ if action[A_DESC] == 'remove': write_xupdate_node(action[A_DESC], (('select', action[A_N1]), ), indent, self._stream) elif action[A_DESC] == 'append-last': open_xupdate_node('append', (('select', action[A_N1]), ('child', 'last()')), indent, self._stream) xml_print(action[A_N2], indent, xupdate=1, stream=self._stream) close_xupdate_node('append', indent, self._stream) elif action[A_DESC] == 'append-first': open_xupdate_node('append', (('select', action[A_N1]), ('child', 'first()')), indent, self._stream) xml_print(action[A_N2], indent, xupdate=1, stream=self._stream) close_xupdate_node('append', indent, self._stream) elif action[A_DESC] in ['append', 'insert-after']: open_xupdate_node(action[A_DESC], (('select', action[A_N1]), ), indent, self._stream) xml_print(action[A_N2], indent, xupdate=1, stream=self._stream) close_xupdate_node(action[A_DESC], indent, self._stream) elif action[A_DESC] == 'rename': open_xupdate_node(action[A_DESC], (('select', action[A_N1]), ), indent, self._stream) self._stream.write(action[A_N2]) close_xupdate_node(action[A_DESC], indent, self._stream) else: open_xupdate_node(action[A_DESC], (('select', action[A_N1]), ), indent, self._stream) self._stream.write(action[A_N2]) close_xupdate_node(action[A_DESC], indent, self._stream) self._stream.write('\n') self._stream.flush() def end(self): """ See AbstractFormatter interface """ AbstractFormatter.end(self) self._stream.write(''%XUPD_PREFIX) class DOMXUpdateFormatter(XUpdateMixIn, AbstractFormatter): """ take the actions list in standard format and return a dom tree which follow Xupdate xml specification (without xupdate namespace) dom tree is append to doc (DOM Document node) """ def __init__(self, doc, encoding='UTF-8'): """ Instance attributes are doc and encoding """ 
self.doc = doc self.encoding = encoding def init(self): """ See AbstractFormatter interface """ AbstractFormatter.init(self) output = self.doc.createElementNS(XUPD_URI, '%s:modifications'%XUPD_PREFIX) output.setAttributeNS(NO_NS, 'version', '1.0') self.output = output def format_action(self, action): """ See AbstractFormatter interface """ doc = self.doc if action[A_DESC] == 'remove': node = doc.createElementNS(XUPD_URI, '%s:%s' % (XUPD_PREFIX, action[A_DESC])) node.setAttributeNS(NO_NS, 'select', action[A_N1]) elif action[A_DESC] == 'append-first': node = doc.createElementNS(XUPD_URI, '%s:%s'% (XUPD_PREFIX, 'append')) node.setAttributeNS(NO_NS, 'select', action[A_N1]) node.setAttributeNS(NO_NS, 'child', 'first()') node.appendChild(to_dom(action[A_N2], doc, XUPD_URI, XUPD_PREFIX)) elif action[A_DESC] == 'append-last': node = doc.createElementNS(XUPD_URI, '%s:%s' % (XUPD_PREFIX, 'append')) node.setAttributeNS(NO_NS, 'select', action[A_N1]) node.setAttributeNS(NO_NS, 'child', 'last()') node.appendChild(to_dom(action[A_N2], doc, XUPD_URI, XUPD_PREFIX)) elif action[A_DESC] in ['append', 'insert-after', 'insert-before']: node = doc.createElementNS(XUPD_URI, '%s:%s' % (XUPD_PREFIX, action[A_DESC])) node.setAttributeNS(NO_NS, 'select', action[A_N1]) node.appendChild(to_dom(action[A_N2], doc, XUPD_URI, XUPD_PREFIX)) elif action[A_DESC] == 'rename': node = doc.createElementNS(XUPD_URI, '%s:%s' %(XUPD_PREFIX, action[A_DESC])) node.setAttributeNS(NO_NS, 'name', action[A_N1]) v = unicode(action[A_N2], self.encoding) node.appendChild(doc.createTextNode(v)) else: node = doc.createElementNS(XUPD_URI, '%s:%s' % (XUPD_PREFIX, action[A_DESC])) node.setAttributeNS(NO_NS, 'select', action[A_N1]) v = unicode(action[A_N2], self.encoding) node.appendChild(doc.createTextNode(v)) # append xupdate node self.output.appendChild(node) xmldiff-0.6.10/DEPENDS0000644000201200020120000000001311434467462013153 0ustar alainalainpython-xml 
xmldiff-0.6.10/input.py0000644000201200020120000000756411434467463013702 0ustar alainalain""" Provides functions for converting DOM tree or xml file in order to process it with xmldiff functions. """ # Copyright (c) 2001 LOGILAB S.A. (Paris, FRANCE). # http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. def tree_from_stream(stream, norm_sp=1, ext_ges=0, ext_pes=0, include_comment=1, encoding='UTF-8', html=0): """ create internal tree from xml stream (open file or IOString) if norm_sp = 1, normalize space and new line """ from xml.sax import make_parser, SAXNotRecognizedException from xml.sax.handler import feature_namespaces, feature_external_ges, \ feature_external_pes, property_lexical_handler from xmldiff.parser import SaxHandler handler = SaxHandler(norm_sp, include_comment, encoding) if html: parser = make_parser(["xml.sax.drivers2.drv_sgmlop_html"]) else: parser = make_parser() # do not perform Namespace processing parser.setFeature(feature_namespaces, 0) # do not include any external entities try: parser.setFeature(feature_external_ges, ext_ges) #xml.sax._exceptions. except SAXNotRecognizedException: print 'Unable to set feature external ges' try: parser.setFeature(feature_external_pes, ext_pes) #xml.sax._exceptions. 
except SAXNotRecognizedException: print 'Unable to set feature external pes' # add lexical handler for comments, entities, dtd and cdata parser.setProperty(property_lexical_handler, handler) parser.setContentHandler(handler) parser.parse(stream) return handler.get_tree() def tree_from_dom(root, ezs=0): """ create internal tree from DOM subtree """ from xml.dom.ext.Dom2Sax import Dom2SaxParser from xml.sax.handler import feature_namespaces, property_lexical_handler #from parser import DomParser parser = Dom2SaxParser() from xmldiff.parser import SaxHandler handler = SaxHandler(normalize_space=0, include_comment=1) # do not perform Namespace processing parser.setFeature(feature_namespaces, 0) # add lexical handler for comments, entities, dtd and cdata parser.setProperty(property_lexical_handler, handler) parser.setContentHandler(handler) parser.parse(root) return handler.get_tree() if __name__ == '__main__': from xml.dom.ext import StripXml, PrettyPrint from xml.dom.ext.reader.Sax2 import Reader import sys reader = Reader() file = open(sys.argv[1],'r') fragment = reader.fromStream(file) d = StripXml(fragment) file.close() tree = tree_from_dom(d) file = open(sys.argv[2],'r') fragment = reader.fromStream(file) d = StripXml(fragment) file.close() tree2 = tree_from_dom(d) from xmldiff.objects import repr print 'Source tree', repr(tree) print 'Destination tree', repr(tree2) #from ezs import EzsCorrector #strategy = EzsCorrector() from xmldiff.fmes import FmesCorrector strategy = FmesCorrector(0.59, 0.5) #from ezs import process actions = strategy.process_trees(tree, tree2) from xmldiff.format import xupdate_dom PrettyPrint( xupdate_dom( reader.fromString(''), actions)) xmldiff-0.6.10/bin/0000755000201200020120000000000011441141233012703 5ustar alainalainxmldiff-0.6.10/bin/xmldiff0000755000201200020120000000007011434467463014301 0ustar alainalain#!/usr/bin/python from xmldiff import main main.run() 
xmldiff-0.6.10/bin/xmldiff.bat0000644000201200020120000000006511434467463015047 0ustar alainalain@python -c "from xmldiff import main; main.run()" %* xmldiff-0.6.10/bin/xmlrev0000755000201200020120000000465411434467463014201 0ustar alainalain#!/bin/bash # # (c) 2001-2004 Nicolas Chauvat - License is GPL set -e DIFF="xmldiff -x" TMPFILE_S=$(mktemp /tmp/xmlrev.start.XXXXXX) || exit 1 TMPFILE_X=$(mktemp /tmp/xmlrev.dest.XXXXXX) || exit 1 TMPFILE_D=$(mktemp /tmp/xmlrev.xupdate.XXXXXX) || exit 1 TMPFILE_XSLT=$(mktemp /tmp/xmlrev.xslt.XXXXXX) || exit 1 TMPFILES="$TMPFILE_S $TMPFILE_X $TMPFILE_D $TMPFILE_XSLT" function guess_doc_type() { # $1 filepath FT=`file -b -- "$1"` case "$FT" in *SGML*) echo SGML ;; *XML*) echo XML ;; *text*) case "$1" in *.xml|*.XML|*.xsl|*.XSL|*.xslt|*.XSLT) echo XML ;; esac ;; esac } function sgml_to_xml() { # $1 filepath sgmlnorm "$1" > "$TMPFILE_S" xmllint --sgml --nowarning "$TMPFILE_S" | grep -v " "$TMPFILE_X" echo $TMPFILE_X ;; XML) echo $1 ;; esac } function check_args(){ if [ -z "$1" ] || [ -z "$2" ] then usage exit 1 fi for filename in "$1" "$2" do if [ ! 
-e "$filename" ] then echo Error: echo $filename not found echo usage exit 2 fi DOC=`normalize "$filename"` if [ -z "$DOC" ] then echo Error: $filename is not an XML or SGML file usage exit 3 fi done } function diff() { check_args "$1" "$2" DOC_OLD=`normalize "$1"` DOC_NEW=`normalize "$2"` $DIFF "$DOC_OLD" "$DOC_NEW" || true } function revision() { check_args "$1" "$2" DOC_OLD=`normalize "$1"` DOC_NEW=`normalize "$2"` $DIFF "$DOC_OLD" "$DOC_NEW" > "$TMPFILE_D" || true xsltproc "$ML_DIR"/xmlrev.xslt "$TMPFILE_D" > "$TMPFILE_XSLT" xsltproc "$TMPFILE_XSLT" "$DOC_OLD" } function usage(){ echo "USAGE: $0 [OPTIONS] from_file to_file" echo echo OPTIONS: echo " --revision" echo " show difference between revisions as an HTML file (default)" echo " --diff" echo " show difference between revisions as xupdate" } ML_DIR=/usr/share/sgml/stylesheet/xmldiff case "$1" in --help) usage exit 0 ;; --revision) revision "$2" "$3" ;; --diff) diff "$2" "$3" ;; *) revision "$1" "$2" ;; esac rm -f $TMPFILES xmldiff-0.6.10/parser.py0000644000201200020120000001504011434467463014023 0ustar alainalain# Copyright (c) 2000 LOGILAB S.A. (Paris, FRANCE). # http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details? # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
""" This file contains a parser to transform xml document into an internal tree in order to avoid adding new primitives with tree transformation This operation represent all the document in a tree without attributes on nodes nor text nodes, only nodes with a name and a child list (the tree is composed by elements of type Node, defined below) """ from xmldiff.objects import NT_ROOT, NT_NODE, NT_ATTN, NT_ATTV, \ NT_TEXT, NT_COMM, N_TYPE, N_ISSUE, N_CHILDS, N_VALUE, link_node from xml.sax import ContentHandler def _inc_xpath(h, xpath): try: h[xpath] = h[xpath] + 1 except KeyError: h[xpath] = 1 class SaxHandler(ContentHandler): """ Sax handler to transform xml doc into basic tree """ def __init__(self, normalize_space, include_comment, encoding='UTF-8'): self.encoding = encoding self._p_stack = [[NT_ROOT, '/', '', [], None, 0, 0]] self._norm_sp = normalize_space or None self._incl_comm = include_comment or None self._xpath = '' self._h = {} self._n_elmt = 0 ## method of the ContentHandler interface ################################# def startElement(self, name, attrs): name = name.encode(self.encoding) # process xpath self._xpath = "%s%s%s" % (self._xpath, '/', name) _inc_xpath(self._h, self._xpath) # nodes construction for element node = [NT_NODE, name, name, [], None, self._n_elmt+1, self._h[self._xpath]] self._n_elmt += 1 self._xpath = "%s%s%s%s" % (self._xpath, '[', self._h[self._xpath], ']') # nodes construction for element's attributes keys = attrs.keys() # sort attributes to avoid further moves keys.sort() for key in keys: key = key.encode(self.encoding) self._n_elmt += 2 attr_node = [NT_ATTN, '@%sName'%key, key, [], None, 1, 0] link_node(node, attr_node) link_node(attr_node, [NT_ATTV, '@%s'%key, attrs.get(key, '').encode(self.encoding), [],None,0,0]) link_node(self._p_stack[-1], node) # set current element on the top of the father stack self._p_stack.append(node) def endElement(self, name): # process xpath size = len(self._xpath) for i in range(size): size = 
size - 1 if self._xpath[-i - 1] == '/': break self._xpath = self._xpath[:size] self._p_stack[-1][N_ISSUE] = self._n_elmt - self._p_stack[-1][N_ISSUE] # remove last element from stack self._p_stack.pop() def characters(self, ch): if self._norm_sp is not None: ch = ' '.join(ch.split()) if len(ch) > 0 and ch != "\n" and ch != ' ': ch = ch.encode(self.encoding) parent = self._p_stack[-1] # if sibling text nodes if parent[N_CHILDS] and parent[N_CHILDS][-1][N_TYPE] == NT_TEXT: n = parent[N_CHILDS][-1] n[N_VALUE] = n[N_VALUE] + ch else: self._n_elmt += 1 xpath = '%s/text()' % self._xpath _inc_xpath(self._h, xpath) # nodes construction for text node = [NT_TEXT, 'text()', ch, [],None,0, self._h[xpath]] link_node(parent, node) ## method of the LexicalHandler interface ################################## def comment(self, content): if self._incl_comm is None: return if self._norm_sp is not None: content = ' '.join(content.split()) if len(content) > 0: self._n_elmt += 1 content = content.encode(self.encoding) xpath = '%s/comment()' % self._xpath _inc_xpath(self._h, xpath) # nodes construction for comment node = [NT_COMM, 'comment()', content, [], None, 0, self._h[xpath]] link_node(self._p_stack[-1], node) # methods from xml.sax.saxlib.LexicalHandler (avoid dependencie to pyxml) def startDTD(self, name, public_id, system_id): """Report the start of the DTD declarations, if the document has an associated DTD. A startEntity event will be reported before declaration events from the external DTD subset are reported, and this can be used to infer from which subset DTD declarations derive. name is the name of the document element type, public_id the public identifier of the DTD (or None if none were supplied) and system_id the system identfier of the external subset (or None if none were supplied).""" def endDTD(self): "Signals the end of DTD declarations." def startEntity(self, name): """Report the beginning of an entity. The start and end of the document entity is not reported. 
The start and end of the external DTD subset is reported with the pseudo-name '[dtd]'. Skipped entities will be reported through the skippedEntity event of the ContentHandler rather than through this event. name is the name of the entity. If it is a parameter entity, the name will begin with '%'.""" def endEntity(self, name): """Reports the end of an entity. name is the name of the entity, and follows the same conventions as for startEntity.""" def startCDATA(self): """Reports the beginning of a CDATA marked section. The contents of the CDATA marked section will be reported through the characters event.""" def endCDATA(self): "Reports the end of a CDATA marked section." def get_tree(self): self._p_stack[0][N_ISSUE] = self._n_elmt return self._p_stack[0] xmldiff-0.6.10/README.xmlrev0000644000201200020120000000076111434467462014354 0ustar alainalainXMLrev - an XML revision tool Prerequisites: * bash * xmldiff * xsltproc * sp * libxml2-utils * docbook-xsl xmlrev is a tool based on xmldiff that reads two versions of a document and outputs a new document that includes the revision information (parts added, removed and changed). This document can then be rendered with a specific stylesheet that renders the changes. Try: xmlrev doc.xml new_doc.xml > rev_doc.xml Then render rev_doc.xml with the docbook_rev.xsl stylesheet. LICENSE is GPL xmldiff-0.6.10/doc/0000755000201200020120000000000011441141233012700 5ustar alainalainxmldiff-0.6.10/doc/makefile0000644000201200020120000000045411434467463014425 0ustar alainalainMKHTML=mkdoc MKHTML_OPT=--doctype article --param toc.section.depth=1 --target html --stylesheet single-file SRC=. 
all: HELP.html API.html HELP.html: ${SRC}/HELP.txt ${MKHTML} ${MKHTML_OPT} ${SRC}/HELP.txt API.html: ${SRC}/API.txt ${MKHTML} ${MKHTML_OPT} ${SRC}/API.txt clean: rm -f *.html xmldiff-0.6.10/doc/API.txt0000644000201200020120000000674711434467463014112 0ustar alainalainXmlDiff API =========== :Author: Sylvain Thénault :Organization: Logilab :Version: $Revision: 1.2 $ :Date: $Date: 2003-10-02 10:38:21 $ .. contents:: To use this package as a library, you need the provided Python modules described below. mydifflib.py ------------ provides functions for Longest Common Subsequence calculation. lcs2(X, Y, equal): apply the greedy lcs/ses algorithm between the X and Y sequences (any Python sequence) equal is a function to compare X and Y which must return 0 (or a Python false value) if X and Y are different, 1 (or Python true value) if they are identical return a list of matched pairs in tuples lcsl(X, Y, equal): same as above but return the length of the lcs quick_ratio(a,b): optimized version of the standard difflib.py quick_ratio (without junk and class) return an upper bound on ratio() relatively quickly. input.py -------- provides functions for converting DOM tree or xml file in order to process it with xmldiff functions. tree_from_stream(stream, norm_sp=1, ext_ges=0, ext_pes=0, include_comment=1, encoding='UTF-8', html=0): create and return internal tree from xml stream (open file or IOString) if norm_sp = 1, normalize space and new line if ext_ges = 1, include all external general (text) entities. if ext_pes = 1, include all external parameter entities, including the external DTD subset. if include_comment = 1, include comment nodes encoding specifies the encoding to use if html = 1, parse the stream as HTML instead of XML tree_from_dom(root): create and return internal tree from DOM subtree fmes.py ------- Fast match/ Edit script algorithm (not guaranteed to obtain the minimum edit cost, but accepts big documents). Warning, the process(oldtree, newtree) function has a side effect: after calling it, oldtree == newtree. 
class FmesCorrector(self, formatter, f=0.6, t=0.5): class which contains the fmes algorithm formatter is a class instance which handles the edit script formatting (see format.py) f and t are algorithm parameters, 0 < f < 1 and 0.5 <= t < 1 in xmldiff, f = 0.59 and t = 0.5 FmesCorrector.process_trees(self, tree1, tree2): launch diff between internal tree tree1 (old xmltree) and tree2 (new xml tree) return an actions list ezs.py ** DEPRECATED ** ----------------------- Extended Zhang and Shasha algorithm (provides the minimum edit cost, but too complex to be used with big documents). class EzsCorrector(self): class which contains the ezs algorithm EzsCorrector.process_trees(self, tree1, tree2): launch diff between internal tree tree1 (old xmltree) and tree2 (new xml tree) return an actions list format.py --------- provides classes for converting xmldiff algorithms output to DOM tree or printing it in native format or xml xupdate format. The formatter interface is the following : class AbstractFormatter: abstract class designed to be overridden by concrete formatters AbstractFormatter.init(self): method called before the beginning of the tree 2 tree correction AbstractFormatter.add_action(self, action): method called when an action is added to the edit script AbstractFormatter.format_action(self, action): method called by end() to format each action in the edit script at least this method should be overridden AbstractFormatter.end(self): method called at the end of the tree 2 tree correction the concrete classes are InternalPrinter, XUpdatePrinter and DOMXUpdateFormatter See xmldiff.py for a usage example.xmldiff-0.6.10/doc/HELP.txt0000644000201200020120000002057711435670671014224 0ustar alainalainXmlDiff TUTORIAL ================ :Author: Sylvain Thénault :Organization: Logilab :Version: $Revision: 1.4 $ :Date: $Date: 2003-10-08 09:34:12 $ .. 
contents:: Synopsis -------- :: xmldiff [Options] from_file to_file xmldiff [Options] [-r] from_directory to_directory Options: -h, --help display this help message and exit. -V, --version display version number and exit -H, --html input files are HTML instead of XML -r, --recursive when comparing directories, recursively compare any subdirectories found. -x, --xupdate display output following the Xupdate xml specification (see http://www.xmldb.org/xupdate/xupdate-wd.html#N19b1de). -e encoding, --encoding=encoding specify the encoding to use for output. Default is UTF-8 -n, --not-normalize-spaces do not normalize spaces and new lines in text and comment nodes. -c, --exclude-comments do not process comment nodes -g, --ext-ges include all external general (text) entities. -p, --ext-pes include all external parameter entities, including the external DTD subset. --profile=file display an execution profile (runs slower with this option), profile saved to file (binary form). Detailed example ---------------- If you process two files file1 and file2 which respectively contain: :: almaster@logilab.org and :: syt@logilab.org hoye! executing *xmldiff file1 file2* will give the following result: :: [rename_node, /memory[1]/mailbox[1], box] [insert-after, /memory[1]/junkbuster-method[1], ] [insert-after, /memory[1]/spoken-languages[1], hoye! ] [update, /memory[1]/email_addr[1]/text()[1], syt@logilab.org] [rename_node, /memory[1]/junkbuster-method[1]@value, val] [append-first, /memory[1]/junkbuster-method[1], ] [move-first, /memory[1]/spoken-languages[2]/language[2], /memory[1]/spoken-languages[1]] [update, /memory[1]/server-socket[2]@port, 7797] [remove, /memory[1]/spoken-languages[2]] This gives you a list of primitives to apply on file1 to obtain file2 (you should obtain file2 after executing the whole script!). See [4] and [5] for more information. 
The script above tells you the 9 actions to apply on file1: * insert after the node /memory/spoken-languages[0] the below xml subtree:: hoye! * rename node /memory/mailbox[0] to "box" * append a node to the node /memory[0]/junkbuster-method[0] * append an attribute named "new" with value "new attribute" to the node /memory/spoken-languages[0] * update attribute /memory/server-socket[1]@port value to "7797" * update text /memory/email_addr/text()[0] to "syt@logilab.org" * rename attribute /memory/junkbuster-method[0]@value to "val" * move the attributes "code" and "name" from /memory[0]/spoken-languages[0]/language[1] to /memory[0]/spoken-languages[0]/language[0] and rename them to LogilabXmldiffTmpAttr:code and LogilabXmldiffTmpAttr:name * move the attributes "code" and "name" from /memory[0]/spoken-languages[0]/language[0] to /memory[0]/spoken-languages[0]/language[1] and rename them to LogilabXmldiffTmpAttr:code and LogilabXmldiffTmpAttr:name * remove node /memory/spoken-languages/language[2] * rename attributes LogilabXmldiffTmpAttr:code and LogilabXmldiffTmpAttr:name of /memory/spoken-languages/language[0] to name and code * rename attributes LogilabXmldiffTmpAttr:code and LogilabXmldiffTmpAttr:name of /memory/spoken-languages/language[1] to name and code Note that all xpaths are relative to file1 with the previous steps applied. If you had typed "xmldiff -x file1 file2", you would have obtained the same thing described as an Xupdate output (see [3]). :: box new attribute hoye! syt@logilab.org val fr english 7797 Warnings -------- * This version of xmldiff doesn't process the DTD, CDATA and PROCESSING INSTRUCTIONS nodes, so if there is a difference between two documents in one of those nodes, xmldiff won't see it. 
* Furthermore, xml namespaces are disabled: and are seen as different nodes * Comparing documents bigger than 200Ko can take a few minutes (during tests, it took about 25 seconds to diff two versions of a 130Ko document on a Celeron 533 box with 256Mo RAM) * The execution time scales with the number of differences between the documents to compare * Finally, a few assumptions have been made to obtain a faster algorithm: - there is an ordering <_l on the labels in the schema such that a node with a label l1 can appear as the descendant of a node with a label l2 only if l1 <_l l2 - for any leaf x from T1, there is at most one leaf y from T2 which can be mapped with x (internally, 2 nodes may be mapped together if their lcs (longest common subsequence) ratio is greater than 0.6) References ---------- 1. "Tree-to-tree correction for document trees" by D.T. Barnard, G. Clarke, N. Duncan Queen's university, Kingston, Ontario K7L 3N6 (Canada), 1995 The "ezs" algorithm 2. "Change detection in hierarchically structured information" by S. Chawathe, A. Rajaraman, H. Garcia-Molina, J. Widom Stanford University, 1996 The Fast Match / Edit Script algorithm (fmes), used by default 3. http://www.xmldb.org/xupdate/xupdate-wd.html#N19b1de XUpdate update language 4. http://www.w3.org/TR/2000/REC-xml-20001006 XML 1.0 W3C recommendation 5. http://www.w3.org/TR/xpath XML path language 1.0 W3C recommendation Feedback -------- xmldiff discussion should take place on the xml-logilab mailing list. Please check http://lists.logilab.org/mailman/listinfo/xml-projects for information on subscribing and the mailing list archives. xmldiff-0.6.10/TODO0000644000201200020120000000056111434467462012646 0ustar alainalain**** TODO List for xmldiff **** _ report namespaces declaration ! _ support Processing Instruction nodes, CDATA _ support for XML namespaces _ option for case insensitive _ data/document modes ? 
_ translate HELP.txt and API.txt to docbook _ update ezs to make it work with the new internal representation _ optimizations: use tuple instead of list when it's possiblexmldiff-0.6.10/setup.cfg0000644000201200020120000000013211434467463013772 0ustar alainalain[bdist_rpm] packager = Sylvain Thenault provides = xmldiff