bzr-fastimport-0.13.0+bzr361/.testr.conf0000644000000000000000000000026511500743162016017 0ustar 00000000000000[DEFAULT] test_command=BZR_PLUGINS_AT=fastimport@. bzr selftest "^bzrlib.plugins.fastimport" --subunit $IDOPTION $LISTOPT test_id_option=--load-list $IDFILE test_list_option=--list bzr-fastimport-0.13.0+bzr361/COPYING.txt0000644000000000000000000004310310754757632015621 0ustar 00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. 
The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. 
In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. 
You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

END OF TERMS AND CONDITIONS

How to Apply These Terms to Your New Programs

If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.

To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

Also add information on how to contact you by electronic and paper mail.

If the program is interactive, make it output a short notice like this when it starts in an interactive mode:

    Gnomovision version 69, Copyright (C) year name of author
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program.

You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names:

    Yoyodyne, Inc., hereby disclaims all copyright interest in the program
    `Gnomovision' (which makes passes at compilers) written by James Hacker.
    <signature of Ty Coon>, 1 April 1989
    Ty Coon, President of Vice

This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License.

bzr-fastimport-0.13.0+bzr361/NEWS0000644000000000000000000003414312323416055014434 0ustar 00000000000000
============================
bzr-fastimport Release Notes
============================

.. contents::

0.14 UNRELEASED

Compatibility
-------------

* Replace Repo.get_ancestry (as per #1040560), which is removed in bzr 2.6.
  (Zygmunt Krynicki)

Bug fixes
---------

* Print a sane error when a fastimport file is incomplete.
  (Jelmer Vernooij, #937972)

Features
--------

* New option --no-tags for 'bzr fast-export'. (Alex Usov)

0.13 2012-02-29

Changes
-------

* bzr-fastimport no longer ships exporters for various other version
  control systems. The versions of these exporters shipped with
  bzr-fastimport have been outdated for some time. Better and more
  recent exporters are widely available.

Compatibility
-------------

* Avoid using Tree.inventory directly, which is deprecated in bzr 2.6.
  (Jelmer Vernooij)

* Reimport some modules removed from python-fastimport 0.9.2.
  (Jelmer Vernooij, #693507)

Improvements
------------

* Document the SOURCE argument for ``bzr fast-export``.
  (Jelmer Vernooij, #672926)

0.12 2012-02-09

Bug fixes
---------

* Cope with non-utf8 characters in paths when importing.
  (Jelmer Vernooij, #838980)

Features
--------

* New option --rewrite-tag-names for 'bzr fast-export'.
  (Alex Usov, #872601)

* New option --dont-squash-empty-commits for 'bzr fast-import-filter'.
  (Alex Usov, #877303)

* Add --baseline option to ``bzr export``. (Andy Grimm, #893382)

0.11 2011-08-22

Bug fixes
---------

* Fix internal error in 'bzr fast-import-info'. (Jelmer Vernooij, #730833)

* Setup.py no longer relies on bzrlib. (Jelmer Vernooij, #735201)

* Avoid Inventory.copy, which has disappeared in newer versions
  of Bazaar. (Jelmer Vernooij, #485788)

* Allow "bzr fast-import-filter" to be used without first argument.
  (Jelmer Vernooij, #792935)

* Remove --branches-path and --tags-path options from
  ``bzr fast-export-from-svn``. These options are not supported by
  the underlying svn-fast-export.py yet. (Jelmer Vernooij, #513747)

* In "plain" mode, skip tags that contain characters not valid in Git.
  (Jelmer Vernooij, #386986)

* Support exporting files that are changed into directories.
  (Henrik Nyberg, #631979)

0.10 08-Mar-2011

Changes
-------

* bzr-fastimport's file parsing and generation functionality has been
  exported into a separate upstream project called python-fastimport,
  which it now depends on. python-fastimport can be retrieved from
  http://launchpad.net/python-fastimport.

Bug fixes
---------

* Cope with non-ascii characters in tag names. (Jelmer Vernooij, #598060)

* Cope with non-ascii characters in symbolic links. (Daniel Clemente,
  Jelmer Vernooij, #238365)

* In plain mode, don't export multiple authors. (David Kilzer, #496677)

* Fix indentation when starting p4 fails. (Jelmer Vernooij)

* SOURCE argument to bzr fast-import-filter is now optional, consistent
  with examples. (Jelmer Vernooij, #477861)

* Support new marks file format introduced in (apparently) git 1.6.
  (Ian Clatworthy, Jelmer Vernooij, Gabriel Filion, #347729)

* Allow entries without email address in user map.
(Jelmer Vernooij, #576598) * Strip leading slashes when exporting Subversion repositories. (Jelmer Vernooij, #477869) * Cope with non-utf8 characters in commit messages. (Jelmer Vernooij, #444896) * Include download_url in setup.py for use on pypi. (Jelmer Vernooij, #456957) * Fix incremental imports from incremental import streams. (termie, Jelmer Vernooij, #328494) 0.9 28-Feb-2010 =============== New Features ------------ * The fast-import command now takes an optional but recommended DESTINATION argument. A repository will be created at this location and branches will be created within there. If the user is running bzr 1.17 up to 2.0, format "2a" is used for these, otherwise the default format is used. A format can be explicitly given using the new --format option. (Ian Clatworthy) * Wrapper commands simplifying the generation of fast-import files from other tools are now provided. The general usage is: bzr fast-export-from-xxx SOURCE project.fi Before starting an export, these commands make an effort to check that dependent tools are installed. So far, wrapper commands are available for cvs, darcs, git, hg (Mercurial), mnt (Monotone), p4 (Perforce) and svn (Subversion). (Ian Clatworthy, Matt McClure) * darcs-fast-export is now bundled. In fact, the project has merged into this one for the purposes of ongoing bug fixing and development. (Miklos Vajna) * fast-export now supports a --no-plain parameter which causes richer metadata to be included in the output using the recently agreed 'feature' extension capability. The features used are called multiple-authors, commit-properties and empty-directories. (Ian Clatworthy) * fast-import and fast-import-filter now support user mapping via the new --user-map option. The argument is a file specifying how user-ids should be mapped to preferred user-ids. (Ian Clatworthy) * svn-fast-export now supports an address option (to control the default email domain) and a first-rev option (to select revisions since a given one). (Ted Gould) Improvements ------------ * Large repositories now compress better thanks to a change in how file-ids are assigned. (Ian Clatworthy, John Arbash Meinel) * Memory usage is improved by flushing blobs to a disk cache when appropriate. (John Arbash Meinel) * If a fast-import source ends in ".gz", it is assumed to be in gzip format and the stream is implicitly uncompressed. This means fast-import dump files generated by fast-export-from-xxx can be stored gzipped to save space. (Ian Clatworthy) * The working tree is now implicitly updated for trunk. Other branches still need to be explicitly created using 'bzr update'. (Ian Clatworthy) * Directories that become empty following a delete or rename of one of their children are now implicitly pruned. If required, this will be made optional at a later date. (Tom Widmer, Ian Clatworthy) * Blob tracking is now more intelligently done by an implicit first pass to collect blob usage statistics. This eliminates the need for an explicit 2-step process in all cases except where stdin is used as the input source. (Ian Clatworthy) * Updated the bundled version of hg-fast-export to be the latest code (25-May-2009) from http://repo.or.cz/w/fast-export.git. (Ian Clatworthy) Bug Fixes --------- * Fixed the format used to create branches in a shared repository. It now selects the best branch format based on the repository format, rather than assume the default branch format is the right one. (Ian Clatworthy) * Fixed inventory delta generation when deleting directories. 
(Previously the child paths included were relative to the directory when they ought to be relative to the root.) (Ian Clatworthy) * Gracefully handle email addresses with unicode characters. (Ian Clatworthy) * Gracefully handle an empty input stream. (Gonéri Le Bouder) * Gracefully handle git submodules by ignoring them. (Ian Clatworthy) * Get git-bzr working again. (Gonéri Le Bouder) Documentation ------------- * Improved documentation has been published in the Bazaar Data Migration Guide: http://doc.bazaar-vcs.org/migration/en/data-migration/. 0.8 22-Jul-2009 =============== Compatibility Breaks -------------------- * ``exporters/bzr-fast-export`` has been replaced with a ``fast-export`` command. Some minor issues have been fixed at the same time: the first commit now goes into refs/heads/master (not refs/head/tmp); there's no checkpoint at the top of the stream; parent commits are now always given lower mark numbers than the commits they are merged into. (Ian Clatworthy) * The ``fast-import`` command now uses a different mapping of git reference names to bzr branch names. In summary: * ``refs/heads/foo`` is mapped to ``foo`` * ``refs/remotes/origin/foo`` is mapped to ``foo.remote`` * ``refs/tags/foo`` is mapped to ``foo.tag`` * ``*/master`` is mapped to ``trunk``, ``trunk.remote``, etc. * ``*/trunk`` is mapped to ``git-trunk``, ``git-trunk.remote``, etc. This new mapping has been introduced so that more natural branch names are used and to enable round-tripping back to git. (Ian Clatworthy) * The old ``fast-import-filter`` command is now called ``fast-import-query``. ``fast-import-filter`` now really filters the input to produce a fast-import stream based on filtering criteria. See below. (Ian Clatworthy) * The ``--inv-fulltext`` option is no longer supported. It was only used in experimental mode for old versions of bzrlib so it added more complexity than value. (Ian Clatworthy) New Features ------------ * Added ``fast-import-filter`` command for splitting out a subdirectory or bunch of files into their own project. It can also be used to create a new repository without any history for nominated files and directories. This is useful for removing information which is a security risk, huge binary files like ISO images, etc. (Ian Clatworthy) * Copying of files and symbolic links is now supported. (Ian Clatworthy) * Initial cut at reset support. (Brian de Alwis, Ian Clatworthy) Improvements ------------ * If ``refs/heads/xxx`` and ``refs/remotes/origin/xxx`` are both defined, the latter is now mapped to a bzr branch called ``xxx.remote`` rather than ``remotes--origins--xxx``. (Ian Clatworthy) * ``bzr fast-import-info`` now handles an unlimited # of parents for a revision. The spec suggests the maximum ought to be 16 but the linux kernel has revisions with more than that. (Ian Clatworthy) * ``bzr fast-import-info`` now reports on things that may need caching, i.e. merges, rename old paths and copy source paths. (Ian Clatworthy) * Tag commands with a missing from clause now produce a warning but are otherwise ignored. (Scott James Remnant, Ian Clatworthy) * The fastimport-id-map file can now have more revisions than the repository. (Scott James Remnant) * Updated the bundled version of hg-fast-export to be the latest code from http://repo.or.cz/w/fast-export.git. It should now support recent Mercurial releases. (Ian Clatworthy, #318903) Bug Fixes --------- * Fixed a *bad* bug where filecopy commands were being parsed as filerename commands. 
Repositories generated by previous version of bzr-fast-import where the input stream contained filecopy commands might be missing data (the copy source will no longer be there if it was unchanged since the copy happened) and ought to be regenerated. (Ian Clatworthy) * Fixed how the per-file graph is generated. The per-file graph may still be less than perfect in the case where a file is only changed in a merge and not the parent, but in the vast majority of cases now, ``bzr check`` should no longer report inconsistent parents. (Ian Clatworthy) * Fix ``os`` import as needed on Windows. (Ian Clatworthy, esskov, #264988) * Handle a directory turning into a file and then the children of that directory being deleted. (Ian Clatworthy, #309486) * Handle an empty email section. (Ian Clatworthy) * Handle multiple merges within the one merge clause. That's illegal according to the spec but git-fast-export does it. (Ian Clatworthy, #259711) * Handle names and paths that aren't utf8 encoded. The spec recommends utf8 encoding of these but git-fast-export doesn't always do that. (Ian Clatworthy, #289088) * Ignore lightweight tags with no from clause rather than abort. (It seems git-fast-export outputs these commands frequently now while it didn't appear to in early versions.) (Ian Clatworthy, edice, #259711) * Import into rich-root (and subtree) repositories without aborting. (Ian Clatworthy, #300921) * Recursively delete children when a directory is deleted. (Scott James Remnant) * The ``deleteall`` command now only tries to delete files in the nominated branch, not all known files. As a consequence, it should now be possible (if it wasn't before) to import multiple Darcs branches (via darcs-fast-export) at once. (Ian Clatworthy) Testing ------- * A large number of tests have been added. (Ian Clatworthy) Internals --------- * Refactored ``processors/generic_processor.py`` into a bunch of modules. (Ian Clatworthy) 0.7 09-Feb-2009 =============== Compatibility Breaks -------------------- * bzr-fast-export.py renamed to bzr-fast-export. (Jelmer Vernooij) Improvements ------------ * Add support for the deleteall command. (Miklos Vajna, #284941) Bug Fixes --------- * bzr-fast-export now exports rm+mv correctly. (Jonas) * Fix recursive rename handling in bzr-fast-export. (Pieter de Bie, #287785) * hg-fast-export should use binary mode on Windows. (Alexey Stukalov) * setup.py no longer assumes python2.4. (schickb@gmail.com) * setup.py support fixed. (Jelmer Vernooij) * Update the last-modified revision for a renamed file. (John Arbash Meinel) 0.6 23-Jul-2008 =============== Improvements ------------ * Added NEWS containing Release Notes. (Ian Clatworthy) * ``bzr help fastimport`` now provides help that is useful. (Ian Clatworthy) * Numerous fixes to ``bzr-fast-export.py`` to improve round-tripping with Git. Added ``--import-marks`` and ``--export-marks`` options to ``fast-import`` as well. (Pieter de Bie) * ``svn-fast-export.py`` now supports a regular-expression to specify the branches to export. (Mirko Friedenhagen) Bug Fixes --------- * Support the new Repository API added in bzr.dev r3510. The old API will be used for earlier versions of bzr including bzr 1.6beta2 and earlier. (Ian Clatworthy) Compatibility Breaks -------------------- * The ``--inv-fulltext`` option is not yet supported when the new Repository API is used to import revisions. The option can be provided but it will be ignored. (Ian Clatworthy) API Breaks * The ``RevisionLoader`` class has been renamed to ``RevisionLoader1``. 
The ``ExperimentalRevisionLoader`` class has been renamed to ``ImportRevisionLoader1``. New classes called ``RevisionLoader2`` and ``ImportRevisionLoader2`` are provided that use the new Repository API. (Ian Clatworthy) Internals --------- * Improved packaging by adding a setup.py file. (Ian Clatworthy) 0.5 02-Jun-2008 =============== * Version suitable for Bazaar 1.5. (Ian Clatworthy) bzr-fastimport-0.13.0+bzr361/README.txt0000644000000000000000000000277311723410267015441 0ustar 00000000000000bzr-fastimport: Backend for fast Bazaar data importers ====================================================== Dependencies ------------ Required and recommended packages are: * Python 2.4 or later * Python-Fastimport 0.9.0 or later. * Bazaar 1.18 or later. Installation ------------ The easiest way to install this plugin is to either copy or symlink the directory into your ~/.bazaar/plugins directory. Be sure to rename the directory to fastimport (instead of bzr-fastimport). See http://bazaar-vcs.org/UsingPlugins for other options such as using the BZR_PLUGIN_PATH environment variable. Testing ------- To test the plugin after installation: bzr selftest fastimport Documentation ------------- To view the documentation after installation: bzr help fastimport Exporters/Importers ------------------- There are also fast importers / fast exporters available for other version control tools. * Git: fastimport support is built-in ("git fast-import" / "git fast-export") * Mercurial: http://mercurial.selenic.com/wiki/FastImportExtension * Subversion: http://repo.or.cz/w/fast-export.git * CVS: http://cvs2svn.tigris.org/cvs2bzr.html * Perforce: https://github.com/git/git/blob/master/contrib/fast-import/git-p4 Licensing --------- For copyright and licensing details of the exporters, see the relevant files in the exporters/ directory. Otherwise this plugin is (C) Copyright Canonical Limited 2008 under the GPL Version 2 or later. Please see the file COPYING.txt for the licence details. bzr-fastimport-0.13.0+bzr361/__init__.py0000644000000000000000000000612411723177743016057 0ustar 00000000000000# Copyright (C) 2008-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . r"""FastImport Plugin ================= The fastimport plugin provides stream-based importing and exporting of data into and out of Bazaar. As well as enabling interchange between multiple VCS tools, fastimport/export can be useful for complex branch operations, e.g. partitioning off part of a code base in order to Open Source it. The normal import recipe is:: front-end > project.fi bzr fast-import project.fi project.bzr In either case, if you wish to save disk space, project.fi can be compressed to gzip format after it is generated like this:: (generate project.fi) gzip project.fi bzr fast-import project.fi.gz project.bzr The list of known front-ends and their status is documented on http://bazaar-vcs.org/BzrFastImport/FrontEnds. 
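As a concrete sketch (assuming git is installed and these commands are run
from inside an existing git repository), git's built-in exporter can act as
the front-end::

  git fast-export --all > project.fi
  bzr fast-import project.fi project.bzr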
Once a fast-import dump file is created, it can be imported into a Bazaar repository using the fast-import command. If required, you can manipulate the stream first using the fast-import-filter command. This is useful for creating a repository with just part of a project or for removing large old binaries (say) from history that are no longer valuable to retain. For further details on importing, manipulating and reporting on fast-import streams, see the online help for the commands:: bzr help fast-import bzr help fast-import-filter bzr help fast-import-info bzr help fast-import-query Finally, you may wish to generate a fast-import dump file from a Bazaar repository. The fast-export command is provided for that purpose. To report bugs or publish enhancements, visit the bzr-fastimport project page on Launchpad, https://launchpad.net/bzr-fastimport. """ from __future__ import absolute_import from bzrlib.plugins.fastimport.info import ( bzr_plugin_version as version_info, ) from bzrlib.commands import plugin_cmds def load_fastimport(): """Load the fastimport module or raise an appropriate exception.""" try: import fastimport except ImportError, e: from bzrlib.errors import DependencyNotPresent raise DependencyNotPresent("fastimport", "bzr-fastimport requires the fastimport python module") def test_suite(): from bzrlib.plugins.fastimport import tests return tests.test_suite() for name in [ "fast_import", "fast_import_filter", "fast_import_info", "fast_import_query", "fast_export", ]: plugin_cmds.register_lazy("cmd_%s" % name, [], "bzrlib.plugins.fastimport.cmds") bzr-fastimport-0.13.0+bzr361/branch_mapper.py0000644000000000000000000000421011643171110017072 0ustar 00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """An object that maps git ref names to bzr branch names. Note that it is not used to map git ref names to bzr tag names.""" import re class BranchMapper(object): _GIT_TRUNK_RE = re.compile('(?:git-)*trunk') def git_to_bzr(self, ref_name): """Map a git reference name to a Bazaar branch name. """ parts = ref_name.split('/') if parts[0] == 'refs': parts.pop(0) category = parts.pop(0) if category == 'heads': git_name = '/'.join(parts) bazaar_name = self._git_to_bzr_name(git_name) else: if category == 'remotes' and parts[0] == 'origin': parts.pop(0) git_name = '/'.join(parts) if category.endswith('s'): category = category[:-1] name_no_ext = self._git_to_bzr_name(git_name) bazaar_name = "%s.%s" % (name_no_ext, category) return bazaar_name def _git_to_bzr_name(self, git_name): # Make a simple name more bzr-like, by mapping git 'master' to bzr 'trunk'. # To avoid collision, map git 'trunk' to bzr 'git-trunk'. Likewise # 'git-trunk' to 'git-git-trunk' and so on, such that the mapping is # one-to-one in both directions. 
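        # Illustrative examples of the resulting one-to-one mapping:
        #   'master'    -> 'trunk'
        #   'trunk'     -> 'git-trunk'
        #   'git-trunk' -> 'git-git-trunk'
        # Any other name is passed through unchanged.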
if git_name == 'master': bazaar_name = 'trunk' elif self._GIT_TRUNK_RE.match(git_name): bazaar_name = 'git-%s' % (git_name,) else: bazaar_name = git_name return bazaar_name bzr-fastimport-0.13.0+bzr361/branch_updater.py0000644000000000000000000001616312335104053017265 0ustar 00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """An object that updates a bunch of branches based on data imported.""" from operator import itemgetter from bzrlib import bzrdir, errors, osutils, transport from bzrlib.trace import show_error, note from bzrlib.plugins.fastimport.helpers import ( best_format_for_objects_in_a_repository, single_plural, ) class BranchUpdater(object): def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags): """Create an object responsible for updating branches. :param heads_by_ref: a dictionary where names are git-style references like refs/heads/master; values are one item lists of commits marks. """ self.repo = repo self.branch = branch self.cache_mgr = cache_mgr self.heads_by_ref = heads_by_ref self.last_ref = last_ref self.tags = tags self._branch_format = \ best_format_for_objects_in_a_repository(repo) def update(self): """Update the Bazaar branches and tips matching the heads. If the repository is shared, this routine creates branches as required. If it isn't, warnings are produced about the lost of information. :return: updated, lost_heads where updated = the list of branches updated ('trunk' is first) lost_heads = a list of (bazaar-name,revision) for branches that would have been created had the repository been shared """ updated = [] branch_tips, lost_heads = self._get_matching_branches() for br, tip in branch_tips: if self._update_branch(br, tip): updated.append(br) return updated, lost_heads def _get_matching_branches(self): """Get the Bazaar branches. :return: branch_tips, lost_heads where branch_tips = a list of (branch,tip) tuples for branches. The first tip is the 'trunk'. lost_heads = a list of (bazaar-name,revision) for branches that would have been created had the repository been shared and everything succeeded """ branch_tips = [] lost_heads = [] ref_names = self.heads_by_ref.keys() if self.branch is not None: trunk = self.select_trunk(ref_names) default_tip = self.heads_by_ref[trunk][0] branch_tips.append((self.branch, default_tip)) ref_names.remove(trunk) # Convert the reference names into Bazaar speak. If we haven't # already put the 'trunk' first, do it now. 
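        # For example (per BranchMapper): 'refs/heads/foo' -> 'foo',
        # 'refs/remotes/origin/foo' -> 'foo.remote' and
        # 'refs/tags/v1' -> 'v1.tag'.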
git_to_bzr_map = {} for ref_name in ref_names: git_to_bzr_map[ref_name] = self.cache_mgr.branch_mapper.git_to_bzr(ref_name) if ref_names and self.branch is None: trunk = self.select_trunk(ref_names) git_bzr_items = [(trunk, git_to_bzr_map[trunk])] del git_to_bzr_map[trunk] else: git_bzr_items = [] git_bzr_items.extend(sorted(git_to_bzr_map.items(), key=itemgetter(1))) # Policy for locating branches def dir_under_current(name, ref_name): # Using the Bazaar name, get a directory under the current one repo_base = self.repo.bzrdir.transport.base return osutils.pathjoin(repo_base, "..", name) def dir_sister_branch(name, ref_name): # Using the Bazaar name, get a sister directory to the branch return osutils.pathjoin(self.branch.base, "..", name) if self.branch is not None: dir_policy = dir_sister_branch else: dir_policy = dir_under_current # Create/track missing branches shared_repo = self.repo.is_shared() for ref_name, name in git_bzr_items: tip = self.heads_by_ref[ref_name][0] if shared_repo: location = dir_policy(name, ref_name) try: br = self.make_branch(location) branch_tips.append((br,tip)) continue except errors.BzrError, ex: show_error("ERROR: failed to create branch %s: %s", location, ex) lost_head = self.cache_mgr.lookup_committish(tip) lost_info = (name, lost_head) lost_heads.append(lost_info) return branch_tips, lost_heads def select_trunk(self, ref_names): """Given a set of ref names, choose one as the trunk.""" for candidate in ['refs/heads/master']: if candidate in ref_names: return candidate # Use the last reference in the import stream return self.last_ref def make_branch(self, location): """Make a branch in the repository if not already there.""" to_transport = transport.get_transport(location) to_transport.create_prefix() try: return bzrdir.BzrDir.open(location).open_branch() except errors.NotBranchError, ex: return bzrdir.BzrDir.create_branch_convenience(location, format=self._branch_format, possible_transports=[to_transport]) def _update_branch(self, br, last_mark): """Update a branch with last revision and tag information. :return: whether the branch was changed or not """ last_rev_id = self.cache_mgr.lookup_committish(last_mark) self.repo.lock_read() try: graph = self.repo.get_graph() revno = graph.find_distance_to_null(last_rev_id, []) finally: self.repo.unlock() existing_revno, existing_last_rev_id = br.last_revision_info() changed = False if revno != existing_revno or last_rev_id != existing_last_rev_id: br.set_last_revision_info(revno, last_rev_id) changed = True # apply tags known in this branch my_tags = {} if self.tags: graph = self.repo.get_graph() ancestry = [r for (r, ps) in graph.iter_ancestry([last_rev_id]) if ps is not None] for tag,rev in self.tags.items(): if rev in ancestry: my_tags[tag] = rev if my_tags: br.tags._set_tag_dict(my_tags) changed = True if changed: tagno = len(my_tags) note("\t branch %s now has %d %s and %d %s", br.nick, revno, single_plural(revno, "revision", "revisions"), tagno, single_plural(tagno, "tag", "tags")) return changed bzr-fastimport-0.13.0+bzr361/bzr_commit_handler.py0000644000000000000000000011214111643171110020136 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""CommitHandlers that build and save revisions & their inventories."""


from bzrlib import (
    debug,
    errors,
    generate_ids,
    inventory,
    osutils,
    revision,
    serializer,
    )
from bzrlib.trace import (
    mutter,
    note,
    warning,
    )

from fastimport import (
    helpers,
    processor,
    )

from bzrlib.plugins.fastimport.helpers import (
    mode_to_kind,
    )


_serializer_handles_escaping = hasattr(serializer.Serializer,
    'squashes_xml_invalid_characters')


def copy_inventory(inv):
    entries = inv.iter_entries_by_dir()
    inv = inventory.Inventory(None, inv.revision_id)
    for path, inv_entry in entries:
        inv.add(inv_entry.copy())
    return inv


class GenericCommitHandler(processor.CommitHandler):
    """Base class for Bazaar CommitHandlers."""

    def __init__(self, command, cache_mgr, rev_store, verbose=False,
        prune_empty_dirs=True):
        super(GenericCommitHandler, self).__init__(command)
        self.cache_mgr = cache_mgr
        self.rev_store = rev_store
        self.verbose = verbose
        self.branch_ref = command.ref
        self.prune_empty_dirs = prune_empty_dirs
        # This tracks path->file-id for things we're creating this commit.
        # If the same path is created multiple times, we need to warn the
        # user and add it just once.
        # If a path is added then renamed or copied, we need to handle that.
        self._new_file_ids = {}
        # This tracks path->file-id for things we're modifying this commit.
        # If a path is modified then renamed or copied, we need to make
        # sure we grab the new content.
        self._modified_file_ids = {}
        # This tracks the paths for things we're deleting this commit.
        # If the same path is added or the destination of a rename say,
        # then a fresh file-id is required.
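        # For example, a stream that deletes 'foo' and then re-adds
        # 'foo' within the same commit must not reuse the old file-id.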
        self._paths_deleted_this_commit = set()

    def mutter(self, msg, *args):
        """Output a mutter but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        mutter(msg, *args)

    def debug(self, msg, *args):
        """Output a mutter if the appropriate -D option was given."""
        if "fast-import" in debug.debug_flags:
            msg = "%s (%s)" % (msg, self.command.id)
            mutter(msg, *args)

    def note(self, msg, *args):
        """Output a note but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        note(msg, *args)

    def warning(self, msg, *args):
        """Output a warning but add context."""
        msg = "%s (%s)" % (msg, self.command.id)
        warning(msg, *args)

    def pre_process_files(self):
        """Prepare for committing."""
        self.revision_id = self.gen_revision_id()
        # cache of texts for this commit, indexed by file-id
        self.data_for_commit = {}
        #if self.rev_store.expects_rich_root():
        self.data_for_commit[inventory.ROOT_ID] = []

        # Track the heads and get the real parent list
        parents = self.cache_mgr.reftracker.track_heads(self.command)

        # Convert the parent commit-ids to bzr revision-ids
        if parents:
            self.parents = [self.cache_mgr.lookup_committish(p)
                for p in parents]
        else:
            self.parents = []
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(p) for p in self.parents]
        self.rev_store.start_new_revision(self.revision, self.parents,
            self.parent_invs)

        # cache of per-file parents for this commit, indexed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # Keep the basis inventory. This needs to be treated as read-only.
        if len(self.parents) == 0:
            self.basis_inventory = self._init_inventory()
        else:
            self.basis_inventory = self.get_inventory(self.parents[0])
        if hasattr(self.basis_inventory, "root_id"):
            self.inventory_root_id = self.basis_inventory.root_id
        else:
            self.inventory_root_id = self.basis_inventory.root.file_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = {}

    def _init_inventory(self):
        return self.rev_store.init_inventory(self.revision_id)

    def get_inventory(self, revision_id):
        """Get the inventory for a revision id."""
        try:
            inv = self.cache_mgr.inventories[revision_id]
        except KeyError:
            if self.verbose:
                self.mutter("get_inventory cache miss for %s", revision_id)
            # Not cached so reconstruct from the RevisionStore
            inv = self.rev_store.get_inventory(revision_id)
            self.cache_mgr.inventories[revision_id] = inv
        return inv

    def _get_data(self, file_id):
        """Get the data bytes for a file-id."""
        return self.data_for_commit[file_id]

    def _get_lines(self, file_id):
        """Get the lines for a file-id."""
        return osutils.split_lines(self._get_data(file_id))

    def _get_per_file_parents(self, file_id):
        """Get the per-file parents for a file-id."""
        return self.per_file_parents_for_commit[file_id]

    def _get_inventories(self, revision_ids):
        """Get the inventories for revision-ids.

        This is a callback used by the RepositoryStore to speed up
        inventory reconstruction.
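        :return: (present, inventories) where present lists the
            revision-ids actually found and inventories holds the
            corresponding inventory object for every requested id.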
""" present = [] inventories = [] # If an inventory is in the cache, we assume it was # successfully loaded into the revision store for revision_id in revision_ids: try: inv = self.cache_mgr.inventories[revision_id] present.append(revision_id) except KeyError: if self.verbose: self.note("get_inventories cache miss for %s", revision_id) # Not cached so reconstruct from the revision store try: inv = self.get_inventory(revision_id) present.append(revision_id) except: inv = self._init_inventory() self.cache_mgr.inventories[revision_id] = inv inventories.append(inv) return present, inventories def bzr_file_id_and_new(self, path): """Get a Bazaar file identifier and new flag for a path. :return: file_id, is_new where is_new = True if the file_id is newly created """ if path not in self._paths_deleted_this_commit: # Try file-ids renamed in this commit id = self._modified_file_ids.get(path) if id is not None: return id, False # Try the basis inventory id = self.basis_inventory.path2id(path) if id is not None: return id, False # Try the other inventories if len(self.parents) > 1: for inv in self.parent_invs[1:]: id = self.basis_inventory.path2id(path) if id is not None: return id, False # Doesn't exist yet so create it dirname, basename = osutils.split(path) id = generate_ids.gen_file_id(basename) self.debug("Generated new file id %s for '%s' in revision-id '%s'", id, path, self.revision_id) self._new_file_ids[path] = id return id, True def bzr_file_id(self, path): """Get a Bazaar file identifier for a path.""" return self.bzr_file_id_and_new(path)[0] def _utf8_decode(self, field, value): try: return value.decode('utf-8') except UnicodeDecodeError: # The spec says fields are *typically* utf8 encoded # but that isn't enforced by git-fast-export (at least) self.warning("%s not in utf8 - replacing unknown " "characters" % (field,)) return value.decode('utf-8', 'replace') def _decode_path(self, path): try: return path.decode('utf-8') except UnicodeDecodeError: # The spec says fields are *typically* utf8 encoded # but that isn't enforced by git-fast-export (at least) self.warning("path %r not in utf8 - replacing unknown " "characters" % (path,)) return path.decode('utf-8', 'replace') def _format_name_email(self, section, name, email): """Format name & email as a string.""" name = self._utf8_decode("%s name" % section, name) email = self._utf8_decode("%s email" % section, email) if email: return "%s <%s>" % (name, email) else: return name def gen_revision_id(self): """Generate a revision id. Subclasses may override this to produce deterministic ids say. """ committer = self.command.committer # Perhaps 'who' being the person running the import is ok? If so, # it might be a bit quicker and give slightly better compression? 
who = self._format_name_email("committer", committer[0], committer[1]) timestamp = committer[2] return generate_ids.gen_revision_id(who, timestamp) def build_revision(self): rev_props = self._legal_revision_properties(self.command.properties) if 'branch-nick' not in rev_props: rev_props['branch-nick'] = self.cache_mgr.branch_mapper.git_to_bzr( self.branch_ref) self._save_author_info(rev_props) committer = self.command.committer who = self._format_name_email("committer", committer[0], committer[1]) try: message = self.command.message.decode("utf-8") except UnicodeDecodeError: self.warning( "commit message not in utf8 - replacing unknown characters") message = self.command.message.decode('utf-8', 'replace') if not _serializer_handles_escaping: # We need to assume the bad ol' days message = helpers.escape_commit_message(message) return revision.Revision( timestamp=committer[2], timezone=committer[3], committer=who, message=message, revision_id=self.revision_id, properties=rev_props, parent_ids=self.parents) def _legal_revision_properties(self, props): """Clean-up any revision properties we can't handle.""" # For now, we just check for None because that's not allowed in 2.0rc1 result = {} if props is not None: for name, value in props.items(): if value is None: self.warning( "converting None to empty string for property %s" % (name,)) result[name] = '' else: result[name] = value return result def _save_author_info(self, rev_props): author = self.command.author if author is None: return if self.command.more_authors: authors = [author] + self.command.more_authors author_ids = [self._format_name_email("author", a[0], a[1]) for a in authors] elif author != self.command.committer: author_ids = [self._format_name_email("author", author[0], author[1])] else: return # If we reach here, there are authors worth storing rev_props['authors'] = "\n".join(author_ids) def _modify_item(self, path, kind, is_executable, data, inv): """Add to or change an item in the inventory.""" # If we've already added this, warn the user that we're ignoring it. # In the future, it might be nice to double check that the new data # is the same as the old but, frankly, exporters should be fixed # not to produce bad data streams in the first place ... 
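        # (For example, two modify commands naming the same path within
        # a single commit command trigger this case.)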
existing = self._new_file_ids.get(path) if existing: # We don't warn about directories because it's fine for them # to be created already by a previous rename if kind != 'directory': self.warning("%s already added in this commit - ignoring" % (path,)) return # Create the new InventoryEntry basename, parent_id = self._ensure_directory(path, inv) file_id = self.bzr_file_id(path) ie = inventory.make_entry(kind, basename, parent_id, file_id) ie.revision = self.revision_id if kind == 'file': ie.executable = is_executable # lines = osutils.split_lines(data) ie.text_sha1 = osutils.sha_string(data) ie.text_size = len(data) self.data_for_commit[file_id] = data elif kind == 'directory': self.directory_entries[path] = ie # There are no lines stored for a directory so # make sure the cache used by get_lines knows that self.data_for_commit[file_id] = '' elif kind == 'symlink': ie.symlink_target = self._decode_path(data) # There are no lines stored for a symlink so # make sure the cache used by get_lines knows that self.data_for_commit[file_id] = '' else: self.warning("Cannot import items of kind '%s' yet - ignoring '%s'" % (kind, path)) return # Record it if inv.has_id(file_id): old_ie = inv[file_id] if old_ie.kind == 'directory': self.record_delete(path, old_ie) self.record_changed(path, ie, parent_id) else: try: self.record_new(path, ie) except: print "failed to add path '%s' with entry '%s' in command %s" \ % (path, ie, self.command.id) print "parent's children are:\n%r\n" % (ie.parent_id.children,) raise def _ensure_directory(self, path, inv): """Ensure that the containing directory exists for 'path'""" dirname, basename = osutils.split(path) if dirname == '': # the root node doesn't get updated return basename, self.inventory_root_id try: ie = self._get_directory_entry(inv, dirname) except KeyError: # We will create this entry, since it doesn't exist pass else: return basename, ie.file_id # No directory existed, we will just create one, first, make sure # the parent exists dir_basename, parent_id = self._ensure_directory(dirname, inv) dir_file_id = self.bzr_file_id(dirname) ie = inventory.entry_factory['directory'](dir_file_id, dir_basename, parent_id) ie.revision = self.revision_id self.directory_entries[dirname] = ie # There are no lines stored for a directory so # make sure the cache used by get_lines knows that self.data_for_commit[dir_file_id] = '' # It's possible that a file or symlink with that file-id # already exists. If it does, we need to delete it. if inv.has_id(dir_file_id): self.record_delete(dirname, ie) self.record_new(dirname, ie) return basename, ie.file_id def _get_directory_entry(self, inv, dirname): """Get the inventory entry for a directory. Raises KeyError if dirname is not a directory in inv. """ result = self.directory_entries.get(dirname) if result is None: if dirname in self._paths_deleted_this_commit: raise KeyError try: file_id = inv.path2id(dirname) except errors.NoSuchId: # In a CHKInventory, this is raised if there's no root yet raise KeyError if file_id is None: raise KeyError result = inv[file_id] # dirname must be a directory for us to return it if result.kind == 'directory': self.directory_entries[dirname] = result else: raise KeyError return result def _delete_item(self, path, inv): newly_added = self._new_file_ids.get(path) if newly_added: # We've only just added this path earlier in this commit. 
file_id = newly_added # note: delta entries look like (old, new, file-id, ie) ie = self._delta_entries_by_fileid[file_id][3] else: file_id = inv.path2id(path) if file_id is None: self.mutter("ignoring delete of %s as not in inventory", path) return try: ie = inv[file_id] except errors.NoSuchId: self.mutter("ignoring delete of %s as not in inventory", path) return self.record_delete(path, ie) def _copy_item(self, src_path, dest_path, inv): newly_changed = self._new_file_ids.get(src_path) or \ self._modified_file_ids.get(src_path) if newly_changed: # We've only just added/changed this path earlier in this commit. file_id = newly_changed # note: delta entries look like (old, new, file-id, ie) ie = self._delta_entries_by_fileid[file_id][3] else: file_id = inv.path2id(src_path) if file_id is None: self.warning("ignoring copy of %s to %s - source does not exist", src_path, dest_path) return ie = inv[file_id] kind = ie.kind if kind == 'file': if newly_changed: content = self.data_for_commit[file_id] else: content = self.rev_store.get_file_text(self.parents[0], file_id) self._modify_item(dest_path, kind, ie.executable, content, inv) elif kind == 'symlink': self._modify_item(dest_path, kind, False, ie.symlink_target.encode("utf-8"), inv) else: self.warning("ignoring copy of %s %s - feature not yet supported", kind, dest_path) def _rename_item(self, old_path, new_path, inv): existing = self._new_file_ids.get(old_path) or \ self._modified_file_ids.get(old_path) if existing: # We've only just added/modified this path earlier in this commit. # Change the add/modify of old_path to an add of new_path self._rename_pending_change(old_path, new_path, existing) return file_id = inv.path2id(old_path) if file_id is None: self.warning( "ignoring rename of %s to %s - old path does not exist" % (old_path, new_path)) return ie = inv[file_id] rev_id = ie.revision new_file_id = inv.path2id(new_path) if new_file_id is not None: self.record_delete(new_path, inv[new_file_id]) self.record_rename(old_path, new_path, file_id, ie) # The revision-id for this entry will be/has been updated and # that means the loader then needs to know what the "new" text is. # We therefore must go back to the revision store to get it. lines = self.rev_store.get_file_lines(rev_id, file_id) self.data_for_commit[file_id] = ''.join(lines) def _delete_all_items(self, inv): if len(inv) == 0: return for path, ie in inv.iter_entries_by_dir(): if path != "": self.record_delete(path, ie) def _warn_unless_in_merges(self, fileid, path): if len(self.parents) <= 1: return for parent in self.parents[1:]: if fileid in self.get_inventory(parent): return self.warning("ignoring delete of %s as not in parent inventories", path) class InventoryCommitHandler(GenericCommitHandler): """A CommitHandler that builds and saves Inventory objects.""" def pre_process_files(self): super(InventoryCommitHandler, self).pre_process_files() # Seed the inventory from the previous one. Note that # the parent class version of pre_process_files() has # already set the right basis_inventory for this branch # but we need to copy it in order to mutate it safely # without corrupting the cached inventory value. 
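        # (Assumed rationale: basis_inventory usually comes straight out of
        # cache_mgr.inventories, an LRUCache shared across commits, so
        # mutating it in place would corrupt what a later commit sees when
        # it hits the same cache entry.)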
if len(self.parents) == 0: self.inventory = self.basis_inventory else: self.inventory = copy_inventory(self.basis_inventory) self.inventory_root = self.inventory.root # directory-path -> inventory-entry for current inventory self.directory_entries = dict(self.inventory.directories()) # Initialise the inventory revision info as required if self.rev_store.expects_rich_root(): self.inventory.revision_id = self.revision_id else: # In this revision store, root entries have no knit or weave. # When serializing out to disk and back in, root.revision is # always the new revision_id. self.inventory.root.revision = self.revision_id def post_process_files(self): """Save the revision.""" self.cache_mgr.inventories[self.revision_id] = self.inventory self.rev_store.load(self.revision, self.inventory, None, lambda file_id: self._get_data(file_id), lambda file_id: self._get_per_file_parents(file_id), lambda revision_ids: self._get_inventories(revision_ids)) def record_new(self, path, ie): try: # If this is a merge, the file was most likely added already. # The per-file parent(s) must therefore be calculated and # we can't assume there are none. per_file_parents, ie.revision = \ self.rev_store.get_parents_and_revision_for_entry(ie) self.per_file_parents_for_commit[ie.file_id] = per_file_parents self.inventory.add(ie) except errors.DuplicateFileId: # Directory already exists as a file or symlink del self.inventory[ie.file_id] # Try again self.inventory.add(ie) def record_changed(self, path, ie, parent_id): # HACK: no API for this (del+add does more than it needs to) per_file_parents, ie.revision = \ self.rev_store.get_parents_and_revision_for_entry(ie) self.per_file_parents_for_commit[ie.file_id] = per_file_parents self.inventory._byid[ie.file_id] = ie parent_ie = self.inventory._byid[parent_id] parent_ie.children[ie.name] = ie def record_delete(self, path, ie): self.inventory.remove_recursive_id(ie.file_id) def record_rename(self, old_path, new_path, file_id, ie): # For a rename, the revision-id is always the new one so # no need to change/set it here ie.revision = self.revision_id per_file_parents, _ = \ self.rev_store.get_parents_and_revision_for_entry(ie) self.per_file_parents_for_commit[file_id] = per_file_parents new_basename, new_parent_id = self._ensure_directory(new_path, self.inventory) self.inventory.rename(file_id, new_parent_id, new_basename) def modify_handler(self, filecmd): if filecmd.dataref is not None: data = self.cache_mgr.fetch_blob(filecmd.dataref) else: data = filecmd.data self.debug("modifying %s", filecmd.path) (kind, is_executable) = mode_to_kind(filecmd.mode) self._modify_item(self._decode_path(filecmd.path), kind, is_executable, data, self.inventory) def delete_handler(self, filecmd): self.debug("deleting %s", filecmd.path) self._delete_item(self._decode_path(filecmd.path), self.inventory) def copy_handler(self, filecmd): src_path = self._decode_path(filecmd.src_path) dest_path = self._decode_path(filecmd.dest_path) self.debug("copying %s to %s", src_path, dest_path) self._copy_item(src_path, dest_path, self.inventory) def rename_handler(self, filecmd): old_path = self._decode_path(filecmd.old_path) new_path = self._decode_path(filecmd.new_path) self.debug("renaming %s to %s", old_path, new_path) self._rename_item(old_path, new_path, self.inventory) def deleteall_handler(self, filecmd): self.debug("deleting all files (and also all directories)") self._delete_all_items(self.inventory) class InventoryDeltaCommitHandler(GenericCommitHandler): """A CommitHandler that builds 
Inventories by applying a delta.""" def pre_process_files(self): super(InventoryDeltaCommitHandler, self).pre_process_files() self._dirs_that_might_become_empty = set() # A given file-id can only appear once so we accumulate # the entries in a dict then build the actual delta at the end self._delta_entries_by_fileid = {} if len(self.parents) == 0 or not self.rev_store.expects_rich_root(): if self.parents: old_path = '' else: old_path = None # Need to explicitly add the root entry for the first revision # and for non rich-root inventories root_id = inventory.ROOT_ID root_ie = inventory.InventoryDirectory(root_id, u'', None) root_ie.revision = self.revision_id self._add_entry((old_path, '', root_id, root_ie)) def post_process_files(self): """Save the revision.""" delta = self._get_final_delta() inv = self.rev_store.load_using_delta(self.revision, self.basis_inventory, delta, None, self._get_data, self._get_per_file_parents, self._get_inventories) self.cache_mgr.inventories[self.revision_id] = inv #print "committed %s" % self.revision_id def _get_final_delta(self): """Generate the final delta. Smart post-processing of changes, e.g. pruning of directories that would become empty, goes here. """ delta = list(self._delta_entries_by_fileid.values()) if self.prune_empty_dirs and self._dirs_that_might_become_empty: candidates = self._dirs_that_might_become_empty while candidates: never_born = set() parent_dirs_that_might_become_empty = set() for path, file_id in self._empty_after_delta(delta, candidates): newly_added = self._new_file_ids.get(path) if newly_added: never_born.add(newly_added) else: delta.append((path, None, file_id, None)) parent_dir = osutils.dirname(path) if parent_dir: parent_dirs_that_might_become_empty.add(parent_dir) candidates = parent_dirs_that_might_become_empty # Clean up entries that got deleted before they were ever added if never_born: delta = [de for de in delta if de[2] not in never_born] return delta def _empty_after_delta(self, delta, candidates): #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta])) #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates])) new_inv = self._get_proposed_inventory(delta) result = [] for dir in candidates: file_id = new_inv.path2id(dir) if file_id is None: continue ie = new_inv[file_id] if ie.kind != 'directory': continue if len(ie.children) == 0: result.append((dir, file_id)) if self.verbose: self.note("pruning empty directory %s" % (dir,)) return result def _get_proposed_inventory(self, delta): if len(self.parents): # new_inv = self.basis_inventory._get_mutable_inventory() # Note that this will create unreferenced chk pages if we end up # deleting entries, because this 'test' inventory won't end up # used. However, it is cheaper than having to create a full copy of # the inventory for every commit. new_inv = self.basis_inventory.create_by_apply_delta(delta, 'not-a-valid-revision-id:') else: new_inv = inventory.Inventory(revision_id=self.revision_id) # This is set in the delta so remove it to prevent a duplicate del new_inv[inventory.ROOT_ID] try: new_inv.apply_delta(delta) except errors.InconsistentDelta: self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta])) raise return new_inv def _add_entry(self, entry): # We need to combine the data if multiple entries have the same file-id. 
# For example, a rename followed by a modification looks like: # # (x, y, f, e) & (y, y, f, g) => (x, y, f, g) # # Likewise, a modification followed by a rename looks like: # # (x, x, f, e) & (x, y, f, g) => (x, y, f, g) # # Here's a rename followed by a delete and a modification followed by # a delete: # # (x, y, f, e) & (y, None, f, None) => (x, None, f, None) # (x, x, f, e) & (x, None, f, None) => (x, None, f, None) # # In summary, we use the original old-path, new new-path and new ie # when combining entries. old_path = entry[0] new_path = entry[1] file_id = entry[2] ie = entry[3] existing = self._delta_entries_by_fileid.get(file_id, None) if existing is not None: old_path = existing[0] entry = (old_path, new_path, file_id, ie) if new_path is None and old_path is None: # This is a delete cancelling a previous add del self._delta_entries_by_fileid[file_id] parent_dir = osutils.dirname(existing[1]) self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir)) if parent_dir: self._dirs_that_might_become_empty.add(parent_dir) return else: self._delta_entries_by_fileid[file_id] = entry # Collect parent directories that might become empty if new_path is None: # delete parent_dir = osutils.dirname(old_path) # note: no need to check the root if parent_dir: self._dirs_that_might_become_empty.add(parent_dir) elif old_path is not None and old_path != new_path: # rename old_parent_dir = osutils.dirname(old_path) new_parent_dir = osutils.dirname(new_path) if old_parent_dir and old_parent_dir != new_parent_dir: self._dirs_that_might_become_empty.add(old_parent_dir) # Calculate the per-file parents, if not already done if file_id in self.per_file_parents_for_commit: return if old_path is None: # add # If this is a merge, the file was most likely added already. # The per-file parent(s) must therefore be calculated and # we can't assume there are none. 
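        # (For instance, if both parents of a merge already carry foo.txt,
        # its per-file parents are those two text revisions, not the empty
        # list a genuinely new file would get.)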
per_file_parents, ie.revision = \ self.rev_store.get_parents_and_revision_for_entry(ie) self.per_file_parents_for_commit[file_id] = per_file_parents elif new_path is None: # delete pass elif old_path != new_path: # rename per_file_parents, _ = \ self.rev_store.get_parents_and_revision_for_entry(ie) self.per_file_parents_for_commit[file_id] = per_file_parents else: # modify per_file_parents, ie.revision = \ self.rev_store.get_parents_and_revision_for_entry(ie) self.per_file_parents_for_commit[file_id] = per_file_parents def record_new(self, path, ie): self._add_entry((None, path, ie.file_id, ie)) def record_changed(self, path, ie, parent_id=None): self._add_entry((path, path, ie.file_id, ie)) self._modified_file_ids[path] = ie.file_id def record_delete(self, path, ie): self._add_entry((path, None, ie.file_id, None)) self._paths_deleted_this_commit.add(path) if ie.kind == 'directory': try: del self.directory_entries[path] except KeyError: pass for child_relpath, entry in \ self.basis_inventory.iter_entries_by_dir(from_dir=ie): child_path = osutils.pathjoin(path, child_relpath) self._add_entry((child_path, None, entry.file_id, None)) self._paths_deleted_this_commit.add(child_path) if entry.kind == 'directory': try: del self.directory_entries[child_path] except KeyError: pass def record_rename(self, old_path, new_path, file_id, old_ie): new_ie = old_ie.copy() new_basename, new_parent_id = self._ensure_directory(new_path, self.basis_inventory) new_ie.name = new_basename new_ie.parent_id = new_parent_id new_ie.revision = self.revision_id self._add_entry((old_path, new_path, file_id, new_ie)) self._modified_file_ids[new_path] = file_id self._paths_deleted_this_commit.discard(new_path) if new_ie.kind == 'directory': self.directory_entries[new_path] = new_ie def _rename_pending_change(self, old_path, new_path, file_id): """Instead of adding/modifying old-path, add new-path instead.""" # note: delta entries look like (old, new, file-id, ie) old_ie = self._delta_entries_by_fileid[file_id][3] # Delete the old path. Note that this might trigger implicit # deletion of newly created parents that could now become empty. 
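        # (Example: if this commit created dir/ solely to hold dir/a, then
        # renaming dir/a to b leaves dir/ empty; the delete recorded below
        # flags dir/ so _get_final_delta() can prune it when
        # prune_empty_dirs is set.)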
self.record_delete(old_path, old_ie) # Update the dictionaries used for tracking new file-ids if old_path in self._new_file_ids: del self._new_file_ids[old_path] else: del self._modified_file_ids[old_path] self._new_file_ids[new_path] = file_id # Create the new InventoryEntry kind = old_ie.kind basename, parent_id = self._ensure_directory(new_path, self.basis_inventory) ie = inventory.make_entry(kind, basename, parent_id, file_id) ie.revision = self.revision_id if kind == 'file': ie.executable = old_ie.executable ie.text_sha1 = old_ie.text_sha1 ie.text_size = old_ie.text_size elif kind == 'symlink': ie.symlink_target = old_ie.symlink_target # Record it self.record_new(new_path, ie) def modify_handler(self, filecmd): (kind, executable) = mode_to_kind(filecmd.mode) if filecmd.dataref is not None: if kind == "directory": data = None elif kind == "tree-reference": data = filecmd.dataref else: data = self.cache_mgr.fetch_blob(filecmd.dataref) else: data = filecmd.data self.debug("modifying %s", filecmd.path) decoded_path = self._decode_path(filecmd.path) self._modify_item(decoded_path, kind, executable, data, self.basis_inventory) def delete_handler(self, filecmd): self.debug("deleting %s", filecmd.path) self._delete_item( self._decode_path(filecmd.path), self.basis_inventory) def copy_handler(self, filecmd): src_path = self._decode_path(filecmd.src_path) dest_path = self._decode_path(filecmd.dest_path) self.debug("copying %s to %s", src_path, dest_path) self._copy_item(src_path, dest_path, self.basis_inventory) def rename_handler(self, filecmd): old_path = self._decode_path(filecmd.old_path) new_path = self._decode_path(filecmd.new_path) self.debug("renaming %s to %s", old_path, new_path) self._rename_item(old_path, new_path, self.basis_inventory) def deleteall_handler(self, filecmd): self.debug("deleting all files (and also all directories)") self._delete_all_items(self.basis_inventory) bzr-fastimport-0.13.0+bzr361/cache_manager.py0000644000000000000000000002423112335104053017034 0ustar 00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """A manager of caches.""" import atexit import os import shutil import tempfile import weakref from bzrlib import lru_cache, trace from bzrlib.plugins.fastimport import ( branch_mapper, ) from bzrlib.plugins.fastimport.reftracker import ( RefTracker, ) from bzrlib.plugins.fastimport.helpers import ( single_plural, ) class _Cleanup(object): """This class makes sure we clean up when CacheManager goes away. We use a helper class to ensure that we are never in a refcycle. 
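    (In Python 2, an object with a __del__ method that is trapped in a
    reference cycle is never collected, so CacheManager itself has to stay
    out of cycles for this cleanup to run at all.)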
""" def __init__(self, disk_blobs): self.disk_blobs = disk_blobs self.tempdir = None self.small_blobs = None def __del__(self): self.finalize() def finalize(self): if self.disk_blobs is not None: for info in self.disk_blobs.itervalues(): if info[-1] is not None: os.unlink(info[-1]) self.disk_blobs = None if self.small_blobs is not None: self.small_blobs.close() self.small_blobs = None if self.tempdir is not None: shutil.rmtree(self.tempdir) class CacheManager(object): _small_blob_threshold = 25*1024 _sticky_cache_size = 300*1024*1024 _sticky_flushed_size = 100*1024*1024 def __init__(self, info=None, verbose=False, inventory_cache_size=10): """Create a manager of caches. :param info: a ConfigObj holding the output from the --info processor, or None if no hints are available """ self.verbose = verbose # dataref -> data. datref is either :mark or the sha-1. # Sticky blobs are referenced more than once, and are saved until their # refcount goes to 0 self._blobs = {} self._sticky_blobs = {} self._sticky_memory_bytes = 0 # if we overflow our memory cache, then we will dump large blobs to # disk in this directory self._tempdir = None # id => (offset, n_bytes, fname) # if fname is None, then the content is stored in the small file self._disk_blobs = {} self._cleanup = _Cleanup(self._disk_blobs) # revision-id -> Inventory cache # these are large and we probably don't need too many as # most parents are recent in history self.inventories = lru_cache.LRUCache(inventory_cache_size) # import commmit-ids -> revision-id lookup table # we need to keep all of these but they are small self.marks = {} # (path, branch_ref) -> file-ids - as generated. # (Use store_file_id/fetch_fileid methods rather than direct access.) # Work out the blobs to make sticky - None means all self._blob_ref_counts = {} if info is not None: try: blobs_by_counts = info['Blob reference counts'] # The parser hands values back as lists, already parsed for count, blob_list in blobs_by_counts.items(): n = int(count) for b in blob_list: self._blob_ref_counts[b] = n except KeyError: # info not in file - possible when no blobs used pass # BranchMapper has no state (for now?), but we keep it around rather # than reinstantiate on every usage self.branch_mapper = branch_mapper.BranchMapper() self.reftracker = RefTracker() def add_mark(self, mark, commit_id): assert mark[0] != ':' self.marks[mark] = commit_id def lookup_committish(self, committish): """Resolve a 'committish' to a revision id. :param committish: A "committish" string :return: Bazaar revision id """ assert committish[0] == ':' return self.marks[committish.lstrip(':')] def dump_stats(self, note=trace.note): """Dump some statistics about what we cached.""" # TODO: add in inventory stastistics note("Cache statistics:") self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note) self._show_stats_for(self.marks, "revision-ids", note=note) # These aren't interesting so omit from the output, at least for now #self._show_stats_for(self._blobs, "other blobs", note=note) #self.reftracker.dump_stats(note=note) def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False): """Dump statistics about a given dictionary. By the key and value need to support len(). 
""" count = len(dict) if tuple_key: size = sum(map(len, (''.join(k) for k in dict.keys()))) else: size = sum(map(len, dict.keys())) size += sum(map(len, dict.values())) size = size * 1.0 / 1024 unit = 'K' if size > 1024: size = size / 1024 unit = 'M' if size > 1024: size = size / 1024 unit = 'G' note(" %-12s: %8.1f %s (%d %s)" % (label, size, unit, count, single_plural(count, "item", "items"))) def clear_all(self): """Free up any memory used by the caches.""" self._blobs.clear() self._sticky_blobs.clear() self.marks.clear() self.reftracker.clear() self.inventories.clear() def _flush_blobs_to_disk(self): blobs = self._sticky_blobs.keys() sticky_blobs = self._sticky_blobs total_blobs = len(sticky_blobs) blobs.sort(key=lambda k:len(sticky_blobs[k])) if self._tempdir is None: tempdir = tempfile.mkdtemp(prefix='fastimport_blobs-') self._tempdir = tempdir self._cleanup.tempdir = self._tempdir self._cleanup.small_blobs = tempfile.TemporaryFile( prefix='small-blobs-', dir=self._tempdir) small_blob_ref = weakref.ref(self._cleanup.small_blobs) # Even though we add it to _Cleanup it seems that the object can be # destroyed 'too late' for cleanup to actually occur. Probably a # combination of bzr's "die directly, don't clean up" and how # exceptions close the running stack. def exit_cleanup(): small_blob = small_blob_ref() if small_blob is not None: small_blob.close() shutil.rmtree(tempdir, ignore_errors=True) atexit.register(exit_cleanup) count = 0 bytes = 0 n_small_bytes = 0 while self._sticky_memory_bytes > self._sticky_flushed_size: id = blobs.pop() blob = self._sticky_blobs.pop(id) n_bytes = len(blob) self._sticky_memory_bytes -= n_bytes if n_bytes < self._small_blob_threshold: f = self._cleanup.small_blobs f.seek(0, os.SEEK_END) self._disk_blobs[id] = (f.tell(), n_bytes, None) f.write(blob) n_small_bytes += n_bytes else: fd, name = tempfile.mkstemp(prefix='blob-', dir=self._tempdir) os.write(fd, blob) os.close(fd) self._disk_blobs[id] = (0, n_bytes, name) bytes += n_bytes del blob count += 1 trace.note('flushed %d/%d blobs w/ %.1fMB (%.1fMB small) to disk' % (count, total_blobs, bytes / 1024. / 1024, n_small_bytes / 1024. 
/ 1024)) def store_blob(self, id, data): """Store a blob of data.""" # Note: If we're not reference counting, everything has to be sticky if not self._blob_ref_counts or id in self._blob_ref_counts: self._sticky_blobs[id] = data self._sticky_memory_bytes += len(data) if self._sticky_memory_bytes > self._sticky_cache_size: self._flush_blobs_to_disk() elif data == '': # Empty data is always sticky self._sticky_blobs[id] = data else: self._blobs[id] = data def _decref(self, id, cache, fn): if not self._blob_ref_counts: return False count = self._blob_ref_counts.get(id, None) if count is not None: count -= 1 if count <= 0: del cache[id] if fn is not None: os.unlink(fn) del self._blob_ref_counts[id] return True else: self._blob_ref_counts[id] = count return False def fetch_blob(self, id): """Fetch a blob of data.""" if id in self._blobs: return self._blobs.pop(id) if id in self._disk_blobs: (offset, n_bytes, fn) = self._disk_blobs[id] if fn is None: f = self._cleanup.small_blobs f.seek(offset) content = f.read(n_bytes) else: fp = open(fn, 'rb') try: content = fp.read() finally: fp.close() self._decref(id, self._disk_blobs, fn) return content content = self._sticky_blobs[id] if self._decref(id, self._sticky_blobs, None): self._sticky_memory_bytes -= len(content) return content def invert_dictset(d): """Invert a dictionary with keys matching a set of values, turned into lists.""" # Based on recipe from ASPN result = {} for k, c in d.iteritems(): for v in c: keys = result.setdefault(v, []) keys.append(k) return result bzr-fastimport-0.13.0+bzr361/cmds.py0000644000000000000000000007052612335104053015235 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Fastimport/fastexport commands.""" from bzrlib import bzrdir from bzrlib.commands import Command from bzrlib.option import Option, ListOption, RegistryOption from bzrlib.plugins.fastimport import ( helpers, load_fastimport, ) def _run(source, processor_factory, verbose=False, user_map=None, **kwargs): """Create and run a processor. :param source: a filename or '-' for standard input. If the filename ends in .gz, it will be opened as a gzip file and the stream will be implicitly uncompressed :param processor_factory: a callable for creating a processor :param user_map: if not None, the file containing the user map. 
""" from fastimport.errors import ParsingError from bzrlib.errors import BzrCommandError from fastimport import parser stream = _get_source_stream(source) user_mapper = _get_user_mapper(user_map) proc = processor_factory(verbose=verbose, **kwargs) p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper) try: return proc.process(p.iter_commands) except ParsingError, e: raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e)) def _get_source_stream(source): if source == '-' or source is None: import sys stream = helpers.binary_stream(sys.stdin) elif source.endswith('.gz'): import gzip stream = gzip.open(source, "rb") else: stream = open(source, "rb") return stream def _get_user_mapper(filename): import user_mapper if filename is None: return None f = open(filename) lines = f.readlines() f.close() return user_mapper.UserMapper(lines) class cmd_fast_import(Command): """Backend for fast Bazaar data importers. This command reads a mixed command/data stream and creates branches in a Bazaar repository accordingly. The preferred recipe is:: bzr fast-import project.fi project.bzr Numerous commands are provided for generating a fast-import file to use as input. To specify standard input as the input stream, use a source name of '-' (instead of project.fi). If the source name ends in '.gz', it is assumed to be compressed in gzip format. project.bzr will be created if it doesn't exist. If it exists already, it should be empty or be an existing Bazaar repository or branch. If not specified, the current directory is assumed. fast-import will intelligently select the format to use when creating a repository or branch. If you are running Bazaar 1.17 up to Bazaar 2.0, the default format for Bazaar 2.x ("2a") is used. Otherwise, the current default format ("pack-0.92" for Bazaar 1.x) is used. If you wish to specify a custom format, use the `--format` option. .. note:: To maintain backwards compatibility, fast-import lets you create the target repository or standalone branch yourself. It is recommended though that you let fast-import create these for you instead. :Branch mapping rules: Git reference names are mapped to Bazaar branch names as follows: * refs/heads/foo is mapped to foo * refs/remotes/origin/foo is mapped to foo.remote * refs/tags/foo is mapped to foo.tag * */master is mapped to trunk, trunk.remote, etc. * */trunk is mapped to git-trunk, git-trunk.remote, etc. :Branch creation rules: When a shared repository is created or found at the destination, branches are created inside it. In the simple case of a single branch (refs/heads/master) inside the input file, the branch is project.bzr/trunk. When a standalone branch is found at the destination, the trunk is imported there and warnings are output about any other branches found in the input file. When a branch in a shared repository is found at the destination, that branch is made the trunk and other branches, if any, are created in sister directories. :Working tree updates: The working tree is generated for the trunk branch. If multiple branches are created, a message is output on completion explaining how to create the working trees for other branches. :Custom exporters: The fast-export-from-xxx commands typically call more advanced xxx-fast-export scripts. You are welcome to use the advanced scripts if you prefer. If you wish to write a custom exporter for your project, see http://bazaar-vcs.org/BzrFastImport for the detailed protocol specification. 
In many cases, exporters can be written quite quickly using whatever scripting/programming language you like. :User mapping: Some source repositories store just the user name while Bazaar prefers a full email address. You can adjust user-ids while importing by using the --user-map option. The argument is a text file with lines in the format:: old-id = new-id Blank lines and lines beginning with # are ignored. If old-id has the special value '@', then users without an email address will get one created by using the matching new-id as the domain, unless a more explicit address is given for them. For example, given the user-map of:: @ = example.com bill = William Jones then user-ids are mapped as follows:: maria => maria bill => William Jones .. note:: User mapping is supported by both the fast-import and fast-import-filter commands. :Blob tracking: As some exporters (like git-fast-export) reuse blob data across commits, fast-import makes two passes over the input file by default. In the first pass, it collects data about what blobs are used when, along with some other statistics (e.g. total number of commits). In the second pass, it generates the repository and branches. .. note:: The initial pass isn't done if the --info option is used to explicitly pass in information about the input stream. It also isn't done if the source is standard input. In the latter case, memory consumption may be higher than otherwise because some blobs may be kept in memory longer than necessary. :Restarting an import: At checkpoints and on completion, the commit-id -> revision-id map is saved to a file called 'fastimport-id-map' in the control directory for the repository (e.g. .bzr/repository). If the import is interrupted or unexpectedly crashes, it can be started again and this file will be used to skip over already loaded revisions. As long as subsequent exports from the original source begin with exactly the same revisions, you can use this feature to maintain a mirror of a repository managed by a foreign tool. If and when Bazaar is used to manage the repository, this file can be safely deleted. :Examples: Import a Subversion repository into Bazaar:: svn-fast-export /svn/repo/path > project.fi bzr fast-import project.fi project.bzr Import a CVS repository into Bazaar:: cvs2git /cvs/repo/path > project.fi bzr fast-import project.fi project.bzr Import a Git repository into Bazaar:: cd /git/repo/path git fast-export --all > project.fi bzr fast-import project.fi project.bzr Import a Mercurial repository into Bazaar:: cd /hg/repo/path hg fast-export > project.fi bzr fast-import project.fi project.bzr Import a Darcs repository into Bazaar:: cd /darcs/repo/path darcs-fast-export > project.fi bzr fast-import project.fi project.bzr """ hidden = False _see_also = ['fast-export', 'fast-import-filter', 'fast-import-info'] takes_args = ['source', 'destination?'] takes_options = ['verbose', Option('user-map', type=str, help="Path to file containing a map of user-ids.", ), Option('info', type=str, help="Path to file containing caching hints.", ), Option('trees', help="Update all working trees, not just trunk's.", ), Option('count', type=int, help="Import this many revisions then exit.", ), Option('checkpoint', type=int, help="Checkpoint automatically every N revisions." " The default is 10000.", ), Option('autopack', type=int, help="Pack every N checkpoints. 
The default is 4.", ), Option('inv-cache', type=int, help="Number of inventories to cache.", ), RegistryOption.from_kwargs('mode', 'The import algorithm to use.', title='Import Algorithm', default='Use the preferred algorithm (inventory deltas).', classic="Use the original algorithm (mutable inventories).", experimental="Enable experimental features.", value_switches=True, enum_switch=False, ), Option('import-marks', type=str, help="Import marks from file." ), Option('export-marks', type=str, help="Export marks to file." ), RegistryOption('format', help='Specify a format for the created repository. See' ' "bzr help formats" for details.', lazy_registry=('bzrlib.bzrdir', 'format_registry'), converter=lambda name: bzrdir.format_registry.make_bzrdir(name), value_switches=False, title='Repository format'), ] def run(self, source, destination='.', verbose=False, info=None, trees=False, count=-1, checkpoint=10000, autopack=4, inv_cache=-1, mode=None, import_marks=None, export_marks=None, format=None, user_map=None): load_fastimport() from bzrlib.plugins.fastimport.processors import generic_processor from bzrlib.plugins.fastimport.helpers import ( open_destination_directory, ) control = open_destination_directory(destination, format=format) # If an information file was given and the source isn't stdin, # generate the information by reading the source file as a first pass if info is None and source != '-': info = self._generate_info(source) # Do the work if mode is None: mode = 'default' params = { 'info': info, 'trees': trees, 'count': count, 'checkpoint': checkpoint, 'autopack': autopack, 'inv-cache': inv_cache, 'mode': mode, 'import-marks': import_marks, 'export-marks': export_marks, } return _run(source, generic_processor.GenericProcessor, bzrdir=control, params=params, verbose=verbose, user_map=user_map) def _generate_info(self, source): from cStringIO import StringIO from fastimport import parser from fastimport.errors import ParsingError from bzrlib.errors import BzrCommandError from bzrlib.plugins.fastimport.processors import info_processor stream = _get_source_stream(source) output = StringIO() try: proc = info_processor.InfoProcessor(verbose=True, outf=output) p = parser.ImportParser(stream) try: return_code = proc.process(p.iter_commands) except ParsingError, e: raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e)) lines = output.getvalue().splitlines() finally: output.close() stream.seek(0) return lines class cmd_fast_import_filter(Command): """Filter a fast-import stream to include/exclude files & directories. This command is useful for splitting a subdirectory or bunch of files out from a project to create a new project complete with history for just those files. It can also be used to create a new project repository that removes all references to files that should not have been committed, e.g. security-related information (like passwords), commercially sensitive material, files with an incompatible license or large binary files like CD images. To specify standard input as the input stream, use a source name of '-'. If the source name ends in '.gz', it is assumed to be compressed in gzip format. :File/directory filtering: This is supported by the -i and -x options. Excludes take precedence over includes. When filtering out a subdirectory (or file), the new stream uses the subdirectory (or subdirectory containing the file) as the root. 
As fast-import doesn't know in advance whether a path is a file or directory in the stream, you need to specify a trailing '/' on directories passed to the `--includes option`. If multiple files or directories are given, the new root is the deepest common directory. Note: If a path has been renamed, take care to specify the *original* path name, not the final name that it ends up with. :User mapping: Some source repositories store just the user name while Bazaar prefers a full email address. You can adjust user-ids by using the --user-map option. The argument is a text file with lines in the format:: old-id = new-id Blank lines and lines beginning with # are ignored. If old-id has the special value '@', then users without an email address will get one created by using the matching new-id as the domain, unless a more explicit address is given for them. For example, given the user-map of:: @ = example.com bill = William Jones then user-ids are mapped as follows:: maria => maria bill => William Jones .. note:: User mapping is supported by both the fast-import and fast-import-filter commands. :History rewriting: By default fast-import-filter does quite aggressive history rewriting. Empty commits (or commits which had all their content filtered out) will be removed, and so are the references to commits not included in the stream. Flag --dont-squash-empty-commits reverses this behavior and makes it possible to use fast-import-filter on incremental streams. :Examples: Create a new project from a library (note the trailing / on the directory name of the library):: front-end | bzr fast-import-filter -i lib/xxx/ > xxx.fi bzr fast-import xxx.fi mylibrary.bzr (lib/xxx/foo is now foo) Create a new repository without a sensitive file:: front-end | bzr fast-import-filter -x missile-codes.txt > clean.fi bzr fast-import clean.fi clean.bzr """ hidden = False _see_also = ['fast-import'] takes_args = ['source?'] takes_options = ['verbose', ListOption('include_paths', short_name='i', type=str, help="Only include commits affecting these paths." " Directories should have a trailing /." ), ListOption('exclude_paths', short_name='x', type=str, help="Exclude these paths from commits." ), Option('user-map', type=str, help="Path to file containing a map of user-ids.", ), Option('dont-squash-empty-commits', help="Preserve all commits and links between them" ), ] encoding_type = 'exact' def run(self, source=None, verbose=False, include_paths=None, exclude_paths=None, user_map=None, dont_squash_empty_commits=False): from bzrlib.errors import BzrCommandError load_fastimport() from fastimport.processors import filter_processor params = { 'include_paths': include_paths, 'exclude_paths': exclude_paths, } if ('squash_empty_commits' in filter_processor.FilterProcessor.known_params): params['squash_empty_commits'] = (not dont_squash_empty_commits) else: if dont_squash_empty_commits: raise BzrCommandError("installed python-fastimport does not " "support not squashing empty commits. 
Please install " " a newer python-fastimport to use " "--dont-squash-empty-commits") from fastimport.errors import ParsingError from fastimport import parser stream = _get_source_stream(source) user_mapper = _get_user_mapper(user_map) proc = filter_processor.FilterProcessor(params=params, verbose=verbose) p = parser.ImportParser(stream, verbose=verbose, user_mapper=user_mapper) try: return proc.process(p.iter_commands) except ParsingError, e: raise BzrCommandError("%d: Parse error: %s" % (e.lineno, e)) class cmd_fast_import_info(Command): """Output information about a fast-import stream. This command reads a fast-import stream and outputs statistics and interesting properties about what it finds. When run in verbose mode, the information is output as a configuration file that can be passed to fast-import to assist it in intelligently caching objects. To specify standard input as the input stream, use a source name of '-'. If the source name ends in '.gz', it is assumed to be compressed in gzip format. :Examples: Display statistics about the import stream produced by front-end:: front-end | bzr fast-import-info - Create a hints file for running fast-import on a large repository:: front-end | bzr fast-import-info -v - > front-end.cfg """ hidden = False _see_also = ['fast-import'] takes_args = ['source'] takes_options = ['verbose'] def run(self, source, verbose=False): load_fastimport() from bzrlib.plugins.fastimport.processors import info_processor return _run(source, info_processor.InfoProcessor, verbose=verbose) class cmd_fast_import_query(Command): """Query a fast-import stream displaying selected commands. To specify standard input as the input stream, use a source name of '-'. If the source name ends in '.gz', it is assumed to be compressed in gzip format. To specify a commit to display, give its mark using the --commit-mark option. The commit will be displayed with file-commands included but with inline blobs hidden. To specify the commands to display, use the -C option one or more times. To specify just some fields for a command, use the syntax:: command=field1,... By default, the nominated fields for the nominated commands are displayed tab separated. To see the information in a name:value format, use verbose mode. Note: Binary fields (e.g. data for blobs) are masked out so it is generally safe to view the output in a terminal. :Examples: Show the commit with mark 429:: bzr fast-import-query xxx.fi -m429 Show all the fields of the reset and tag commands:: bzr fast-import-query xxx.fi -Creset -Ctag Show the mark and merge fields of the commit commands:: bzr fast-import-query xxx.fi -Ccommit=mark,merge """ hidden = True _see_also = ['fast-import', 'fast-import-filter'] takes_args = ['source'] takes_options = ['verbose', Option('commit-mark', short_name='m', type=str, help="Mark of the commit to display." ), ListOption('commands', short_name='C', type=str, help="Display fields for these commands." ), ] def run(self, source, verbose=False, commands=None, commit_mark=None): load_fastimport() from fastimport.processors import query_processor from bzrlib.plugins.fastimport import helpers params = helpers.defines_to_dict(commands) or {} if commit_mark: params['commit-mark'] = commit_mark return _run(source, query_processor.QueryProcessor, params=params, verbose=verbose) class cmd_fast_export(Command): """Generate a fast-import stream from a Bazaar branch. 
    This program generates a stream from a Bazaar branch in fast-import
    format used by tools such as bzr fast-import, git-fast-import and
    hg-fast-import. It takes two optional arguments: the source bzr branch
    to export and the destination file to write the fastimport stream to.

    If no source is specified, it will search for a branch in the current
    directory.

    If no destination is given or the destination is '-', standard output
    is used. Otherwise, the destination is the name of a file. If the
    destination ends in '.gz', the output will be compressed into gzip
    format.

    :Round-tripping:

     Recent versions of the fast-import specification support features
     that allow effective round-tripping of most of the metadata in Bazaar
     branches. As such, fast-exporting a branch and fast-importing the
     data produced will create a new repository with roughly equivalent
     history, i.e. "bzr log -v -p --include-merges --forward" on the old
     branch and new branch should produce similar, if not identical,
     results.

     .. note::

        Be aware that the new repository may appear to have similar
        history but internally it is quite different with new revision-ids
        and file-ids assigned. As a consequence, the ability to easily
        merge with branches based on the old repository is lost. Depending
        on your reasons for producing a new repository, this may or may
        not be an issue.

    :Interoperability:

     fast-export can use the following "extended features" to produce a
     richer data stream:

     * *multiple-authors* - if a commit has multiple authors (as commonly
       occurs in pair-programming), all authors will be included in the
       output, not just the first author

     * *commit-properties* - custom metadata per commit that Bazaar stores
       in revision properties (e.g. branch-nick and bugs fixed by this
       change) will be included in the output.

     * *empty-directories* - directories, even the empty ones, will be
       included in the output.

     To disable these features and produce output acceptable to git 1.6,
     use the --plain option. To enable these features, use --no-plain.
     Currently, --plain is the default but that will change in the near
     future once the feature names and definitions are formally agreed to
     by the broader fast-import developer community.

     Git has stricter naming rules for tags and fast-export --plain will
     skip tags which can't be imported into git. To replace characters
     unsupported in git with an underscore instead, specify
     --rewrite-tag-names.

    :History truncation:

     It is sometimes convenient to simply truncate the revision history at
     a certain point. The --baseline option, to be used in conjunction
     with -r, emits a baseline commit containing the state of the entire
     source tree at the first requested revision. This allows a user to
     produce a tree identical to the original without munging multiple
     exports.

    :Examples:

     To produce data destined for import into Bazaar::

       bzr fast-export --no-plain my-bzr-branch my.fi.gz

     To produce data destined for Git 1.6::

       bzr fast-export --plain my-bzr-branch my.fi

     To import several unmerged but related branches into the same
     repository, use the --{export,import}-marks options, and specify a
     name for the git branch like this::

       bzr fast-export --export-marks=marks.bzr project.dev |
           GIT_DIR=project/.git git-fast-import --export-marks=marks.git

       bzr fast-export --import-marks=marks.bzr -b other project.other |
           GIT_DIR=project/.git git-fast-import --import-marks=marks.git

     If you get a "Missing space after source" error from git-fast-import,
     see the top of the commands.py module for a work-around.
Since bzr uses per-branch tags and git/hg use per-repo tags, the way bzr fast-export presently emits tags (unconditional reset & new ref) may result in clashes when several different branches are imported into single git/hg repo. If this occurs, use the bzr fast-export option --no-tags during the export of one or more branches to avoid the issue. """ hidden = False _see_also = ['fast-import', 'fast-import-filter'] takes_args = ['source?', 'destination?'] takes_options = ['verbose', 'revision', Option('git-branch', short_name='b', type=str, argname='FILE', help='Name of the git branch to create (default=master).' ), Option('checkpoint', type=int, argname='N', help="Checkpoint every N revisions (default=10000)." ), Option('marks', type=str, argname='FILE', help="Import marks from and export marks to file." ), Option('import-marks', type=str, argname='FILE', help="Import marks from file." ), Option('export-marks', type=str, argname='FILE', help="Export marks to file." ), Option('plain', help="Exclude metadata to maximise interoperability." ), Option('rewrite-tag-names', help="Replace characters invalid in git with '_'" " (plain mode only).", ), Option('baseline', help="Export an 'absolute' baseline commit prior to" "the first relative commit", ), Option('no-tags', help="Don't export tags" ), ] encoding_type = 'exact' def run(self, source=None, destination=None, verbose=False, git_branch="master", checkpoint=10000, marks=None, import_marks=None, export_marks=None, revision=None, plain=True, rewrite_tag_names=False, no_tags=False, baseline=False): load_fastimport() from bzrlib.branch import Branch from bzrlib.plugins.fastimport import exporter if marks: import_marks = export_marks = marks # Open the source if source is None: source = "." branch = Branch.open_containing(source)[0] outf = exporter._get_output_stream(destination) exporter = exporter.BzrFastExporter(branch, outf=outf, ref="refs/heads/%s" % git_branch, checkpoint=checkpoint, import_marks_file=import_marks, export_marks_file=export_marks, revision=revision, verbose=verbose, plain_format=plain, rewrite_tags=rewrite_tag_names, no_tags=no_tags, baseline=baseline) return exporter.run() bzr-fastimport-0.13.0+bzr361/doc/0000755000000000000000000000000010754757632014514 5ustar 00000000000000bzr-fastimport-0.13.0+bzr361/explorer/0000755000000000000000000000000011331223130015554 5ustar 00000000000000bzr-fastimport-0.13.0+bzr361/exporter.py0000644000000000000000000006130612707165740016170 0ustar 00000000000000# -*- coding: utf-8 -*- # Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
# # Based on bzr-fast-export # Copyright (c) 2008 Adeodato Simó # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # vim: fileencoding=utf-8 """Core engine for the fast-export command.""" # TODO: if a new_git_branch below gets merged repeatedly, the tip of the branch # is not updated (because the parent of commit is already merged, so we don't # set new_git_branch to the previously used name) from email.Utils import parseaddr import sys, time, re import bzrlib.branch import bzrlib.revision from bzrlib import ( builtins, errors as bazErrors, osutils, progress, trace, ) from bzrlib.plugins.fastimport import ( helpers, marks_file, ) from fastimport import commands from bzrlib.plugins.fastimport.helpers import ( binary_stream, single_plural, ) def _get_output_stream(destination): if destination is None or destination == '-': return binary_stream(sys.stdout) elif destination.endswith('gz'): import gzip return gzip.open(destination, 'wb') else: return open(destination, 'wb') # from dulwich.repo: def check_ref_format(refname): """Check if a refname is correctly formatted. Implements all the same rules as git-check-ref-format[1]. [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html :param refname: The refname to check :return: True if refname is valid, False otherwise """ # These could be combined into one big expression, but are listed separately # to parallel [1]. if '/.' in refname or refname.startswith('.'): return False if '/' not in refname: return False if '..' in refname: return False for c in refname: if ord(c) < 040 or c in '\177 ~^:?*[': return False if refname[-1] in '/.': return False if refname.endswith('.lock'): return False if '@{' in refname: return False if '\\' in refname: return False return True def sanitize_ref_name_for_git(refname): """Rewrite refname so that it will be accepted by git-fast-import. For the detailed rules see check_ref_format. By rewriting the refname we are breaking uniqueness guarantees provided by bzr so we have to manually verify that resulting ref names are unique. :param refname: refname to rewrite :return: new refname """ new_refname = re.sub( # '/.' in refname or startswith '.' r"/\.|^\." # '..' in refname r"|\.\." # ord(c) < 040 r"|[" + "".join([chr(x) for x in range(040)]) + r"]" # c in '\177 ~^:?*[' r"|[\177 ~^:?*[]" # last char in "/." 
r"|[/.]$" # endswith '.lock' r"|.lock$" # "@{" in refname r"|@{" # "\\" in refname r"|\\", "_", refname) return new_refname class BzrFastExporter(object): def __init__(self, source, outf, ref=None, checkpoint=-1, import_marks_file=None, export_marks_file=None, revision=None, verbose=False, plain_format=False, rewrite_tags=False, no_tags=False, baseline=False): """Export branch data in fast import format. :param plain_format: if True, 'classic' fast-import format is used without any extended features; if False, the generated data is richer and includes information like multiple authors, revision properties, etc. :param rewrite_tags: if True and if plain_format is set, tag names will be rewritten to be git-compatible. Otherwise tags which aren't valid for git will be skipped if plain_format is set. :param no_tags: if True tags won't be exported at all """ self.branch = source self.outf = outf self.ref = ref self.checkpoint = checkpoint self.import_marks_file = import_marks_file self.export_marks_file = export_marks_file self.revision = revision self.excluded_revisions = set() self.plain_format = plain_format self.rewrite_tags = rewrite_tags self.no_tags = no_tags self.baseline = baseline self._multi_author_api_available = hasattr(bzrlib.revision.Revision, 'get_apparent_authors') self.properties_to_exclude = ['authors', 'author'] # Progress reporting stuff self.verbose = verbose if verbose: self.progress_every = 100 else: self.progress_every = 1000 self._start_time = time.time() self._commit_total = 0 # Load the marks and initialise things accordingly self.revid_to_mark = {} self.branch_names = {} if self.import_marks_file: marks_info = marks_file.import_marks(self.import_marks_file) if marks_info is not None: self.revid_to_mark = dict((r, m) for m, r in marks_info.items()) # These are no longer included in the marks file #self.branch_names = marks_info[1] def interesting_history(self): if self.revision: rev1, rev2 = builtins._get_revision_range(self.revision, self.branch, "fast-export") start_rev_id = rev1.rev_id end_rev_id = rev2.rev_id else: start_rev_id = None end_rev_id = None self.note("Calculating the revisions to include ...") view_revisions = [rev_id for rev_id, _, _, _ in self.branch.iter_merge_sorted_revisions(end_rev_id, start_rev_id)] view_revisions.reverse() # If a starting point was given, we need to later check that we don't # start emitting revisions from before that point. Collect the # revisions to exclude now ... if start_rev_id is not None: self.note("Calculating the revisions to exclude ...") self.excluded_revisions = set([rev_id for rev_id, _, _, _ in self.branch.iter_merge_sorted_revisions(start_rev_id)]) if self.baseline: # needed so the first relative commit knows its parent self.excluded_revisions.remove(start_rev_id) view_revisions.insert(0, start_rev_id) return list(view_revisions) def run(self): # Export the data self.branch.repository.lock_read() try: interesting = self.interesting_history() self._commit_total = len(interesting) self.note("Starting export of %d revisions ..." 
% self._commit_total) if not self.plain_format: self.emit_features() if self.baseline: self.emit_baseline(interesting.pop(0), self.ref) for revid in interesting: self.emit_commit(revid, self.ref) if self.branch.supports_tags() and not self.no_tags: self.emit_tags() finally: self.branch.repository.unlock() # Save the marks if requested self._save_marks() self.dump_stats() def note(self, msg, *args): """Output a note but timestamp it.""" msg = "%s %s" % (self._time_of_day(), msg) trace.note(msg, *args) def warning(self, msg, *args): """Output a warning but timestamp it.""" msg = "%s WARNING: %s" % (self._time_of_day(), msg) trace.warning(msg, *args) def _time_of_day(self): """Time of day as a string.""" # Note: this is a separate method so tests can patch in a fixed value return time.strftime("%H:%M:%S") def report_progress(self, commit_count, details=''): if commit_count and commit_count % self.progress_every == 0: if self._commit_total: counts = "%d/%d" % (commit_count, self._commit_total) else: counts = "%d" % (commit_count,) minutes = (time.time() - self._start_time) / 60 rate = commit_count * 1.0 / minutes if rate > 10: rate_str = "at %.0f/minute " % rate else: rate_str = "at %.1f/minute " % rate self.note("%s commits exported %s%s" % (counts, rate_str, details)) def dump_stats(self): time_required = progress.str_tdelta(time.time() - self._start_time) rc = len(self.revid_to_mark) self.note("Exported %d %s in %s", rc, single_plural(rc, "revision", "revisions"), time_required) def print_cmd(self, cmd): self.outf.write("%r\n" % cmd) def _save_marks(self): if self.export_marks_file: revision_ids = dict((m, r) for r, m in self.revid_to_mark.items()) marks_file.export_marks(self.export_marks_file, revision_ids) def is_empty_dir(self, tree, path): path_id = tree.path2id(path) if path_id is None: self.warning("Skipping empty_dir detection - no file_id for %s" % (path,)) return False # Continue if path is not a directory if tree.kind(path_id) != 'directory': return False # Use treewalk to find the contents of our directory contents = list(tree.walkdirs(prefix=path))[0] if len(contents[1]) == 0: return True else: return False def emit_features(self): for feature in sorted(commands.FEATURE_NAMES): self.print_cmd(commands.FeatureCommand(feature)) def emit_baseline(self, revid, ref): # Emit a full source tree of the first commit's parent revobj = self.branch.repository.get_revision(revid) mark = 1 self.revid_to_mark[revid] = mark file_cmds = self._get_filecommands(bzrlib.revision.NULL_REVISION, revid) self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds)) def emit_commit(self, revid, ref): if revid in self.revid_to_mark or revid in self.excluded_revisions: return # Get the Revision object try: revobj = self.branch.repository.get_revision(revid) except bazErrors.NoSuchRevision: # This is a ghost revision. Mark it as not found and next! self.revid_to_mark[revid] = -1 return # Get the primary parent # TODO: Consider the excluded revisions when deciding the parents. # Currently, a commit with parents that are excluded ought to be # triggering the ref calculation below (and it is not). # IGC 20090824 ncommits = len(self.revid_to_mark) nparents = len(revobj.parent_ids) if nparents == 0: if ncommits: # This is a parentless commit but it's not the first one # output. 
We need to create a new temporary branch for it # otherwise git-fast-import will assume the previous commit # was this one's parent ref = self._next_tmp_ref() parent = bzrlib.revision.NULL_REVISION else: parent = revobj.parent_ids[0] # Print the commit mark = ncommits + 1 self.revid_to_mark[revid] = mark file_cmds = self._get_filecommands(parent, revid) self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds)) # Report progress and checkpoint if it's time for that self.report_progress(ncommits) if (self.checkpoint > 0 and ncommits and ncommits % self.checkpoint == 0): self.note("Exported %i commits - adding checkpoint to output" % ncommits) self._save_marks() self.print_cmd(commands.CheckpointCommand()) def _get_name_email(self, user): if user.find('<') == -1: # If the email isn't inside <>, we need to use it as the name # in order for things to round-trip correctly. # (note: parseaddr('a@b.com') => name:'', email: 'a@b.com') name = user email = '' else: name, email = parseaddr(user) return name.encode("utf-8"), email.encode("utf-8") def _get_commit_command(self, git_ref, mark, revobj, file_cmds): # Get the committer and author info committer = revobj.committer name, email = self._get_name_email(committer) committer_info = (name, email, revobj.timestamp, revobj.timezone) if self._multi_author_api_available: more_authors = revobj.get_apparent_authors() author = more_authors.pop(0) else: more_authors = [] author = revobj.get_apparent_author() if not self.plain_format and more_authors: name, email = self._get_name_email(author) author_info = (name, email, revobj.timestamp, revobj.timezone) more_author_info = [] for a in more_authors: name, email = self._get_name_email(a) more_author_info.append( (name, email, revobj.timestamp, revobj.timezone)) elif author != committer: name, email = self._get_name_email(author) author_info = (name, email, revobj.timestamp, revobj.timezone) more_author_info = None else: author_info = None more_author_info = None # Get the parents in terms of marks non_ghost_parents = [] for p in revobj.parent_ids: if p in self.excluded_revisions: continue try: parent_mark = self.revid_to_mark[p] non_ghost_parents.append(":%s" % parent_mark) except KeyError: # ghost - ignore continue if non_ghost_parents: from_ = non_ghost_parents[0] merges = non_ghost_parents[1:] else: from_ = None merges = None # Filter the revision properties. Some metadata (like the # author information) is already exposed in other ways so # don't repeat it here. if self.plain_format: properties = None else: properties = revobj.properties for prop in self.properties_to_exclude: try: del properties[prop] except KeyError: pass # Build and return the result return commands.CommitCommand(git_ref, str(mark), author_info, committer_info, revobj.message.encode("utf-8"), from_, merges, iter(file_cmds), more_authors=more_author_info, properties=properties) def _get_revision_trees(self, parent, revision_id): try: tree_old = self.branch.repository.revision_tree(parent) except bazErrors.UnexpectedInventoryFormat: self.warning("Parent is malformed - diffing against previous parent") # We can't find the old parent. 
# Let's diff against its parent
            pp = self.branch.repository.get_revision(parent)
            tree_old = self.branch.repository.revision_tree(pp.parent_ids[0])
        tree_new = None
        try:
            tree_new = self.branch.repository.revision_tree(revision_id)
        except bazErrors.UnexpectedInventoryFormat:
            # We can't really do anything anymore
            self.warning("Revision %s is malformed - skipping" % revision_id)
        return tree_old, tree_new

    def _get_filecommands(self, parent, revision_id):
        """Get the list of FileCommands for the changes between two revisions."""
        tree_old, tree_new = self._get_revision_trees(parent, revision_id)
        if not (tree_old and tree_new):
            # Something is wrong with this revision - ignore the filecommands
            return []
        changes = tree_new.changes_from(tree_old)

        # Make "modified" have 3-tuples, as added does
        my_modified = [x[0:3] for x in changes.modified]

        # The potential interaction between renames and deletes is messy.
        # Handle it here ...
        file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
            changes.renamed, changes.removed, revision_id, tree_old)

        # Map kind changes to a delete followed by an add
        for path, id_, kind1, kind2 in changes.kind_changed:
            path = self._adjust_path_for_renames(path, renamed, revision_id)
            # IGC: I don't understand why a delete is needed here.
            # In fact, it seems harmful? If you uncomment this line,
            # please file a bug explaining why you needed to.
            #file_cmds.append(commands.FileDeleteCommand(path))
            my_modified.append((path, id_, kind2))

        # Record modifications
        for path, id_, kind in changes.added + my_modified + rd_modifies:
            if kind == 'file':
                text = tree_new.get_file_text(id_)
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
                    helpers.kind_to_mode('file', tree_new.is_executable(id_)),
                    None, text))
            elif kind == 'symlink':
                file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
                    helpers.kind_to_mode('symlink', False),
                    None, tree_new.get_symlink_target(id_)))
            elif kind == 'directory':
                if not self.plain_format:
                    file_cmds.append(commands.FileModifyCommand(path.encode("utf-8"),
                        helpers.kind_to_mode('directory', False),
                        None, None))
            else:
                self.warning("cannot export '%s' of kind %s yet - ignoring" %
                    (path, kind))
        return file_cmds

    def _process_renames_and_deletes(self, renames, deletes,
        revision_id, tree_old):
        file_cmds = []
        modifies = []
        renamed = []

        # See https://bugs.edge.launchpad.net/bzr-fastimport/+bug/268933.
        # In a nutshell, there are several nasty cases:
        #
        # 1) bzr rm a; bzr mv b a; bzr commit
        # 2) bzr mv x/y z; bzr rm x; bzr commit
        #
        # The first must come out with the delete first like this:
        #
        # D a
        # R b a
        #
        # The second case must come out with the rename first like this:
        #
        # R x/y z
        # D x
        #
        # So outputting all deletes first or all renames first won't work.
        # Instead, we need to make multiple passes over the various lists to
        # get the ordering right.
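        #
        # Worked example for case (1): changes.renamed holds ('b', 'a', ...)
        # and changes.removed holds ('a', ...), so 'a' starts off in
        # deleted_paths below. When the rename is processed, its newpath
        # 'a' is found in deleted_paths, so "D a" is emitted (and 'a'
        # removed from the set) before "R b a". In case (2) no rename
        # target collides with a deleted path, so "R x/y z" is emitted in
        # the rename pass and "D x" in the final "remaining deletes" pass.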
must_be_renamed = {} old_to_new = {} deleted_paths = set([p for p, _, _ in deletes]) for (oldpath, newpath, id_, kind, text_modified, meta_modified) in renames: emit = kind != 'directory' or not self.plain_format if newpath in deleted_paths: if emit: file_cmds.append(commands.FileDeleteCommand(newpath.encode("utf-8"))) deleted_paths.remove(newpath) if (self.is_empty_dir(tree_old, oldpath)): self.note("Skipping empty dir %s in rev %s" % (oldpath, revision_id)) continue #oldpath = self._adjust_path_for_renames(oldpath, renamed, # revision_id) renamed.append([oldpath, newpath]) old_to_new[oldpath] = newpath if emit: file_cmds.append( commands.FileRenameCommand(oldpath.encode("utf-8"), newpath.encode("utf-8"))) if text_modified or meta_modified: modifies.append((newpath, id_, kind)) # Renaming a directory implies all children must be renamed. # Note: changes_from() doesn't handle this if kind == 'directory' and tree_old.kind(id_) == 'directory': for p, e in tree_old.inventory.iter_entries_by_dir(from_dir=id_): if e.kind == 'directory' and self.plain_format: continue old_child_path = osutils.pathjoin(oldpath, p) new_child_path = osutils.pathjoin(newpath, p) must_be_renamed[old_child_path] = new_child_path # Add children not already renamed if must_be_renamed: renamed_already = set(old_to_new.keys()) still_to_be_renamed = set(must_be_renamed.keys()) - renamed_already for old_child_path in sorted(still_to_be_renamed): new_child_path = must_be_renamed[old_child_path] if self.verbose: self.note("implicitly renaming %s => %s" % (old_child_path, new_child_path)) file_cmds.append(commands.FileRenameCommand(old_child_path.encode("utf-8"), new_child_path.encode("utf-8"))) # Record remaining deletes for path, id_, kind in deletes: if path not in deleted_paths: continue if kind == 'directory' and self.plain_format: continue #path = self._adjust_path_for_renames(path, renamed, revision_id) file_cmds.append(commands.FileDeleteCommand(path.encode("utf-8"))) return file_cmds, modifies, renamed def _adjust_path_for_renames(self, path, renamed, revision_id): # If a previous rename is found, we should adjust the path for old, new in renamed: if path == old: self.note("Changing path %s given rename to %s in revision %s" % (path, new, revision_id)) path = new elif path.startswith(old + '/'): self.note( "Adjusting path %s given rename of %s to %s in revision %s" % (path, old, new, revision_id)) path = path.replace(old + "/", new + "/") return path def emit_tags(self): for tag, revid in self.branch.tags.get_tag_dict().items(): try: mark = self.revid_to_mark[revid] except KeyError: self.warning('not creating tag %r pointing to non-existent ' 'revision %s' % (tag, revid)) else: git_ref = 'refs/tags/%s' % tag.encode("utf-8") if self.plain_format and not check_ref_format(git_ref): if self.rewrite_tags: new_ref = sanitize_ref_name_for_git(git_ref) self.warning('tag %r is exported as %r to be valid in git.', git_ref, new_ref) git_ref = new_ref else: self.warning('not creating tag %r as its name would not be ' 'valid in git.', git_ref) continue self.print_cmd(commands.ResetCommand(git_ref, ":" + str(mark))) def _next_tmp_ref(self): """Return a unique branch name. 
The name will start with "tmp".""" prefix = 'tmp' if prefix not in self.branch_names: self.branch_names[prefix] = 0 else: self.branch_names[prefix] += 1 prefix = '%s.%d' % (prefix, self.branch_names[prefix]) return 'refs/heads/%s' % prefix bzr-fastimport-0.13.0+bzr361/helpers.py0000644000000000000000000001467012335104053015747 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Miscellaneous useful stuff.""" import stat def escape_commit_message(message): """Replace xml-incompatible control characters.""" # This really ought to be provided by bzrlib. # Code copied from bzrlib.commit. # Python strings can include characters that can't be # represented in well-formed XML; escape characters that # aren't listed in the XML specification # (http://www.w3.org/TR/REC-xml/#NT-Char). import re message, _ = re.subn( u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+', lambda match: match.group(0).encode('unicode_escape'), message) return message def best_format_for_objects_in_a_repository(repo): """Find the high-level format for branches and trees given a repository. When creating branches and working trees within a repository, Bazaar defaults to using the default format which may not be the best choice. This routine does a reverse lookup of the high-level format registry to find the high-level format that a shared repository was most likely created via. :return: the BzrDirFormat or None if no matches were found. """ # Based on code from bzrlib/info.py ... from bzrlib import bzrdir repo_format = repo._format candidates = [] non_aliases = set(bzrdir.format_registry.keys()) non_aliases.difference_update(bzrdir.format_registry.aliases()) for key in non_aliases: format = bzrdir.format_registry.make_bzrdir(key) # LocalGitBzrDirFormat has no repository_format if hasattr(format, "repository_format"): if format.repository_format == repo_format: candidates.append((key, format)) if len(candidates): # Assume the first one. Is there any reason not to do that? name, format = candidates[0] return format else: return None def open_destination_directory(location, format=None, verbose=True): """Open a destination directory and return the BzrDir. If destination has a control directory, it will be returned. Otherwise, the destination should be empty or non-existent and a shared repository will be created there. :param location: the destination directory :param format: the format to use or None for the default :param verbose: display the format used if a repository is created. :return: BzrDir for the destination """ import os from bzrlib import bzrdir, errors, trace, transport try: control, relpath = bzrdir.BzrDir.open_containing(location) # XXX: Check the relpath is None here? return control except errors.NotBranchError: pass # If the directory exists, check it is empty. Otherwise create it. 
    if os.path.exists(location):
        contents = os.listdir(location)
        if contents:
            raise errors.BzrCommandError("Destination must either have a .bzr"
                " directory, not yet exist or be empty - files found in %s"
                % (location,))
    else:
        try:
            os.mkdir(location)
        except IOError, ex:
            raise errors.BzrCommandError("Unable to create %s: %s" %
                (location, ex))

    # Create a repository for the nominated format.
    trace.note("Creating destination repository ...")
    if format is None:
        format = bzrdir.format_registry.make_bzrdir('default')
    to_transport = transport.get_transport(location)
    to_transport.ensure_base()
    control = format.initialize_on_transport(to_transport)
    repo = control.create_repository(shared=True)
    if verbose:
        from bzrlib.info import show_bzrdir_info
        show_bzrdir_info(repo.bzrdir, verbose=0)
    return control


def kind_to_mode(kind, executable):
    if kind == "file":
        if executable == True:
            return stat.S_IFREG | 0755
        elif executable == False:
            return stat.S_IFREG | 0644
        else:
            raise AssertionError("Executable %r invalid" % executable)
    elif kind == "symlink":
        return stat.S_IFLNK
    elif kind == "directory":
        return stat.S_IFDIR
    elif kind == "tree-reference":
        return 0160000
    else:
        raise AssertionError("Unknown file kind '%s'" % kind)


def mode_to_kind(mode):
    # Note: Output from git-fast-export slightly different to spec
    if mode in (0644, 0100644):
        return 'file', False
    elif mode in (0755, 0100755):
        return 'file', True
    elif mode == 0040000:
        return 'directory', False
    elif mode == 0120000:
        return 'symlink', False
    elif mode == 0160000:
        return 'tree-reference', False
    else:
        raise AssertionError("invalid mode %o" % mode)


def binary_stream(stream):
    """Ensure a stream is binary on Windows.

    :return: the stream
    """
    try:
        import os
        if os.name == 'nt':
            fileno = getattr(stream, 'fileno', None)
            if fileno:
                no = fileno()
                if no >= 0:  # -1 means we're working as subprocess
                    import msvcrt
                    msvcrt.setmode(no, os.O_BINARY)
    except ImportError:
        pass
    return stream


def single_plural(n, single, plural):
    """Return a single or plural form of a noun based on number."""
    if n == 1:
        return single
    else:
        return plural


def invert_dictset(d):
    """Invert a dictionary with keys matching a set of values, turned into lists."""
    # Based on recipe from ASPN
    result = {}
    for k, c in d.iteritems():
        for v in c:
            keys = result.setdefault(v, [])
            keys.append(k)
    return result


def invert_dict(d):
    """Invert a dictionary with keys matching each value turned into a list."""
    # Based on recipe from ASPN
    result = {}
    for k, v in d.iteritems():
        keys = result.setdefault(v, [])
        keys.append(k)
    return result
bzr-fastimport-0.13.0+bzr361/idmapfile.py0000644000000000000000000000407511723157030016240 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Routines for saving and loading the id-map file."""

import os


def save_id_map(filename, revision_ids):
    """Save the mapping of commit ids to revision ids to a file.

    Throws the usual exceptions if the file cannot be opened, written
    to or closed.
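
    Each line is written as "commit-id revision-id"; load_id_map()
    below assumes commit-ids contain no embedded spaces.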

    :param filename: name of the file to save the data to
    :param revision_ids: a dictionary of commit ids to revision ids.
    """
    f = open(filename, 'wb')
    try:
        for commit_id, rev_id in revision_ids.iteritems():
            f.write("%s %s\n" % (commit_id, rev_id))
        f.flush()
    finally:
        f.close()


def load_id_map(filename):
    """Load the mapping of commit ids to revision ids from a file.

    If the file does not exist, an empty result is returned.
    If the file does exist but cannot be opened, read or closed,
    the normal exceptions are thrown.

    NOTE: It is assumed that commit-ids do not have embedded spaces.

    :param filename: name of the file to load the data from
    :result: map, count where:
      map = a dictionary of commit ids to revision ids;
      count = the number of keys in map
    """
    result = {}
    count = 0
    if os.path.exists(filename):
        f = open(filename)
        try:
            for line in f:
                parts = line[:-1].split(' ', 1)
                result[parts[0]] = parts[1]
                count += 1
        finally:
            f.close()
    return result, count
bzr-fastimport-0.13.0+bzr361/info.py0000644000000000000000000000041611723410610015230 0ustar 00000000000000bzr_plugin_name = 'fastimport'

bzr_commands = [
    "fast-import",
    "fast-import-filter",
    "fast-import-info",
    "fast-import-query",
    "fast-export",
    ]

bzr_plugin_version = (0, 14, 0, 'dev', 0)

bzr_minimum_version = (2, 0, 0)

bzr_maximum_version = None
bzr-fastimport-0.13.0+bzr361/marks_file.py0000644000000000000000000000461111643171110016412 0ustar 00000000000000# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Routines for reading/writing a marks file."""

from bzrlib.trace import warning


def import_marks(filename):
    """Read the mapping of marks to revision-ids from a file.

    :param filename: the file to read from
    :return: None if an error is encountered or a dictionary with marks
        as keys and revision-ids as values
    """
    # Check that the file is readable and in the right format
    try:
        f = file(filename)
    except IOError:
        warning("Could not import marks file %s - not importing marks",
            filename)
        return None

    # Read the revision info
    revision_ids = {}

    line = f.readline()
    if line == 'format=1\n':
        # Cope with old-style marks files
        # Read the branch info
        branch_names = {}
        for string in f.readline().rstrip('\n').split('\0'):
            if not string:
                continue
            name, integer = string.rsplit('.', 1)
            branch_names[name] = int(integer)
        line = f.readline()

    while line:
        line = line.rstrip('\n')
        mark, revid = line.split(' ', 1)
        mark = mark.lstrip(':')
        revision_ids[mark] = revid
        line = f.readline()
    f.close()
    return revision_ids


def export_marks(filename, revision_ids):
    """Save marks to a file.
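
    Marks are written one per line in ":mark revision-id" form.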
:param filename: filename to save data to :param revision_ids: dictionary mapping marks -> bzr revision-ids """ try: f = file(filename, 'w') except IOError: warning("Could not open export-marks file %s - not exporting marks", filename) return # Write the revision info for mark, revid in revision_ids.iteritems(): f.write(':%s %s\n' % (str(mark).lstrip(':'), revid)) f.close() bzr-fastimport-0.13.0+bzr361/processors/0000755000000000000000000000000010754757632016151 5ustar 00000000000000bzr-fastimport-0.13.0+bzr361/reftracker.py0000644000000000000000000000413111723157030016427 0ustar 00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Tracker of refs.""" class RefTracker(object): def __init__(self): # Head tracking: last ref, last id per ref & map of commit ids to ref*s* self.last_ref = None self.last_ids = {} self.heads = {} def dump_stats(self, note): self._show_stats_for(self.last_ids, "last-ids", note=note) self._show_stats_for(self.heads, "heads", note=note) def clear(self): self.last_ids.clear() self.heads.clear() def track_heads(self, cmd): """Track the repository heads given a CommitCommand. :param cmd: the CommitCommand :return: the list of parents in terms of commit-ids """ # Get the true set of parents if cmd.from_ is not None: parents = [cmd.from_] else: last_id = self.last_ids.get(cmd.ref) if last_id is not None: parents = [last_id] else: parents = [] parents.extend(cmd.merges) # Track the heads self.track_heads_for_ref(cmd.ref, cmd.id, parents) return parents def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None): if parents is not None: for parent in parents: if parent in self.heads: del self.heads[parent] self.heads.setdefault(cmd_id, set()).add(cmd_ref) self.last_ids[cmd_ref] = cmd_id self.last_ref = cmd_ref bzr-fastimport-0.13.0+bzr361/revision_store.py0000644000000000000000000010011111702775502017353 0ustar 00000000000000# Copyright (C) 2008, 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """An abstraction of a repository providing just the bits importing needs.""" import cStringIO from bzrlib import ( errors, graph as _mod_graph, inventory, knit, lru_cache, osutils, revision as _mod_revision, trace, ) class _TreeShim(object): """Fake a Tree implementation. This implements just enough of the tree api to make commit builder happy. 
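
    Only the methods that CommitBuilder.record_iter_changes() actually
    calls are implemented; anything else should be treated as unsupported.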
""" def __init__(self, repo, basis_inv, inv_delta, content_provider): self._repo = repo self._content_provider = content_provider self._basis_inv = basis_inv self._inv_delta = inv_delta self._new_info_by_id = dict([(file_id, (new_path, ie)) for _, new_path, file_id, ie in inv_delta]) def id2path(self, file_id): if file_id in self._new_info_by_id: new_path = self._new_info_by_id[file_id][0] if new_path is None: raise errors.NoSuchId(self, file_id) return new_path return self._basis_inv.id2path(file_id) def path2id(self, path): # CommitBuilder currently only requires access to the root id. We don't # build a map of renamed files, etc. One possibility if we ever *do* # need more than just root, is to defer to basis_inv.path2id() and then # check if the file_id is in our _new_info_by_id dict. And in that # case, return _new_info_by_id[file_id][0] if path != '': raise NotImplementedError(_TreeShim.path2id) # TODO: Handle root renames? return self._basis_inv.root.file_id def get_file_with_stat(self, file_id, path=None): content = self.get_file_text(file_id, path) sio = cStringIO.StringIO(content) return sio, None def get_file_text(self, file_id, path=None): try: return self._content_provider(file_id) except KeyError: # The content wasn't shown as 'new'. Just validate this fact assert file_id not in self._new_info_by_id old_ie = self._basis_inv[file_id] old_text_key = (file_id, old_ie.revision) stream = self._repo.texts.get_record_stream([old_text_key], 'unordered', True) return stream.next().get_bytes_as('fulltext') def get_symlink_target(self, file_id): if file_id in self._new_info_by_id: ie = self._new_info_by_id[file_id][1] return ie.symlink_target return self._basis_inv[file_id].symlink_target def get_reference_revision(self, file_id, path=None): raise NotImplementedError(_TreeShim.get_reference_revision) def _delta_to_iter_changes(self): """Convert the inv_delta into an iter_changes repr.""" # iter_changes is: # (file_id, # (old_path, new_path), # content_changed, # (old_versioned, new_versioned), # (old_parent_id, new_parent_id), # (old_name, new_name), # (old_kind, new_kind), # (old_exec, new_exec), # ) basis_inv = self._basis_inv for old_path, new_path, file_id, ie in self._inv_delta: # Perf: Would this be faster if we did 'if file_id in basis_inv'? # Since the *very* common case is that the file already exists, it # probably is better to optimize for that try: old_ie = basis_inv[file_id] except errors.NoSuchId: old_ie = None if ie is None: raise AssertionError('How is both old and new None?') change = (file_id, (old_path, new_path), False, (False, False), (None, None), (None, None), (None, None), (None, None), ) change = (file_id, (old_path, new_path), True, (False, True), (None, ie.parent_id), (None, ie.name), (None, ie.kind), (None, ie.executable), ) else: if ie is None: change = (file_id, (old_path, new_path), True, (True, False), (old_ie.parent_id, None), (old_ie.name, None), (old_ie.kind, None), (old_ie.executable, None), ) else: content_modified = (ie.text_sha1 != old_ie.text_sha1 or ie.text_size != old_ie.text_size) # TODO: ie.kind != old_ie.kind # TODO: symlinks changing targets, content_modified? change = (file_id, (old_path, new_path), content_modified, (True, True), (old_ie.parent_id, ie.parent_id), (old_ie.name, ie.name), (old_ie.kind, ie.kind), (old_ie.executable, ie.executable), ) yield change class AbstractRevisionStore(object): def __init__(self, repo): """An object responsible for loading revisions into a repository. 
NOTE: Repository locking is not managed by this class. Clients should take a write lock, call load() multiple times, then release the lock. :param repository: the target repository """ self.repo = repo self._graph = None self._use_known_graph = True self._supports_chks = getattr(repo._format, 'supports_chks', False) def expects_rich_root(self): """Does this store expect inventories with rich roots?""" return self.repo.supports_rich_root() def init_inventory(self, revision_id): """Generate an inventory for a parentless revision.""" if self._supports_chks: inv = self._init_chk_inventory(revision_id, inventory.ROOT_ID) else: inv = inventory.Inventory(revision_id=revision_id) if self.expects_rich_root(): # The very first root needs to have the right revision inv.root.revision = revision_id return inv def _init_chk_inventory(self, revision_id, root_id): """Generate a CHKInventory for a parentless revision.""" from bzrlib import chk_map # Get the creation parameters chk_store = self.repo.chk_bytes serializer = self.repo._format._serializer search_key_name = serializer.search_key_name maximum_size = serializer.maximum_size # Maybe the rest of this ought to be part of the CHKInventory API? inv = inventory.CHKInventory(search_key_name) inv.revision_id = revision_id inv.root_id = root_id search_key_func = chk_map.search_key_registry.get(search_key_name) inv.id_to_entry = chk_map.CHKMap(chk_store, None, search_key_func) inv.id_to_entry._root_node.set_maximum_size(maximum_size) inv.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store, None, search_key_func) inv.parent_id_basename_to_file_id._root_node.set_maximum_size( maximum_size) inv.parent_id_basename_to_file_id._root_node._key_width = 2 return inv def get_inventory(self, revision_id): """Get a stored inventory.""" return self.repo.get_inventory(revision_id) def get_file_text(self, revision_id, file_id): """Get the text stored for a file in a given revision.""" revtree = self.repo.revision_tree(revision_id) return revtree.get_file_text(file_id) def get_file_lines(self, revision_id, file_id): """Get the lines stored for a file in a given revision.""" revtree = self.repo.revision_tree(revision_id) return osutils.split_lines(revtree.get_file_text(file_id)) def start_new_revision(self, revision, parents, parent_invs): """Init the metadata needed for get_parents_and_revision_for_entry(). :param revision: a Revision object """ self._current_rev_id = revision.revision_id self._rev_parents = parents self._rev_parent_invs = parent_invs # We don't know what the branch will be so there's no real BranchConfig. # That means we won't be triggering any hooks and that's a good thing. # Without a config though, we must pass in the committer below so that # the commit builder doesn't try to look up the config. config = None # We can't use self.repo.get_commit_builder() here because it starts a # new write group. We want one write group around a batch of imports # where the default batch size is currently 10000. IGC 20090312 self._commit_builder = self.repo._commit_builder_class(self.repo, parents, config, timestamp=revision.timestamp, timezone=revision.timezone, committer=revision.committer, revprops=revision.properties, revision_id=revision.revision_id) def get_parents_and_revision_for_entry(self, ie): """Get the parents and revision for an inventory entry. 
:param ie: the inventory entry :return parents, revision_id where parents is the tuple of parent revision_ids for the per-file graph revision_id is the revision_id to use for this entry """ # Check for correct API usage if self._current_rev_id is None: raise AssertionError("start_new_revision() must be called" " before get_parents_and_revision_for_entry()") if ie.revision != self._current_rev_id: raise AssertionError("start_new_revision() registered a different" " revision (%s) to that in the inventory entry (%s)" % (self._current_rev_id, ie.revision)) # Find the heads. This code is lifted from # repository.CommitBuilder.record_entry_contents(). parent_candidate_entries = ie.parent_candidates(self._rev_parent_invs) head_set = self._commit_builder._heads(ie.file_id, parent_candidate_entries.keys()) heads = [] for inv in self._rev_parent_invs: if inv.has_id(ie.file_id): old_rev = inv[ie.file_id].revision if old_rev in head_set: rev_id = inv[ie.file_id].revision heads.append(rev_id) head_set.remove(rev_id) # Find the revision to use. If the content has not changed # since the parent, record the parent's revision. if len(heads) == 0: return (), ie.revision parent_entry = parent_candidate_entries[heads[0]] changed = False if len(heads) > 1: changed = True elif (parent_entry.name != ie.name or parent_entry.kind != ie.kind or parent_entry.parent_id != ie.parent_id): changed = True elif ie.kind == 'file': if (parent_entry.text_sha1 != ie.text_sha1 or parent_entry.executable != ie.executable): changed = True elif ie.kind == 'symlink': if parent_entry.symlink_target != ie.symlink_target: changed = True if changed: rev_id = ie.revision else: rev_id = parent_entry.revision return tuple(heads), rev_id def load(self, rev, inv, signature, text_provider, parents_provider, inventories_provider=None): """Load a revision. :param rev: the Revision :param inv: the inventory :param signature: signing information :param text_provider: a callable expecting a file_id parameter that returns the text for that file-id :param parents_provider: a callable expecting a file_id parameter that return the list of parent-ids for that file-id :param inventories_provider: a callable expecting a repository and a list of revision-ids, that returns: * the list of revision-ids present in the repository * the list of inventories for the revision-id's, including an empty inventory for the missing revisions If None, a default implementation is provided. """ # NOTE: This is bzrlib.repository._install_revision refactored to # to provide more flexibility in how previous revisions are cached, # data is feed in, etc. # Get the non-ghost parents and their inventories if inventories_provider is None: inventories_provider = self._default_inventories_provider present_parents, parent_invs = inventories_provider(rev.parent_ids) # Load the inventory try: rev.inventory_sha1 = self._add_inventory(rev.revision_id, inv, present_parents, parent_invs) except errors.RevisionAlreadyPresent: pass # Load the texts, signature and revision entries = self._non_root_entries_iter(inv, rev.revision_id) self._load_texts(rev.revision_id, entries, text_provider, parents_provider) if signature is not None: self.repo.add_signature_text(rev.revision_id, signature) self._add_revision(rev, inv) def load_using_delta(self, rev, basis_inv, inv_delta, signature, text_provider, parents_provider, inventories_provider=None): """Load a revision by applying a delta to a (CHK)Inventory. 
:param rev: the Revision :param basis_inv: the basis Inventory or CHKInventory :param inv_delta: the inventory delta :param signature: signing information :param text_provider: a callable expecting a file_id parameter that returns the text for that file-id :param parents_provider: a callable expecting a file_id parameter that return the list of parent-ids for that file-id :param inventories_provider: a callable expecting a repository and a list of revision-ids, that returns: * the list of revision-ids present in the repository * the list of inventories for the revision-id's, including an empty inventory for the missing revisions If None, a default implementation is provided. """ # TODO: set revision_id = rev.revision_id builder = self.repo._commit_builder_class(self.repo, parents=rev.parent_ids, config=None, timestamp=rev.timestamp, timezone=rev.timezone, committer=rev.committer, revprops=rev.properties, revision_id=rev.revision_id) if self._graph is None and self._use_known_graph: if (getattr(_mod_graph, 'GraphThunkIdsToKeys', None) and getattr(_mod_graph.GraphThunkIdsToKeys, "add_node", None) and getattr(self.repo, "get_known_graph_ancestry", None)): self._graph = self.repo.get_known_graph_ancestry( rev.parent_ids) else: self._use_known_graph = False if self._graph is not None: orig_heads = builder._heads def thunked_heads(file_id, revision_ids): # self._graph thinks in terms of keys, not ids, so translate # them # old_res = orig_heads(file_id, revision_ids) if len(revision_ids) < 2: res = set(revision_ids) else: res = set(self._graph.heads(revision_ids)) # if old_res != res: # import pdb; pdb.set_trace() return res builder._heads = thunked_heads if rev.parent_ids: basis_rev_id = rev.parent_ids[0] else: basis_rev_id = _mod_revision.NULL_REVISION tree = _TreeShim(self.repo, basis_inv, inv_delta, text_provider) changes = tree._delta_to_iter_changes() for (file_id, path, fs_hash) in builder.record_iter_changes( tree, basis_rev_id, changes): # So far, we don't *do* anything with the result pass builder.finish_inventory() # TODO: This is working around a bug in the bzrlib code base. # 'builder.finish_inventory()' ends up doing: # self.inv_sha1 = self.repository.add_inventory_by_delta(...) # However, add_inventory_by_delta returns (sha1, inv) # And we *want* to keep a handle on both of those objects if isinstance(builder.inv_sha1, tuple): builder.inv_sha1, builder.new_inventory = builder.inv_sha1 # This is a duplicate of Builder.commit() since we already have the # Revision object, and we *don't* want to call commit_write_group() rev.inv_sha1 = builder.inv_sha1 try: config = builder._config_stack except AttributeError: # bzr < 2.5 config = builder._config builder.repository.add_revision(builder._new_revision_id, rev, builder.new_inventory) if self._graph is not None: # TODO: Use StaticTuple and .intern() for these things self._graph.add_node(builder._new_revision_id, rev.parent_ids) if signature is not None: raise AssertionError('signatures not guaranteed yet') self.repo.add_signature_text(rev.revision_id, signature) # self._add_revision(rev, inv) return builder.revision_tree().inventory def _non_root_entries_iter(self, inv, revision_id): if hasattr(inv, 'iter_non_root_entries'): entries = inv.iter_non_root_entries() else: path_entries = inv.iter_entries() # Backwards compatibility hack: skip the root id. 
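            # (Non-rich-root repositories do not version a text for the
            # tree root, so the root entry is consumed and sanity-checked
            # here rather than yielded for text loading.)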
if not self.repo.supports_rich_root(): path, root = path_entries.next() if root.revision != revision_id: raise errors.IncompatibleRevision(repr(self.repo)) entries = iter([ie for path, ie in path_entries]) return entries def _load_texts(self, revision_id, entries, text_provider, parents_provider): """Load texts to a repository for inventory entries. This method is provided for subclasses to use or override. :param revision_id: the revision identifier :param entries: iterator over the inventory entries :param text_provider: a callable expecting a file_id parameter that returns the text for that file-id :param parents_provider: a callable expecting a file_id parameter that return the list of parent-ids for that file-id """ raise NotImplementedError(self._load_texts) def _add_inventory(self, revision_id, inv, parents, parent_invs): """Add the inventory inv to the repository as revision_id. :param parents: The revision ids of the parents that revision_id is known to have and are in the repository already. :param parent_invs: the parent inventories :returns: The validator(which is a sha1 digest, though what is sha'd is repository format specific) of the serialized inventory. """ return self.repo.add_inventory(revision_id, inv, parents) def _add_inventory_by_delta(self, revision_id, basis_inv, inv_delta, parents, parent_invs): """Add the inventory to the repository as revision_id. :param basis_inv: the basis Inventory or CHKInventory :param inv_delta: the inventory delta :param parents: The revision ids of the parents that revision_id is known to have and are in the repository already. :param parent_invs: the parent inventories :returns: (validator, inv) where validator is the validator (which is a sha1 digest, though what is sha'd is repository format specific) of the serialized inventory; inv is the generated inventory """ if len(parents): if self._supports_chks: try: validator, new_inv = self.repo.add_inventory_by_delta(parents[0], inv_delta, revision_id, parents, basis_inv=basis_inv, propagate_caches=False) except errors.InconsistentDelta: #print "BASIS INV IS\n%s\n" % "\n".join([str(i) for i in basis_inv.iter_entries_by_dir()]) trace.mutter("INCONSISTENT DELTA IS:\n%s\n" % "\n".join([str(i) for i in inv_delta])) raise else: validator, new_inv = self.repo.add_inventory_by_delta(parents[0], inv_delta, revision_id, parents) else: if isinstance(basis_inv, inventory.CHKInventory): new_inv = basis_inv.create_by_apply_delta(inv_delta, revision_id) else: new_inv = inventory.Inventory(revision_id=revision_id) # This is set in the delta so remove it to prevent a duplicate del new_inv[inventory.ROOT_ID] new_inv.apply_delta(inv_delta) validator = self.repo.add_inventory(revision_id, new_inv, parents) return validator, new_inv def _add_revision(self, rev, inv): """Add a revision and its inventory to a repository. :param rev: the Revision :param inv: the inventory """ self.repo.add_revision(rev.revision_id, rev, inv) def _default_inventories_provider(self, revision_ids): """An inventories provider that queries the repository.""" present = [] inventories = [] for revision_id in revision_ids: if self.repo.has_revision(revision_id): present.append(revision_id) rev_tree = self.repo.revision_tree(revision_id) else: rev_tree = self.repo.revision_tree(None) inventories.append(rev_tree.inventory) return present, inventories class RevisionStore1(AbstractRevisionStore): """A RevisionStore that uses the old bzrlib Repository API. The old API was present until bzr.dev rev 3510. 
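
    Texts are added via repo.weave_store and revisions via
    repo._revision_store (see _load_texts() and _add_revision() below).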
""" def _load_texts(self, revision_id, entries, text_provider, parents_provider): """See RevisionStore._load_texts().""" # Add the texts that are not already present tx = self.repo.get_transaction() for ie in entries: # This test is *really* slow: over 50% of import time #w = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx) #if ie.revision in w: # continue # Try another way, realising that this assumes that the # version is not already there. In the general case, # a shared repository might already have the revision but # we arguably don't need that check when importing from # a foreign system. if ie.revision != revision_id: continue file_id = ie.file_id text_parents = [(file_id, p) for p in parents_provider(file_id)] lines = text_provider(file_id) vfile = self.repo.weave_store.get_weave_or_empty(file_id, tx) vfile.add_lines(revision_id, text_parents, lines) def get_file_lines(self, revision_id, file_id): tx = self.repo.get_transaction() w = self.repo.weave_store.get_weave(file_id, tx) return w.get_lines(revision_id) def _add_revision(self, rev, inv): # There's no need to do everything repo.add_revision does and # doing so (since bzr.dev 3392) can be pretty slow for long # delta chains on inventories. Just do the essentials here ... _mod_revision.check_not_reserved_id(rev.revision_id) self.repo._revision_store.add_revision(rev, self.repo.get_transaction()) class RevisionStore2(AbstractRevisionStore): """A RevisionStore that uses the new bzrlib Repository API.""" def _load_texts(self, revision_id, entries, text_provider, parents_provider): """See RevisionStore._load_texts().""" text_keys = {} for ie in entries: text_keys[(ie.file_id, ie.revision)] = ie text_parent_map = self.repo.texts.get_parent_map(text_keys) missing_texts = set(text_keys) - set(text_parent_map) self._load_texts_for_file_rev_ids(missing_texts, text_provider, parents_provider) def _load_texts_for_file_rev_ids(self, file_rev_ids, text_provider, parents_provider): """Load texts to a repository for file-ids, revision-id tuples. :param file_rev_ids: iterator over the (file_id, revision_id) tuples :param text_provider: a callable expecting a file_id parameter that returns the text for that file-id :param parents_provider: a callable expecting a file_id parameter that return the list of parent-ids for that file-id """ for file_id, revision_id in file_rev_ids: text_key = (file_id, revision_id) text_parents = [(file_id, p) for p in parents_provider(file_id)] lines = text_provider(file_id) #print "adding text for %s\n\tparents:%s" % (text_key,text_parents) self.repo.texts.add_lines(text_key, text_parents, lines) def get_file_lines(self, revision_id, file_id): record = self.repo.texts.get_record_stream([(file_id, revision_id)], 'unordered', True).next() if record.storage_kind == 'absent': raise errors.RevisionNotPresent(record.key, self.repo) return osutils.split_lines(record.get_bytes_as('fulltext')) # This is breaking imports into brisbane-core currently #def _add_revision(self, rev, inv): # # There's no need to do everything repo.add_revision does and # # doing so (since bzr.dev 3392) can be pretty slow for long # # delta chains on inventories. Just do the essentials here ... # _mod_revision.check_not_reserved_id(rev.revision_id) # self.repo._add_revision(rev) class ImportRevisionStore1(RevisionStore1): """A RevisionStore (old Repository API) optimised for importing. This implementation caches serialised inventory texts and provides fine-grained control over when inventories are stored as fulltexts. 
""" def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None, random_ids=True): """See AbstractRevisionStore.__init__. :param repository: the target repository :param parent_text_to_cache: the number of parent texts to cache :para fulltext_when: if non None, a function to call to decide whether to fulltext the inventory or not. The revision count is passed as a parameter and the result is treated as a boolean. """ RevisionStore1.__init__(self, repo) self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache) self.fulltext_when = fulltext_when self.random_ids = random_ids self.revision_count = 0 def _add_inventory(self, revision_id, inv, parents, parent_invs): """See RevisionStore._add_inventory.""" # Code taken from bzrlib.repository.add_inventory assert self.repo.is_in_write_group() _mod_revision.check_not_reserved_id(revision_id) assert inv.revision_id is None or inv.revision_id == revision_id, \ "Mismatch between inventory revision" \ " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id) assert inv.root is not None inv_lines = self.repo._serialise_inventory_to_lines(inv) inv_vf = self.repo.get_inventory_weave() sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf, revision_id, parents, inv_lines, self.inv_parent_texts) self.inv_parent_texts[revision_id] = parent_text return sha1 def _inventory_add_lines(self, inv_vf, version_id, parents, lines, parent_texts): """See Repository._inventory_add_lines().""" # setup parameters used in original code but not this API self.revision_count += 1 if self.fulltext_when is not None: delta = not self.fulltext_when(self.revision_count) else: delta = inv_vf.delta left_matching_blocks = None random_id = self.random_ids check_content = False # bzrlib.knit.add_lines() but error checking optimised inv_vf._check_add(version_id, lines, random_id, check_content) #################################################################### # bzrlib.knit._add() but skip checking if fulltext better than delta #################################################################### line_bytes = ''.join(lines) digest = osutils.sha_string(line_bytes) present_parents = [] for parent in parents: if inv_vf.has_version(parent): present_parents.append(parent) if parent_texts is None: parent_texts = {} # can only compress against the left most present parent. if (delta and (len(present_parents) == 0 or present_parents[0] != parents[0])): delta = False text_length = len(line_bytes) options = [] if lines: if lines[-1][-1] != '\n': # copy the contents of lines. lines = lines[:] options.append('no-eol') lines[-1] = lines[-1] + '\n' line_bytes += '\n' #if delta: # # To speed the extract of texts the delta chain is limited # # to a fixed number of deltas. This should minimize both # # I/O and the time spend applying deltas. # delta = inv_vf._check_should_delta(present_parents) assert isinstance(version_id, str) content = inv_vf.factory.make(lines, version_id) if delta or (inv_vf.factory.annotated and len(present_parents) > 0): # Merge annotations from parent texts if needed. delta_hunks = inv_vf._merge_annotations(content, present_parents, parent_texts, delta, inv_vf.factory.annotated, left_matching_blocks) if delta: options.append('line-delta') store_lines = inv_vf.factory.lower_line_delta(delta_hunks) size, bytes = inv_vf._data._record_to_data(version_id, digest, store_lines) else: options.append('fulltext') # isinstance is slower and we have no hierarchy. 
if inv_vf.factory.__class__ == knit.KnitPlainFactory: # Use the already joined bytes saving iteration time in # _record_to_data. size, bytes = inv_vf._data._record_to_data(version_id, digest, lines, [line_bytes]) else: # get mixed annotation + content and feed it into the # serialiser. store_lines = inv_vf.factory.lower_fulltext(content) size, bytes = inv_vf._data._record_to_data(version_id, digest, store_lines) access_memo = inv_vf._data.add_raw_records([size], bytes)[0] inv_vf._index.add_versions( ((version_id, options, access_memo, parents),), random_id=random_id) return digest, text_length, content bzr-fastimport-0.13.0+bzr361/setup.py0000755000000000000000000000154211723175061015451 0ustar 00000000000000#!/usr/bin/env python from distutils.core import setup from info import * if __name__ == '__main__': version = ".".join([str(x) for x in bzr_plugin_version]) setup(name="bzr-fastimport", version=version, description="stream-based import into and export from Bazaar.", author="Canonical Ltd", author_email="bazaar@lists.canonical.com", license = "GNU GPL v2", download_url="http://launchpad.net/bzr-fastimport/trunk/%s/+download/bzr-fastimport-%s.tar.gz" % (version, version), url="https://launchpad.net/bzr-fastimport", scripts=[], packages=['bzrlib.plugins.fastimport', 'bzrlib.plugins.fastimport.processors', 'bzrlib.plugins.fastimport.tests', ], package_dir={'bzrlib.plugins.fastimport': '.'}) bzr-fastimport-0.13.0+bzr361/tests/0000755000000000000000000000000010754757632015111 5ustar 00000000000000bzr-fastimport-0.13.0+bzr361/user_mapper.py0000644000000000000000000000517711643171110016630 0ustar 00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . from email import Utils class UserMapper(object): def __init__(self, lines): """Create a user-mapper from a list of lines. Blank lines and comment lines (starting with #) are ignored. Otherwise lines are of the form: old-id = new-id Each id may be in the following forms: name name If old-id has the value '@', then new-id is the domain to use when generating an email from a user-id. """ self._parse(lines) def _parse(self, lines): self._user_map = {} self._default_domain = None for line in lines: line = line.strip() if len(line) == 0 or line.startswith('#'): continue old, new = line.split('=', 1) old = old.strip() new = new.strip() if old == '@': self._default_domain = new continue # Parse each id into a name and email address old_name, old_email = self._parse_id(old) new_name, new_email = self._parse_id(new) #print "found user map: %s => %s" % ((old_name, old_email), (new_name, new_email)) self._user_map[(old_name, old_email)] = (new_name, new_email) def _parse_id(self, id): if id.find('<') == -1: return id, "" else: return Utils.parseaddr(id) def map_name_and_email(self, name, email): """Map a name and an email to the preferred name and email. 

        :param name: the current name
        :param email: the current email
        :result: the preferred name and email
        """
        try:
            new_name, new_email = self._user_map[(name, email)]
        except KeyError:
            new_name = name
            if self._default_domain and not email:
                new_email = "%s@%s" % (name, self._default_domain)
            else:
                new_email = email
        return new_name, new_email
bzr-fastimport-0.13.0+bzr361/doc/notes.txt0000644000000000000000000000327110767644220016400 0ustar 00000000000000=======================
Notes on bzr-fastimport
=======================

.. contents::


Features
========

fast-import
-----------

Things that ought to work:

* adds & deletes of files and symlinks

* automatic creation of directories (but not deletion)

* executable permission

* branches created based on where the import is run:

  * import into a shared repository outside a branch -
    branches are created as subdirectories of the current directory

  * import into a branch inside a shared repository -
    current branch becomes the trunk and other branches
    are created as sister directories

  * import into a standalone tree - warnings are given for
    branches (heads) found but not imported

* merge tracking

Things that probably work (more testing needed):

* an author separate from the committer

* lightweight tags


Known Limitations
=================

Parsing
-------

Things not supported yet:

* renaming a path that contains a space in the old name

* copying a path that contains a space in the source name

* delimited data sections (all data must be length prefixed currently)

* rfc2822 dates


fast-import
-----------

Things not supported yet:

* deterministic revision-ids as an option

* 'reset' handling

* 'filedeleteall' handling

Things not recorded in Bazaar:

* tagger and message for (non-lightweight) tags

* copy semantics


Custom Enhancements
===================

General
-------

The date format is auto-detected.

Parsing
-------

These enhancements over the specification are provided in order to
read data produced by some versions of git-fast-export:

* A person's name may be empty

* Long file modes with an extra leading 0, i.e. 0000644,
  0000755 and 0120000 are legal
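(Editor's illustration - not a file in this archive.) The parsing behaviour
described in the notes above can be exercised with the standalone
`fastimport` library that this plugin builds on. The sketch below is a
hedged example: it assumes that library's `parser.ImportParser` class and
its `iter_commands()` API, and the stream content - refs, author, path,
texts - is entirely made up:

    from cStringIO import StringIO

    from fastimport import parser

    # A single-commit stream. Every "data <n>" line introduces exactly
    # n bytes - the length-prefixed form the notes above describe as the
    # only one supported.
    stream = StringIO(
        "commit refs/heads/master\n"
        "mark :1\n"
        "committer Joe Example <joe@example.com> 1234567890 +0000\n"
        "data 5\n"
        "hello\n"
        "M 644 inline doc/hi.txt\n"
        "data 4\n"
        "spam\n"
    )
    for cmd in parser.ImportParser(stream).iter_commands():
        print cmd.name   # e.g. 'commit'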
bzr-fastimport-0.13.0+bzr361/explorer/logos/0000755000000000000000000000000011331223130016702 5ustar 00000000000000bzr-fastimport-0.13.0+bzr361/explorer/tools.xml0000644000000000000000000000234511342230234017447 0ustar 00000000000000[XML content lost in extraction of this dump]
bzr-fastimport-0.13.0+bzr361/explorer/logos/cvs.png0000644000000000000000000000163711331223130020207 0ustar 00000000000000[binary PNG image data omitted]
bzr-fastimport-0.13.0+bzr361/explorer/logos/darcs.png0000644000000000000000000000161311331223130020502 0ustar 00000000000000[binary PNG image data omitted]
bzr-fastimport-0.13.0+bzr361/explorer/logos/git.png0000644000000000000000000000016311331223130020170 0ustar 00000000000000[binary PNG image data omitted]
bzr-fastimport-0.13.0+bzr361/explorer/logos/mercurial.png0000644000000000000000000000143011331223130021366 0ustar 00000000000000[binary PNG image data omitted]
bzr-fastimport-0.13.0+bzr361/explorer/logos/perforce.png0000644000000000000000000000163411331223130021216 0ustar 00000000000000[binary PNG image data omitted]
bzr-fastimport-0.13.0+bzr361/explorer/logos/subversion.png0000644000000000000000000000203611331223130021605 0ustar 00000000000000[binary PNG image data omitted]
bzr-fastimport-0.13.0+bzr361/processors/__init__.py0000644000000000000000000000130011643171110020233 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Actual import processors."""
bzr-fastimport-0.13.0+bzr361/processors/generic_processor.py0000644000000000000000000005737712335104053022225 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Import processor that supports all Bazaar repository formats.""" import time from bzrlib import ( debug, delta, errors, osutils, progress, ) try: from bzrlib.repofmt.knitpack_repo import KnitPackRepository except ImportError: from bzrlib.repofmt.pack_repo import KnitPackRepository from bzrlib.trace import ( mutter, note, warning, ) try: import bzrlib.util.configobj.configobj as configobj except ImportError: import configobj from bzrlib.plugins.fastimport import ( branch_updater, cache_manager, helpers, idmapfile, marks_file, revision_store, ) from fastimport import ( commands, errors as plugin_errors, processor, ) # How many commits before automatically reporting progress _DEFAULT_AUTO_PROGRESS = 1000 # How many commits before automatically checkpointing _DEFAULT_AUTO_CHECKPOINT = 10000 # How many checkpoints before automatically packing _DEFAULT_AUTO_PACK = 4 # How many inventories to cache _DEFAULT_INV_CACHE_SIZE = 1 _DEFAULT_CHK_INV_CACHE_SIZE = 1 class GenericProcessor(processor.ImportProcessor): """An import processor that handles basic imports. Current features supported: * blobs are cached in memory * files and symlinks commits are supported * checkpoints automatically happen at a configurable frequency over and above the stream requested checkpoints * timestamped progress reporting, both automatic and stream requested * some basic statistics are dumped on completion. At checkpoints and on completion, the commit-id -> revision-id map is saved to a file called 'fastimport-id-map'. If the import crashes or is interrupted, it can be started again and this file will be used to skip over already loaded revisions. The format of each line is "commit-id revision-id" so commit-ids cannot include spaces. Here are the supported parameters: * info - name of a hints file holding the analysis generated by running the fast-import-info processor in verbose mode. When importing large repositories, this parameter is needed so that the importer knows what blobs to intelligently cache. * trees - update the working trees before completing. By default, the importer updates the repository and branches and the user needs to run 'bzr update' for the branches of interest afterwards. * count - only import this many commits then exit. If not set or negative, all commits are imported. * checkpoint - automatically checkpoint every n commits over and above any checkpoints contained in the import stream. The default is 10000. * autopack - pack every n checkpoints. The default is 4. * inv-cache - number of inventories to cache. If not set, the default is 1. * mode - import algorithm to use: default, experimental or classic. 
* import-marks - name of file to read to load mark information from * export-marks - name of file to write to save mark information to """ known_params = [ 'info', 'trees', 'count', 'checkpoint', 'autopack', 'inv-cache', 'mode', 'import-marks', 'export-marks', ] def __init__(self, bzrdir, params=None, verbose=False, outf=None, prune_empty_dirs=True): processor.ImportProcessor.__init__(self, params, verbose) self.prune_empty_dirs = prune_empty_dirs self.bzrdir = bzrdir try: # Might be inside a branch (self.working_tree, self.branch) = bzrdir._get_tree_branch() self.repo = self.branch.repository except errors.NotBranchError: # Must be inside a repository self.working_tree = None self.branch = None self.repo = bzrdir.open_repository() def pre_process(self): self._start_time = time.time() self._load_info_and_params() if self.total_commits: self.note("Starting import of %d commits ..." % (self.total_commits,)) else: self.note("Starting import ...") self.cache_mgr = cache_manager.CacheManager(self.info, self.verbose, self.inventory_cache_size) if self.params.get("import-marks") is not None: mark_info = marks_file.import_marks(self.params.get("import-marks")) if mark_info is not None: self.cache_mgr.marks = mark_info self.skip_total = False self.first_incremental_commit = True else: self.first_incremental_commit = False self.skip_total = self._init_id_map() if self.skip_total: self.note("Found %d commits already loaded - " "skipping over these ...", self.skip_total) self._revision_count = 0 # mapping of tag name to revision_id self.tags = {} # Create the revision store to use for committing, if any self.rev_store = self._revision_store_factory() # Disable autopacking if the repo format supports it. # THIS IS A HACK - there is no sanctioned way of doing this yet. if isinstance(self.repo, KnitPackRepository): self._original_max_pack_count = \ self.repo._pack_collection._max_pack_count def _max_pack_count_for_import(total_revisions): return total_revisions + 1 self.repo._pack_collection._max_pack_count = \ _max_pack_count_for_import else: self._original_max_pack_count = None # Make groupcompress use the fast algorithm during importing. # We want to repack at the end anyhow when more information # is available to do a better job of saving space. try: from bzrlib import groupcompress groupcompress._FAST = True except ImportError: pass # Create a write group. This is committed at the end of the import. # Checkpointing closes the current one and starts a new one. 
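        # Editor's aside -- not part of the original source.  A minimal,
        # self-contained sketch of the write-group discipline this
        # processor follows: the group is opened here in pre_process,
        # committed in post_process (and at every checkpoint), and
        # aborted in _process if anything goes wrong.  The helper name is
        # hypothetical; the three repository calls are the real bzrlib
        # API used throughout this class.
        def _write_group_discipline_sketch(repo, work):
            repo.start_write_group()
            try:
                result = work()
            except:
                # A failed group must not leak partial data into the repo.
                repo.abort_write_group()
                raise
            repo.commit_write_group()
            return result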
        self.repo.start_write_group()

    def _load_info_and_params(self):
        from bzrlib.plugins.fastimport import bzr_commit_handler
        # Note: the mode string must be kept as-is; wrapping it in bool()
        # would make the 'classic' and 'experimental' comparisons below
        # always false.
        self._mode = self.params.get('mode', 'default')
        self._experimental = self._mode == 'experimental'

        # This is currently hard-coded but might be configurable via
        # parameters one day if that's needed
        repo_transport = self.repo.control_files._transport
        self.id_map_path = repo_transport.local_abspath("fastimport-id-map")

        # Load the info file, if any
        info_path = self.params.get('info')
        if info_path is not None:
            self.info = configobj.ConfigObj(info_path)
        else:
            self.info = None

        # Decide which CommitHandler to use
        self.supports_chk = getattr(self.repo._format, 'supports_chks',
            False)
        if self.supports_chk and self._mode == 'classic':
            note("Cannot use classic algorithm on CHK repositories"
                " - using default one instead")
            self._mode = 'default'
        if self._mode == 'classic':
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryCommitHandler
        else:
            self.commit_handler_factory = \
                bzr_commit_handler.InventoryDeltaCommitHandler

        # Decide how often to automatically report progress
        # (not a parameter yet)
        self.progress_every = _DEFAULT_AUTO_PROGRESS
        if self.verbose:
            self.progress_every = self.progress_every / 10

        # Decide how often (# of commits) to automatically checkpoint
        self.checkpoint_every = int(self.params.get('checkpoint',
            _DEFAULT_AUTO_CHECKPOINT))

        # Decide how often (# of checkpoints) to automatically pack
        self.checkpoint_count = 0
        self.autopack_every = int(self.params.get('autopack',
            _DEFAULT_AUTO_PACK))

        # Decide how big to make the inventory cache
        cache_size = int(self.params.get('inv-cache', -1))
        if cache_size == -1:
            if self.supports_chk:
                cache_size = _DEFAULT_CHK_INV_CACHE_SIZE
            else:
                cache_size = _DEFAULT_INV_CACHE_SIZE
        self.inventory_cache_size = cache_size

        # Find the maximum number of commits to import (None means all)
        # and prepare progress reporting. Just in case the info file
        # has an outdated count of commits, we store the max counts
        # at which we need to terminate separately to the total used
        # for progress tracking.
        try:
            self.max_commits = int(self.params['count'])
            if self.max_commits < 0:
                self.max_commits = None
        except KeyError:
            self.max_commits = None
        if self.info is not None:
            self.total_commits = int(self.info['Command counts']['commit'])
            if (self.max_commits is not None and
                self.total_commits > self.max_commits):
                self.total_commits = self.max_commits
        else:
            self.total_commits = self.max_commits

    def _revision_store_factory(self):
        """Make a RevisionStore based on what the repository supports."""
        new_repo_api = hasattr(self.repo, 'revisions')
        if new_repo_api:
            return revision_store.RevisionStore2(self.repo)
        elif not self._experimental:
            return revision_store.RevisionStore1(self.repo)
        else:
            def fulltext_when(count):
                total = self.total_commits
                if total is not None and count == total:
                    fulltext = True
                else:
                    # Create an inventory fulltext every 200 revisions
                    fulltext = count % 200 == 0
                if fulltext:
                    self.note("%d commits - storing inventory as full-text",
                        count)
                return fulltext
            return revision_store.ImportRevisionStore1(
                self.repo, self.inventory_cache_size,
                fulltext_when=fulltext_when)

    def process(self, command_iter):
        """Import data into Bazaar by processing a stream of commands.
        :param command_iter: an iterator providing commands
        """
        if self.working_tree is not None:
            self.working_tree.lock_write()
        elif self.branch is not None:
            self.branch.lock_write()
        elif self.repo is not None:
            self.repo.lock_write()
        try:
            super(GenericProcessor, self)._process(command_iter)
        finally:
            # If an unhandled exception occurred, abort the write group
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            # Release the locks
            if self.working_tree is not None:
                self.working_tree.unlock()
            elif self.branch is not None:
                self.branch.unlock()
            elif self.repo is not None:
                self.repo.unlock()

    def _process(self, command_iter):
        # if anything goes wrong, abort the write group if any
        try:
            processor.ImportProcessor._process(self, command_iter)
        except:
            if self.repo is not None and self.repo.is_in_write_group():
                self.repo.abort_write_group()
            raise

    def post_process(self):
        # Commit the current write group and checkpoint the id map
        self.repo.commit_write_group()
        self._save_id_map()

        if self.params.get("export-marks") is not None:
            marks_file.export_marks(self.params.get("export-marks"),
                self.cache_mgr.marks)

        if self.cache_mgr.reftracker.last_ref is None:
            # Nothing to refresh
            return

        # Update the branches
        self.note("Updating branch information ...")
        updater = branch_updater.BranchUpdater(self.repo, self.branch,
            self.cache_mgr, helpers.invert_dictset(
                self.cache_mgr.reftracker.heads),
            self.cache_mgr.reftracker.last_ref, self.tags)
        branches_updated, branches_lost = updater.update()
        self._branch_count = len(branches_updated)

        # Tell the user about branches that were not created
        if branches_lost:
            if not self.repo.is_shared():
                self.warning("Cannot import multiple branches into "
                    "a standalone branch")
            self.warning("Not creating branches for these head revisions:")
            for lost_info in branches_lost:
                head_revision = lost_info[1]
                branch_name = lost_info[0]
                self.note("\t %s = %s", head_revision, branch_name)

        # Update the working trees as requested
        self._tree_count = 0
        remind_about_update = True
        if self._branch_count == 0:
            self.note("no branches to update")
            self.note("no working trees to update")
            remind_about_update = False
        elif self.params.get('trees', False):
            trees = self._get_working_trees(branches_updated)
            if trees:
                self._update_working_trees(trees)
                remind_about_update = False
            else:
                self.warning("No working trees available to update")
        else:
            # Update just the trunk. (This is always the first branch
            # returned by the branch updater.)
            trunk_branch = branches_updated[0]
            trees = self._get_working_trees([trunk_branch])
            if trees:
                self._update_working_trees(trees)
                remind_about_update = self._branch_count > 1

        # Dump the cache stats now because we clear it before the final pack
        if self.verbose:
            self.cache_mgr.dump_stats()
        if self._original_max_pack_count:
            # We earlier disabled autopacking, creating one pack every
            # checkpoint instead. We now pack the repository to optimise
            # how data is stored.
            self.cache_mgr.clear_all()
            self._pack_repository()

        # Finish up by dumping stats & telling the user what to do next.
        self.dump_stats()
        if remind_about_update:
            # This message is explicitly not timestamped.
note("To refresh the working tree for other branches, " "use 'bzr update' inside that branch.") def _update_working_trees(self, trees): if self.verbose: reporter = delta._ChangeReporter() else: reporter = None for wt in trees: self.note("Updating the working tree for %s ...", wt.basedir) wt.update(reporter) self._tree_count += 1 def _pack_repository(self, final=True): # Before packing, free whatever memory we can and ensure # that groupcompress is configured to optimise disk space import gc if final: try: from bzrlib import groupcompress except ImportError: pass else: groupcompress._FAST = False gc.collect() self.note("Packing repository ...") self.repo.pack() # To be conservative, packing puts the old packs and # indices in obsolete_packs. We err on the side of # optimism and clear out that directory to save space. self.note("Removing obsolete packs ...") # TODO: Use a public API for this once one exists repo_transport = self.repo._pack_collection.transport repo_transport.clone('obsolete_packs').delete_multi( repo_transport.list_dir('obsolete_packs')) # If we're not done, free whatever memory we can if not final: gc.collect() def _get_working_trees(self, branches): """Get the working trees for branches in the repository.""" result = [] wt_expected = self.repo.make_working_trees() for br in branches: if br is None: continue elif br == self.branch: if self.working_tree: result.append(self.working_tree) elif wt_expected: try: result.append(br.bzrdir.open_workingtree()) except errors.NoWorkingTree: self.warning("No working tree for branch %s", br) return result def dump_stats(self): time_required = progress.str_tdelta(time.time() - self._start_time) rc = self._revision_count - self.skip_total bc = self._branch_count wtc = self._tree_count self.note("Imported %d %s, updating %d %s and %d %s in %s", rc, helpers.single_plural(rc, "revision", "revisions"), bc, helpers.single_plural(bc, "branch", "branches"), wtc, helpers.single_plural(wtc, "tree", "trees"), time_required) def _init_id_map(self): """Load the id-map and check it matches the repository. :return: the number of entries in the map """ # Currently, we just check the size. In the future, we might # decide to be more paranoid and check that the revision-ids # are identical as well. self.cache_mgr.marks, known = idmapfile.load_id_map( self.id_map_path) existing_count = len(self.repo.all_revision_ids()) if existing_count < known: raise plugin_errors.BadRepositorySize(known, existing_count) return known def _save_id_map(self): """Save the id-map.""" # Save the whole lot every time. If this proves a problem, we can # change to 'append just the new ones' at a later time. 
idmapfile.save_id_map(self.id_map_path, self.cache_mgr.marks) def blob_handler(self, cmd): """Process a BlobCommand.""" if cmd.mark is not None: dataref = cmd.id else: dataref = osutils.sha_strings(cmd.data) self.cache_mgr.store_blob(dataref, cmd.data) def checkpoint_handler(self, cmd): """Process a CheckpointCommand.""" # Commit the current write group and start a new one self.repo.commit_write_group() self._save_id_map() # track the number of automatic checkpoints done if cmd is None: self.checkpoint_count += 1 if self.checkpoint_count % self.autopack_every == 0: self._pack_repository(final=False) self.repo.start_write_group() def commit_handler(self, cmd): """Process a CommitCommand.""" mark = cmd.id.lstrip(':') if self.skip_total and self._revision_count < self.skip_total: self.cache_mgr.reftracker.track_heads(cmd) # Check that we really do know about this commit-id if not self.cache_mgr.marks.has_key(mark): raise plugin_errors.BadRestart(mark) self.cache_mgr._blobs = {} self._revision_count += 1 if cmd.ref.startswith('refs/tags/'): tag_name = cmd.ref[len('refs/tags/'):] self._set_tag(tag_name, cmd.id) return if self.first_incremental_commit: self.first_incremental_commit = None parents = self.cache_mgr.reftracker.track_heads(cmd) # 'Commit' the revision and report progress handler = self.commit_handler_factory(cmd, self.cache_mgr, self.rev_store, verbose=self.verbose, prune_empty_dirs=self.prune_empty_dirs) try: handler.process() except: print "ABORT: exception occurred processing commit %s" % (cmd.id) raise self.cache_mgr.add_mark(mark, handler.revision_id) self._revision_count += 1 self.report_progress("(%s)" % cmd.id.lstrip(':')) if cmd.ref.startswith('refs/tags/'): tag_name = cmd.ref[len('refs/tags/'):] self._set_tag(tag_name, cmd.id) # Check if we should finish up or automatically checkpoint if (self.max_commits is not None and self._revision_count >= self.max_commits): self.note("Stopping after reaching requested count of commits") self.finished = True elif self._revision_count % self.checkpoint_every == 0: self.note("%d commits - automatic checkpoint triggered", self._revision_count) self.checkpoint_handler(None) def report_progress(self, details=''): if self._revision_count % self.progress_every == 0: if self.total_commits is not None: counts = "%d/%d" % (self._revision_count, self.total_commits) else: counts = "%d" % (self._revision_count,) minutes = (time.time() - self._start_time) / 60 revisions_added = self._revision_count - self.skip_total rate = revisions_added * 1.0 / minutes if rate > 10: rate_str = "at %.0f/minute " % rate else: rate_str = "at %.1f/minute " % rate self.note("%s commits processed %s%s" % (counts, rate_str, details)) def progress_handler(self, cmd): """Process a ProgressCommand.""" # Most progress messages embedded in streams are annoying. # Ignore them unless in verbose mode. 
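        # Editor's aside -- not part of the original source.  How the
        # 'checkpoint' and 'autopack' parameters handled above compose:
        # a checkpoint fires every checkpoint_every commits, and a pack
        # fires every autopack_every checkpoints, so the defaults
        # (10000 and 4) pack roughly once per 40000 commits.  The helper
        # is a hypothetical illustration.
        def _auto_actions_sketch(commits, checkpoint_every=10000,
                                 autopack_every=4):
            checkpoints = commits // checkpoint_every
            packs = checkpoints // autopack_every
            return checkpoints, packs
        # e.g. _auto_actions_sketch(100000) == (10, 2)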
if self.verbose: self.note("progress %s" % (cmd.message,)) def reset_handler(self, cmd): """Process a ResetCommand.""" if cmd.ref.startswith('refs/tags/'): tag_name = cmd.ref[len('refs/tags/'):] if cmd.from_ is not None: self._set_tag(tag_name, cmd.from_) elif self.verbose: self.warning("ignoring reset refs/tags/%s - no from clause" % tag_name) return if cmd.from_ is not None: self.cache_mgr.reftracker.track_heads_for_ref(cmd.ref, cmd.from_) def tag_handler(self, cmd): """Process a TagCommand.""" if cmd.from_ is not None: self._set_tag(cmd.id, cmd.from_) else: self.warning("ignoring tag %s - no from clause" % cmd.id) def _set_tag(self, name, from_): """Define a tag given a name and import 'from' reference.""" bzr_tag_name = name.decode('utf-8', 'replace') bzr_rev_id = self.cache_mgr.lookup_committish(from_) self.tags[bzr_tag_name] = bzr_rev_id def feature_handler(self, cmd): """Process a FeatureCommand.""" feature = cmd.feature_name if feature not in commands.FEATURE_NAMES: raise plugin_errors.UnknownFeature(feature) def debug(self, msg, *args): """Output a debug message if the appropriate -D option was given.""" if "fast-import" in debug.debug_flags: msg = "%s DEBUG: %s" % (self._time_of_day(), msg) mutter(msg, *args) def note(self, msg, *args): """Output a note but timestamp it.""" msg = "%s %s" % (self._time_of_day(), msg) note(msg, *args) def warning(self, msg, *args): """Output a warning but timestamp it.""" msg = "%s WARNING: %s" % (self._time_of_day(), msg) warning(msg, *args) bzr-fastimport-0.13.0+bzr361/processors/info_processor.py0000644000000000000000000002533012335104053021534 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Import processor that dump stats about the input (and doesn't import).""" from bzrlib.plugins.fastimport import ( reftracker, ) from bzrlib.plugins.fastimport.helpers import ( invert_dict, invert_dictset, ) from fastimport import ( commands, processor, ) import stat class InfoProcessor(processor.ImportProcessor): """An import processor that dumps statistics about the input. No changes to the current repository are made. As well as providing useful information about an import stream before importing it, this processor is useful for benchmarking the speed at which data can be extracted from the source. 
""" def __init__(self, params=None, verbose=0, outf=None): processor.ImportProcessor.__init__(self, params, verbose, outf=outf) def pre_process(self): # Init statistics self.cmd_counts = {} for cmd in commands.COMMAND_NAMES: self.cmd_counts[cmd] = 0 self.file_cmd_counts = {} for fc in commands.FILE_COMMAND_NAMES: self.file_cmd_counts[fc] = 0 self.parent_counts = {} self.max_parent_count = 0 self.committers = set() self.separate_authors_found = False self.symlinks_found = False self.executables_found = False self.sha_blob_references = False self.lightweight_tags = 0 # Blob usage tracking self.blobs = {} for usage in ['new', 'used', 'unknown', 'unmarked']: self.blobs[usage] = set() self.blob_ref_counts = {} # Head tracking self.reftracker = reftracker.RefTracker() # Stuff to cache: a map from mark to # of times that mark is merged self.merges = {} # Stuff to cache: these are maps from mark to sets self.rename_old_paths = {} self.copy_source_paths = {} def post_process(self): # Dump statistics cmd_names = commands.COMMAND_NAMES fc_names = commands.FILE_COMMAND_NAMES self._dump_stats_group("Command counts", [(c, self.cmd_counts[c]) for c in cmd_names], str) self._dump_stats_group("File command counts", [(c, self.file_cmd_counts[c]) for c in fc_names], str) # Commit stats if self.cmd_counts['commit']: p_items = [] for i in xrange(0, self.max_parent_count + 1): if i in self.parent_counts: count = self.parent_counts[i] p_items.append(("parents-%d" % i, count)) merges_count = len(self.merges.keys()) p_items.append(('total revisions merged', merges_count)) flags = { 'separate authors found': self.separate_authors_found, 'executables': self.executables_found, 'symlinks': self.symlinks_found, 'blobs referenced by SHA': self.sha_blob_references, } self._dump_stats_group("Parent counts", p_items, str) self._dump_stats_group("Commit analysis", flags.iteritems(), _found) heads = invert_dictset(self.reftracker.heads) self._dump_stats_group("Head analysis", heads.iteritems(), None, _iterable_as_config_list) # note("\t%d\t%s" % (len(self.committers), 'unique committers')) self._dump_stats_group("Merges", self.merges.iteritems(), None) # We only show the rename old path and copy source paths when -vv # (verbose=2) is specified. The output here for mysql's data can't # be parsed currently so this bit of code needs more work anyhow .. if self.verbose >= 2: self._dump_stats_group("Rename old paths", self.rename_old_paths.iteritems(), len, _iterable_as_config_list) self._dump_stats_group("Copy source paths", self.copy_source_paths.iteritems(), len, _iterable_as_config_list) # Blob stats if self.cmd_counts['blob']: # In verbose mode, don't list every blob used if self.verbose: del self.blobs['used'] self._dump_stats_group("Blob usage tracking", self.blobs.iteritems(), len, _iterable_as_config_list) if self.blob_ref_counts: blobs_by_count = invert_dict(self.blob_ref_counts) blob_items = blobs_by_count.items() blob_items.sort() self._dump_stats_group("Blob reference counts", blob_items, len, _iterable_as_config_list) # Other stats if self.cmd_counts['reset']: reset_stats = { 'lightweight tags': self.lightweight_tags, } self._dump_stats_group("Reset analysis", reset_stats.iteritems()) def _dump_stats_group(self, title, items, normal_formatter=None, verbose_formatter=None): """Dump a statistics group. In verbose mode, do so as a config file so that other processors can load the information if they want to. 
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        if self.verbose:
            self.outf.write("[%s]\n" % (title,))
            for name, value in items:
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                if type(name) == str:
                    name = name.replace(' ', '-')
                self.outf.write("%s = %s\n" % (name, value))
            self.outf.write("\n")
        else:
            self.outf.write("%s:\n" % (title,))
            for name, value in items:
                if normal_formatter is not None:
                    value = normal_formatter(value)
                self.outf.write("\t%s\t%s\n" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: we definitely do NOT want to remove it from multi if
            # it's already in that set.
            try:
                self.blobs['used'].remove(cmd.id)
            except KeyError:
                pass

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.iter_files():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                if fc.mode & 0111:
                    self.executables_found = True
                if stat.S_ISLNK(fc.mode):
                    self.symlinks_found = True
                if fc.dataref is not None:
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
            elif isinstance(fc, commands.FileRenameCommand):
                self.rename_old_paths.setdefault(cmd.id, set()).add(
                    fc.old_path)
            elif isinstance(fc, commands.FileCopyCommand):
                self.copy_source_paths.setdefault(cmd.id, set()).add(
                    fc.src_path)

        # Track the heads
        parents = self.reftracker.track_heads(cmd)

        # Track the parent counts
        parent_count = len(parents)
        if self.parent_counts.has_key(parent_count):
            self.parent_counts[parent_count] += 1
        else:
            self.parent_counts[parent_count] = 1
            if parent_count > self.max_parent_count:
                self.max_parent_count = parent_count

        # Remember the merges
        if cmd.merges:
            #self.merges.setdefault(cmd.ref, set()).update(cmd.merges)
            for merge in cmd.merges:
                if merge in self.merges:
                    self.merges[merge] += 1
                else:
                    self.merges[merge] = 1

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            if cmd.from_ is not None:
                self.reftracker.track_heads_for_ref(
                    cmd.ref, cmd.from_)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        self.cmd_counts[cmd.name] += 1
        feature = cmd.feature_name
        if feature not in commands.FEATURE_NAMES:
            self.warning("feature %s is not supported - parsing may fail"
                % (feature,))

    def _track_blob(self, mark):
        if mark in self.blob_ref_counts:
            self.blob_ref_counts[mark] += 1
        elif mark in self.blobs['used']:
            self.blob_ref_counts[mark] = 2
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)


def _found(b):
    """Format a found boolean as a string."""
    return ['no', 'found'][b]


def _iterable_as_config_list(s):
    """Format an iterable as a sequence of
comma-separated strings. To match what ConfigObj expects, a single item list has a trailing comma. """ items = sorted(s) if len(items) == 1: return "%s," % (items[0],) else: return ", ".join(items) bzr-fastimport-0.13.0+bzr361/tests/__init__.py0000644000000000000000000000303111645000561017175 0ustar 00000000000000# Copyright (C) 2008 Canonical Limited. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; version 2 of the License or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # """Tests for bzr-fastimport.""" from bzrlib import errors as bzr_errors from bzrlib.tests import TestLoader try: from bzrlib.tests.features import Feature except ImportError: # bzr < 2.5 from bzrlib.tests import Feature from bzrlib.plugins.fastimport import load_fastimport class _FastimportFeature(Feature): def _probe(self): try: load_fastimport() except bzr_errors.DependencyNotPresent: return False return True def feature_name(self): return 'fastimport' FastimportFeature = _FastimportFeature() def test_suite(): module_names = [__name__ + '.' + x for x in [ 'test_commands', 'test_exporter', 'test_branch_mapper', 'test_generic_processor', 'test_revision_store', ]] loader = TestLoader() return loader.loadTestsFromModuleNames(module_names) bzr-fastimport-0.13.0+bzr361/tests/test_branch_mapper.py0000644000000000000000000000533611643171110021305 0ustar 00000000000000# Copyright (C) 2009 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
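# Editor's aside -- not part of the original source.  A self-contained
# restatement of why _iterable_as_config_list (in info_processor.py
# above) emits a trailing comma for single items: ConfigObj parses
# "x," back as the one-element list ['x'], whereas a bare "x" would
# round-trip as a plain string.
def _as_config_list_sketch(items):
    items = sorted(items)
    if len(items) == 1:
        return "%s," % (items[0],)
    return ", ".join(items)
# _as_config_list_sketch(['only']) == 'only,'
# _as_config_list_sketch(['b', 'a']) == 'a, b'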
"""Test the BranchMapper methods.""" from bzrlib import tests from bzrlib.plugins.fastimport import ( branch_mapper, ) from bzrlib.plugins.fastimport.tests import ( FastimportFeature, ) class TestBranchMapper(tests.TestCase): _test_needs_features = [FastimportFeature] def test_git_to_bzr(self): m = branch_mapper.BranchMapper() for git, bzr in { 'refs/heads/master': 'trunk', 'refs/heads/foo': 'foo', 'refs/tags/master': 'trunk.tag', 'refs/tags/foo': 'foo.tag', 'refs/remotes/origin/master': 'trunk.remote', 'refs/remotes/origin/foo': 'foo.remote', }.items(): self.assertEqual(m.git_to_bzr(git), bzr) def test_git_to_bzr_with_slashes(self): m = branch_mapper.BranchMapper() for git, bzr in { 'refs/heads/master/slave': 'master/slave', 'refs/heads/foo/bar': 'foo/bar', 'refs/tags/master/slave': 'master/slave.tag', 'refs/tags/foo/bar': 'foo/bar.tag', 'refs/remotes/origin/master/slave': 'master/slave.remote', 'refs/remotes/origin/foo/bar': 'foo/bar.remote', }.items(): self.assertEqual(m.git_to_bzr(git), bzr) def test_git_to_bzr_for_trunk(self): # As 'master' in git is mapped to trunk in bzr, we need to handle # 'trunk' in git in a sensible way. m = branch_mapper.BranchMapper() for git, bzr in { 'refs/heads/trunk': 'git-trunk', 'refs/tags/trunk': 'git-trunk.tag', 'refs/remotes/origin/trunk': 'git-trunk.remote', 'refs/heads/git-trunk': 'git-git-trunk', 'refs/tags/git-trunk': 'git-git-trunk.tag', 'refs/remotes/origin/git-trunk':'git-git-trunk.remote', }.items(): self.assertEqual(m.git_to_bzr(git), bzr) bzr-fastimport-0.13.0+bzr361/tests/test_commands.py0000644000000000000000000001703212014701736020307 0ustar 00000000000000# Copyright (C) 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Test the command implementations.""" import os import re import tempfile import gzip from bzrlib import tests from bzrlib.tests.blackbox import ExternalBase from bzrlib.plugins.fastimport.cmds import ( _get_source_stream, ) from bzrlib.plugins.fastimport.tests import ( FastimportFeature, ) class TestSourceStream(tests.TestCase): _test_needs_features = [FastimportFeature] def test_get_source_stream_stdin(self): # - returns standard in self.assertIsNot(None, _get_source_stream("-")) def test_get_source_gz(self): # files ending in .gz are automatically decompressed. fd, filename = tempfile.mkstemp(suffix=".gz") f = gzip.GzipFile(fileobj=os.fdopen(fd, "w"), mode='w') f.write("bla") f.close() stream = _get_source_stream(filename) self.assertIsNot("bla", stream.read()) def test_get_source_file(self): # other files are opened as regular files. 
        fd, filename = tempfile.mkstemp()
        f = os.fdopen(fd, 'w')
        f.write("bla")
        f.close()
        stream = _get_source_stream(filename)
        self.assertEquals("bla", stream.read())


fast_export_baseline_data = """commit refs/heads/master
mark :1
committer
data 15
add c, remove b
M 644 inline a
data 13
test 1
test 3
M 644 inline c
data 6
test 4
commit refs/heads/master
mark :2
committer
data 14
modify a again
from :1
M 644 inline a
data 20
test 1
test 3
test 5
commit refs/heads/master
mark :3
committer
data 5
add d
from :2
M 644 inline d
data 6
test 6
"""


class TestFastExport(ExternalBase):

    def test_empty(self):
        self.make_branch_and_tree("br")
        self.assertEquals("", self.run_bzr("fast-export br")[0])

    def test_pointless(self):
        tree = self.make_branch_and_tree("br")
        tree.commit("pointless")
        data = self.run_bzr("fast-export br")[0]
        self.assertTrue(data.startswith(
            'commit refs/heads/master\nmark :1\ncommitter'))

    def test_file(self):
        tree = self.make_branch_and_tree("br")
        tree.commit("pointless")
        data = self.run_bzr("fast-export br br.fi")[0]
        self.assertEquals("", data)
        try:
            self.assertPathExists("br.fi")
        except AttributeError: # bzr < 2.4
            self.failUnlessExists("br.fi")

    def test_tag_rewriting(self):
        tree = self.make_branch_and_tree("br")
        tree.commit("pointless")
        self.assertTrue(tree.branch.supports_tags())
        rev_id = tree.branch.dotted_revno_to_revision_id((1,))
        tree.branch.tags.set_tag("goodTag", rev_id)
        tree.branch.tags.set_tag("bad Tag", rev_id)

        # first check --no-rewrite-tag-names
        data = self.run_bzr("fast-export --plain --no-rewrite-tag-names br")[0]
        self.assertNotEqual(-1, data.find("reset refs/tags/goodTag"))
        self.assertEqual(data.find("reset refs/tags/"),
            data.rfind("reset refs/tags/"))

        # and now with --rewrite-tag-names
        data = self.run_bzr("fast-export --plain --rewrite-tag-names br")[0]
        self.assertNotEqual(-1, data.find("reset refs/tags/goodTag"))
        # "bad Tag" should be exported as bad_Tag
        self.assertNotEqual(-1, data.find("reset refs/tags/bad_Tag"))

    def test_no_tags(self):
        tree = self.make_branch_and_tree("br")
        tree.commit("pointless")
        self.assertTrue(tree.branch.supports_tags())
        rev_id = tree.branch.dotted_revno_to_revision_id((1,))
        tree.branch.tags.set_tag("someTag", rev_id)
        data = self.run_bzr("fast-export --plain --no-tags br")[0]
        self.assertEqual(-1, data.find("reset refs/tags/someTag"))

    def test_baseline_option(self):
        tree = self.make_branch_and_tree("bl")

        # Revision 1
        file('bl/a', 'w').write('test 1')
        tree.add('a')
        tree.commit(message='add a')

        # Revision 2
        file('bl/b', 'w').write('test 2')
        file('bl/a', 'a').write('\ntest 3')
        tree.add('b')
        tree.commit(message='add b, modify a')

        # Revision 3
        file('bl/c', 'w').write('test 4')
        tree.add('c')
        tree.remove('b')
        tree.commit(message='add c, remove b')

        # Revision 4
        file('bl/a', 'a').write('\ntest 5')
        tree.commit(message='modify a again')

        # Revision 5
        file('bl/d', 'w').write('test 6')
        tree.add('d')
        tree.commit(message='add d')

        # This exports the baseline state at Revision 3,
        # followed by the deltas for 4 and 5
        data = self.run_bzr("fast-export --baseline -r 3..
bl")[0] data = re.sub('committer.*', 'committer', data) self.assertEquals(fast_export_baseline_data, data) # Also confirm that --baseline with no args is identical to full export data1 = self.run_bzr("fast-export --baseline bl")[0] data2 = self.run_bzr("fast-export bl")[0] self.assertEquals(data1, data2) simple_fast_import_stream = """commit refs/heads/master mark :1 committer Jelmer Vernooij 1299718135 +0100 data 7 initial """ class TestFastImportInfo(ExternalBase): def test_simple(self): self.build_tree_contents([('simple.fi', simple_fast_import_stream)]) output = self.run_bzr("fast-import-info simple.fi")[0] self.assertEquals(output, """Command counts: \t0\tblob \t0\tcheckpoint \t1\tcommit \t0\tfeature \t0\tprogress \t0\treset \t0\ttag File command counts: \t0\tfilemodify \t0\tfiledelete \t0\tfilecopy \t0\tfilerename \t0\tfiledeleteall Parent counts: \t1\tparents-0 \t0\ttotal revisions merged Commit analysis: \tno\texecutables \tno\tseparate authors found \tno\tsymlinks \tno\tblobs referenced by SHA Head analysis: \t[':1']\trefs/heads/master Merges: """) class TestFastImport(ExternalBase): def test_empty(self): self.build_tree_contents([('empty.fi', "")]) self.make_branch_and_tree("br") self.assertEquals("", self.run_bzr("fast-import empty.fi br")[0]) def test_file(self): tree = self.make_branch_and_tree("br") self.build_tree_contents([('file.fi', simple_fast_import_stream)]) data = self.run_bzr("fast-import file.fi br")[0] self.assertEquals(1, tree.branch.revno()) def test_missing_bytes(self): self.build_tree_contents([('empty.fi', """ commit refs/heads/master mark :1 committer data 15 """)]) self.make_branch_and_tree("br") self.run_bzr_error(['bzr: ERROR: 4: Parse error: line 4: Command commit is missing section committer\n'], "fast-import empty.fi br") class TestFastImportFilter(ExternalBase): def test_empty(self): self.build_tree_contents([('empty.fi', "")]) self.make_branch_and_tree("br") self.assertEquals("", self.run_bzr("fast-import-filter -")[0]) def test_default_stdin(self): self.build_tree_contents([('empty.fi', "")]) self.make_branch_and_tree("br") self.assertEquals("", self.run_bzr("fast-import-filter")[0]) bzr-fastimport-0.13.0+bzr361/tests/test_exporter.py0000644000000000000000000001222211647001263020351 0ustar 00000000000000# Copyright (C) 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
"""Test the exporter.""" import os import tempfile import gzip from bzrlib import tests from bzrlib.plugins.fastimport.exporter import ( _get_output_stream, check_ref_format, sanitize_ref_name_for_git ) from bzrlib.plugins.fastimport.tests import ( FastimportFeature, ) class TestOutputStream(tests.TestCase): _test_needs_features = [FastimportFeature] def test_get_output_stream_stdout(self): # - returns standard out self.assertIsNot(None, _get_output_stream("-")) def test_get_source_gz(self): fd, filename = tempfile.mkstemp(suffix=".gz") os.close(fd) stream = _get_output_stream(filename) stream.write("bla") stream.close() # files ending in .gz are automatically decompressed. f = gzip.GzipFile(filename) self.assertEquals("bla", f.read()) f.close() def test_get_source_file(self): # other files are opened as regular files. fd, filename = tempfile.mkstemp() os.close(fd) stream = _get_output_stream(filename) stream.write("foo") stream.close() f = open(filename, 'r') self.assertEquals("foo", f.read()) f.close() # from dulwich.tests.test_repository: class CheckRefFormatTests(tests.TestCase): """Tests for the check_ref_format function. These are the same tests as in the git test suite. """ def test_valid(self): self.assertTrue(check_ref_format('heads/foo')) self.assertTrue(check_ref_format('foo/bar/baz')) self.assertTrue(check_ref_format('refs///heads/foo')) self.assertTrue(check_ref_format('foo./bar')) self.assertTrue(check_ref_format('heads/foo@bar')) self.assertTrue(check_ref_format('heads/fix.lock.error')) def test_invalid(self): self.assertFalse(check_ref_format('foo')) self.assertFalse(check_ref_format('foo/.bar')) self.assertFalse(check_ref_format('heads/foo/')) self.assertFalse(check_ref_format('heads/foo.')) self.assertFalse(check_ref_format('./foo')) self.assertFalse(check_ref_format('.refs/foo')) self.assertFalse(check_ref_format('heads/foo..bar')) self.assertFalse(check_ref_format('heads/foo?bar')) self.assertFalse(check_ref_format('heads/foo.lock')) self.assertFalse(check_ref_format('heads/v@{ation')) self.assertFalse(check_ref_format('heads/foo\\bar')) self.assertFalse(check_ref_format('heads/foo\bar')) self.assertFalse(check_ref_format('heads/foo bar')) self.assertFalse(check_ref_format('heads/foo\020bar')) self.assertFalse(check_ref_format('heads/foo\177bar')) class CheckRefnameRewriting(tests.TestCase): """Tests for sanitize_ref_name_for_git function""" def test_passthrough_valid(self): self.assertEqual(sanitize_ref_name_for_git('heads/foo'), 'heads/foo') self.assertEqual(sanitize_ref_name_for_git('foo/bar/baz'), 'foo/bar/baz') self.assertEqual(sanitize_ref_name_for_git('refs///heads/foo'), 'refs///heads/foo') self.assertEqual(sanitize_ref_name_for_git('foo./bar'), 'foo./bar') self.assertEqual(sanitize_ref_name_for_git('heads/foo@bar'), 'heads/foo@bar') self.assertEqual(sanitize_ref_name_for_git('heads/fix.lock.error'), 'heads/fix.lock.error') def test_rewrite_invalid(self): self.assertTrue(check_ref_format(sanitize_ref_name_for_git('foo./bar'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo/'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo.'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('./foo'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('.refs/foo'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo..bar'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo?bar'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo.lock'))) 
self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/v@{ation'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\bar'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\\bar'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo bar'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\020bar'))) self.assertTrue(check_ref_format(sanitize_ref_name_for_git('heads/foo\177bar'))) bzr-fastimport-0.13.0+bzr361/tests/test_generic_processor.py0000644000000000000000000026241711717731457022246 0ustar 00000000000000# Copyright (C) 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . import time from bzrlib import ( tests, ) from bzrlib.plugins.fastimport.helpers import ( kind_to_mode, ) from bzrlib.plugins.fastimport.tests import ( FastimportFeature, ) try: from fastimport import commands except ImportError: commands = object() def load_tests(standard_tests, module, loader): """Parameterize tests for all versions of groupcompress.""" scenarios = [ ('pack-0.92', {'branch_format': 'pack-0.92'}), ('1.9-rich-root', {'branch_format': '1.9-rich-root'}), ] try: from bzrlib.repofmt.groupcompress_repo import RepositoryFormat2a scenarios.append(('2a', {'branch_format': '2a'})) except ImportError: pass suite = loader.suiteClass() result = tests.multiply_tests(standard_tests, scenarios, suite) return result class TestCaseForGenericProcessor(tests.TestCaseWithTransport): _test_needs_features = [FastimportFeature] branch_format = "pack-0.92" def get_handler(self): from bzrlib.plugins.fastimport.processors import ( generic_processor, ) branch = self.make_branch('.', format=self.branch_format) handler = generic_processor.GenericProcessor(branch.bzrdir) return handler, branch # FIXME: [] as a default is bad, as it is mutable, but I want # to use None to mean "don't check this". def assertChanges(self, branch, revno, expected_added=[], expected_removed=[], expected_modified=[], expected_renamed=[], expected_kind_changed=[]): """Check the changes introduced in a revision of a branch. This method checks that a revision introduces expected changes. The required changes are passed in as a list, where each entry contains the needed information about the change. If you do not wish to assert anything about a particular category then pass None instead. branch: The branch. revno: revision number of revision to check. expected_added: a list of (filename,) tuples that must have been added in the delta. expected_removed: a list of (filename,) tuples that must have been removed in the delta. expected_modified: a list of (filename,) tuples that must have been modified in the delta. expected_renamed: a list of (old_path, new_path) tuples that must have been renamed in the delta. expected_kind_changed: a list of (path, old_kind, new_kind) tuples that must have been changed in the delta. 
:return: revtree1, revtree2 """ repo = branch.repository revtree1 = repo.revision_tree(branch.get_rev_id(revno - 1)) revtree2 = repo.revision_tree(branch.get_rev_id(revno)) changes = revtree2.changes_from(revtree1) self._check_changes(changes, expected_added, expected_removed, expected_modified, expected_renamed, expected_kind_changed) return revtree1, revtree2 def _check_changes(self, changes, expected_added=[], expected_removed=[], expected_modified=[], expected_renamed=[], expected_kind_changed=[]): """Check the changes in a TreeDelta This method checks that the TreeDelta contains the expected modifications between the two trees that were used to generate it. The required changes are passed in as a list, where each entry contains the needed information about the change. If you do not wish to assert anything about a particular category then pass None instead. changes: The TreeDelta to check. expected_added: a list of (filename,) tuples that must have been added in the delta. expected_removed: a list of (filename,) tuples that must have been removed in the delta. expected_modified: a list of (filename,) tuples that must have been modified in the delta. expected_renamed: a list of (old_path, new_path) tuples that must have been renamed in the delta. expected_kind_changed: a list of (path, old_kind, new_kind) tuples that must have been changed in the delta. """ renamed = changes.renamed added = changes.added removed = changes.removed modified = changes.modified kind_changed = changes.kind_changed if expected_renamed is not None: self.assertEquals(len(renamed), len(expected_renamed), "%s is renamed, expected %s" % (renamed, expected_renamed)) renamed_files = [(item[0], item[1]) for item in renamed] for expected_renamed_entry in expected_renamed: self.assertTrue(expected_renamed_entry in renamed_files, "%s is not renamed, %s are" % (str(expected_renamed_entry), renamed_files)) if expected_added is not None: self.assertEquals(len(added), len(expected_added), "%s is added" % str(added)) added_files = [(item[0],) for item in added] for expected_added_entry in expected_added: self.assertTrue(expected_added_entry in added_files, "%s is not added, %s are" % (str(expected_added_entry), added_files)) if expected_removed is not None: self.assertEquals(len(removed), len(expected_removed), "%s is removed" % str(removed)) removed_files = [(item[0],) for item in removed] for expected_removed_entry in expected_removed: self.assertTrue(expected_removed_entry in removed_files, "%s is not removed, %s are" % (str(expected_removed_entry), removed_files)) if expected_modified is not None: self.assertEquals(len(modified), len(expected_modified), "%s is modified" % str(modified)) modified_files = [(item[0],) for item in modified] for expected_modified_entry in expected_modified: self.assertTrue(expected_modified_entry in modified_files, "%s is not modified, %s are" % ( str(expected_modified_entry), modified_files)) if expected_kind_changed is not None: self.assertEquals(len(kind_changed), len(expected_kind_changed), "%s is kind-changed, expected %s" % (kind_changed, expected_kind_changed)) kind_changed_files = [(item[0], item[2], item[3]) for item in kind_changed] for expected_kind_changed_entry in expected_kind_changed: self.assertTrue(expected_kind_changed_entry in kind_changed_files, "%s is not kind-changed, %s are" % ( str(expected_kind_changed_entry), kind_changed_files)) def assertContent(self, branch, tree, path, content): file_id = tree.path2id(path) branch.lock_read() self.addCleanup(branch.unlock) 
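        # Editor's aside -- not part of the original source.  The shapes
        # _check_changes above expects for each expected_* argument,
        # shown as illustrative data:
        _expectation_shapes_sketch = {
            'expected_added': [('a',), ('a/b',)],          # 1-tuples of paths
            'expected_removed': [('old.txt',)],
            'expected_modified': [('a/b',)],
            'expected_renamed': [('old.txt', 'new.txt')],  # (old, new) pairs
            'expected_kind_changed': [('a/b', 'file', 'symlink')],
        }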
self.assertEqual(tree.get_file_text(file_id), content) def assertSymlinkTarget(self, branch, tree, path, target): file_id = tree.path2id(path) branch.lock_read() self.addCleanup(branch.unlock) self.assertEqual(tree.get_symlink_target(file_id), target) def assertExecutable(self, branch, tree, path, executable): file_id = tree.path2id(path) branch.lock_read() self.addCleanup(branch.unlock) self.assertEqual(tree.is_executable(file_id), executable) def assertRevisionRoot(self, revtree, path): self.assertEqual(revtree.get_revision_id(), revtree.get_file_revision(revtree.path2id(path))) class TestImportToPackTag(TestCaseForGenericProcessor): def file_command_iter(self, path, kind='file', content='aaa', executable=False, to_kind=None, to_content='bbb', to_executable=None): # Revno 1: create a file or symlink # Revno 2: modify it if to_kind is None: to_kind = kind if to_executable is None: to_executable = executable def command_list(): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): yield commands.FileModifyCommand(path, kind_to_mode(kind, executable), None, content) yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) def files_two(): yield commands.FileModifyCommand(path, kind_to_mode(to_kind, to_executable), None, to_content) # pass "head" for from_ to show that #401249 is worse than I knew yield commands.CommitCommand('head', '2', author, committer, "commit 2", "head", [], files_two) yield commands.TagCommand('tag1', ':1', committer, "tag 1") # pass "head" for from_ to demonstrate #401249 yield commands.TagCommand('tag2', 'head', committer, "tag 2") return command_list def test_tag(self): handler, branch = self.get_handler() path = 'a' raise tests.KnownFailure("non-mark committish not yet supported" "- bug #410249") handler.process(self.file_command_iter(path)) class TestImportToPackModify(TestCaseForGenericProcessor): def file_command_iter(self, path, kind='file', content='aaa', executable=False, to_kind=None, to_content='bbb', to_executable=None): # Revno 1: create a file or symlink # Revno 2: modify it if to_kind is None: to_kind = kind if to_executable is None: to_executable = executable mode = kind_to_mode(kind, executable) to_mode = kind_to_mode(to_kind, to_executable) def command_list(): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): yield commands.FileModifyCommand(path, mode, None, content) yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) def files_two(): yield commands.FileModifyCommand(path, to_mode, None, to_content) yield commands.CommitCommand('head', '2', author, committer, "commit 2", ":1", [], files_two) return command_list def test_modify_file_in_root(self): handler, branch = self.get_handler() path = 'a' handler.process(self.file_command_iter(path)) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[(path,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_modified=[(path,)]) self.assertContent(branch, revtree1, path, "aaa") self.assertContent(branch, revtree2, path, "bbb") self.assertRevisionRoot(revtree1, path) self.assertRevisionRoot(revtree2, path) def test_modify_file_in_subdir(self): handler, branch = self.get_handler() path = 'a/a' handler.process(self.file_command_iter(path)) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), (path,)]) revtree1, revtree2 = 
self.assertChanges(branch, 2, expected_modified=[(path,)]) self.assertContent(branch, revtree1, path, "aaa") self.assertContent(branch, revtree2, path, "bbb") def test_modify_symlink_in_root(self): handler, branch = self.get_handler() path = 'a' handler.process(self.file_command_iter(path, kind='symlink')) revtree1, revtree2 = self.assertChanges(branch, 2, expected_modified=[(path,)]) self.assertSymlinkTarget(branch, revtree1, path, "aaa") self.assertSymlinkTarget(branch, revtree2, path, "bbb") self.assertRevisionRoot(revtree1, path) self.assertRevisionRoot(revtree2, path) def test_modify_symlink_in_subdir(self): handler, branch = self.get_handler() path = 'a/a' handler.process(self.file_command_iter(path, kind='symlink')) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), (path,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_modified=[(path,)]) self.assertSymlinkTarget(branch, revtree1, path, "aaa") self.assertSymlinkTarget(branch, revtree2, path, "bbb") def test_modify_file_becomes_symlink(self): handler, branch = self.get_handler() path = 'a/a' handler.process(self.file_command_iter(path, kind='file', to_kind='symlink')) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), (path,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_kind_changed=[(path, 'file', 'symlink')]) self.assertContent(branch, revtree1, path, "aaa") self.assertSymlinkTarget(branch, revtree2, path, "bbb") def test_modify_symlink_becomes_file(self): handler, branch = self.get_handler() path = 'a/a' handler.process(self.file_command_iter(path, kind='symlink', to_kind='file')) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), (path,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_kind_changed=[(path, 'symlink', 'file')]) self.assertSymlinkTarget(branch, revtree1, path, "aaa") self.assertContent(branch, revtree2, path, "bbb") def test_modify_file_now_executable(self): handler, branch = self.get_handler() path = 'a/a' handler.process(self.file_command_iter(path, executable=False, to_executable=True, to_content='aaa')) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), (path,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_modified=[(path,)]) self.assertExecutable(branch, revtree1, path, False) self.assertExecutable(branch, revtree2, path, True) def test_modify_file_no_longer_executable(self): handler, branch = self.get_handler() path = 'a/a' handler.process(self.file_command_iter(path, executable=True, to_executable=False, to_content='aaa')) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), (path,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_modified=[(path,)]) self.assertExecutable(branch, revtree1, path, True) self.assertExecutable(branch, revtree2, path, False) class TestImportToPackModifyTwice(TestCaseForGenericProcessor): """This tests when the same file is modified twice in the one commit. Note: hg-fast-export produces data like this on occasions. 
""" def file_command_iter(self, path, kind='file', content='aaa', executable=False, to_kind=None, to_content='bbb', to_executable=None): # Revno 1: create a file twice if to_kind is None: to_kind = kind if to_executable is None: to_executable = executable def command_list(): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): yield commands.FileModifyCommand(path, kind_to_mode(kind, executable), None, content) yield commands.FileModifyCommand(path, kind_to_mode(to_kind, to_executable), None, to_content) yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) return command_list def test_modify_file_twice_in_root(self): handler, branch = self.get_handler() path = 'a' handler.process(self.file_command_iter(path)) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[(path,)]) self.assertContent(branch, revtree1, path, "aaa") self.assertRevisionRoot(revtree1, path) class TestImportToPackModifyTricky(TestCaseForGenericProcessor): def file_command_iter(self, path1, path2, kind='file'): # Revno 1: create a file or symlink in a directory # Revno 2: create a second file that implicitly deletes the # first one because either: # * the new file is a in directory with the old file name # * the new file has the same name as the directory of the first def command_list(): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): yield commands.FileModifyCommand(path1, kind_to_mode(kind, False), None, "aaa") yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) def files_two(): yield commands.FileModifyCommand(path2, kind_to_mode(kind, False), None, "bbb") yield commands.CommitCommand('head', '2', author, committer, "commit 2", ":1", [], files_two) return command_list def test_modify_file_becomes_directory(self): handler, branch = self.get_handler() path1 = 'a/b' path2 = 'a/b/c' handler.process(self.file_command_iter(path1, path2)) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), (path1,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_added=[(path2,)], expected_kind_changed=[(path1, 'file', 'directory')]) self.assertContent(branch, revtree1, path1, "aaa") self.assertContent(branch, revtree2, path2, "bbb") def test_modify_directory_becomes_file(self): handler, branch = self.get_handler() path1 = 'a/b/c' path2 = 'a/b' handler.process(self.file_command_iter(path1, path2)) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), ('a/b',), (path1,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_removed=[(path1,),], expected_kind_changed=[(path2, 'directory', 'file')]) self.assertContent(branch, revtree1, path1, "aaa") self.assertContent(branch, revtree2, path2, "bbb") def test_modify_symlink_becomes_directory(self): handler, branch = self.get_handler() path1 = 'a/b' path2 = 'a/b/c' handler.process(self.file_command_iter(path1, path2, 'symlink')) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), (path1,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_added=[(path2,)], expected_kind_changed=[(path1, 'symlink', 'directory')]) self.assertSymlinkTarget(branch, revtree1, path1, "aaa") self.assertSymlinkTarget(branch, revtree2, path2, "bbb") def test_modify_directory_becomes_symlink(self): handler, branch = self.get_handler() path1 = 'a/b/c' path2 = 'a/b' 
handler.process(self.file_command_iter(path1, path2, 'symlink')) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), ('a/b',), (path1,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_removed=[(path1,),], expected_kind_changed=[(path2, 'directory', 'symlink')]) self.assertSymlinkTarget(branch, revtree1, path1, "aaa") self.assertSymlinkTarget(branch, revtree2, path2, "bbb") class TestImportToPackDelete(TestCaseForGenericProcessor): def file_command_iter(self, path, kind='file'): # Revno 1: create a file or symlink # Revno 2: delete it def command_list(): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): yield commands.FileModifyCommand(path, kind_to_mode(kind, False), None, "aaa") yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) def files_two(): yield commands.FileDeleteCommand(path) yield commands.CommitCommand('head', '2', author, committer, "commit 2", ":1", [], files_two) return command_list def test_delete_file_in_root(self): handler, branch = self.get_handler() path = 'a' handler.process(self.file_command_iter(path)) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[(path,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_removed=[(path,)]) self.assertContent(branch, revtree1, path, "aaa") self.assertRevisionRoot(revtree1, path) def test_delete_file_in_subdir(self): handler, branch = self.get_handler() path = 'a/a' handler.process(self.file_command_iter(path)) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), (path,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_removed=[('a',), (path,)]) self.assertContent(branch, revtree1, path, "aaa") def test_delete_symlink_in_root(self): handler, branch = self.get_handler() path = 'a' handler.process(self.file_command_iter(path, kind='symlink')) revtree1, revtree2 = self.assertChanges(branch, 2, expected_removed=[(path,)]) self.assertSymlinkTarget(branch, revtree1, path, "aaa") self.assertRevisionRoot(revtree1, path) def test_delete_symlink_in_subdir(self): handler, branch = self.get_handler() path = 'a/a' handler.process(self.file_command_iter(path, kind='symlink')) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), (path,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_removed=[('a',), (path,)]) self.assertSymlinkTarget(branch, revtree1, path, "aaa") def test_delete_file_in_deep_subdir(self): handler, branch = self.get_handler() path = 'a/b/c/d' handler.process(self.file_command_iter(path)) revtree0, revtree1 = self.assertChanges(branch, 1, expected_added=[('a',), ('a/b',), ('a/b/c',), (path,)]) revtree1, revtree2 = self.assertChanges(branch, 2, expected_removed=[('a',), ('a/b',), ('a/b/c',), (path,)]) self.assertContent(branch, revtree1, path, "aaa") class TestImportToPackDeleteNew(TestCaseForGenericProcessor): """Test deletion of a newly added file.""" def file_command_iter(self, path, kind='file'): # Revno 1: create a file or symlink then delete it def command_list(): author = ['', 'bugs@a.com', time.time(), time.timezone] committer = ['', 'elmer@a.com', time.time(), time.timezone] def files_one(): yield commands.FileModifyCommand(path, kind_to_mode(kind, False), None, "aaa") yield commands.FileDeleteCommand(path) yield commands.CommitCommand('head', '1', author, committer, "commit 1", None, [], files_one) return command_list def test_delete_new_file_in_root(self): 
        handler, branch = self.get_handler()
        path = 'a'
        handler.process(self.file_command_iter(path))
        revtree0, revtree1 = self.assertChanges(branch, 1)

    def test_delete_new_file_in_subdir(self):
        handler, branch = self.get_handler()
        path = 'a/a'
        handler.process(self.file_command_iter(path))
        revtree0, revtree1 = self.assertChanges(branch, 1)

    def test_delete_new_symlink_in_root(self):
        handler, branch = self.get_handler()
        path = 'a'
        handler.process(self.file_command_iter(path, kind='symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1)

    def test_delete_new_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        path = 'a/a'
        handler.process(self.file_command_iter(path, kind='symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1)

    def test_delete_new_file_in_deep_subdir(self):
        handler, branch = self.get_handler()
        path = 'a/b/c/d'
        handler.process(self.file_command_iter(path))
        revtree0, revtree1 = self.assertChanges(branch, 1)


class TestImportToPackDeleteMultiLevel(TestCaseForGenericProcessor):

    def file_command_iter(self, paths, paths_to_delete):
        # Revno 1: create multiple files
        # Revno 2: delete multiple files
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                for i, path in enumerate(paths):
                    yield commands.FileModifyCommand(path,
                        kind_to_mode('file', False), None, "aaa%d" % i)
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                for path in paths_to_delete:
                    yield commands.FileDeleteCommand(path)
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_delete_files_in_multiple_levels(self):
        handler, branch = self.get_handler()
        paths = ['a/b/c', 'a/b/d/e']
        paths_to_delete = ['a/b/c', 'a/b/d/e']
        handler.process(self.file_command_iter(paths, paths_to_delete))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[
                ('a',), ('a/b',), ('a/b/c',),
                ('a/b/d',), ('a/b/d/e',),
                ])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[
                ('a',), ('a/b',), ('a/b/c',),
                ('a/b/d',), ('a/b/d/e',),
                ])

    def test_delete_file_single_level(self):
        handler, branch = self.get_handler()
        paths = ['a/b/c', 'a/b/d/e']
        paths_to_delete = ['a/b/d/e']
        handler.process(self.file_command_iter(paths, paths_to_delete))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[
                ('a',), ('a/b',), ('a/b/c',),
                ('a/b/d',), ('a/b/d/e',),
                ])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[
                ('a/b/d',), ('a/b/d/e',),
                ])

    def test_delete_file_complex_level(self):
        handler, branch = self.get_handler()
        paths = ['a/b/c', 'a/b/d/e', 'a/f/g', 'a/h', 'a/b/d/i/j']
        paths_to_delete = ['a/b/c', 'a/b/d/e', 'a/f/g', 'a/b/d/i/j']
        handler.process(self.file_command_iter(paths, paths_to_delete))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[
                ('a',), ('a/b',), ('a/b/c',),
                ('a/b/d',), ('a/b/d/e',),
                ('a/f',), ('a/f/g',), ('a/h',),
                ('a/b/d/i',), ('a/b/d/i/j',),
                ])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[
                ('a/b',), ('a/b/c',),
                ('a/b/d',), ('a/b/d/e',),
                ('a/f',), ('a/f/g',),
                ('a/b/d/i',), ('a/b/d/i/j',),
                ])


class TestImportToPackDeleteThenAdd(TestCaseForGenericProcessor):
    """Test delete followed by an add.
    Merges can cause this.
    """

    def file_command_iter(self, path, kind='file', content='aaa',
        executable=False, to_kind=None, to_content='bbb', to_executable=None):
        # Revno 1: create a file or symlink
        # Revno 2: delete it and add it
        if to_kind is None:
            to_kind = kind
        if to_executable is None:
            to_executable = executable
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(path,
                    kind_to_mode(kind, executable), None, content)
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileDeleteCommand(path)
                yield commands.FileModifyCommand(path,
                    kind_to_mode(to_kind, to_executable), None, to_content)
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_delete_then_add_file_in_root(self):
        handler, branch = self.get_handler()
        path = 'a'
        handler.process(self.file_command_iter(path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(path,)],
            expected_added=[(path,)])
        self.assertContent(branch, revtree1, path, "aaa")
        self.assertContent(branch, revtree2, path, "bbb")
        self.assertRevisionRoot(revtree1, path)
        self.assertRevisionRoot(revtree2, path)

    def test_delete_then_add_file_in_subdir(self):
        handler, branch = self.get_handler()
        path = 'a/a'
        handler.process(self.file_command_iter(path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), (path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(path,)],
            expected_added=[(path,)])
        self.assertContent(branch, revtree1, path, "aaa")
        self.assertContent(branch, revtree2, path, "bbb")

    def test_delete_then_add_symlink_in_root(self):
        handler, branch = self.get_handler()
        path = 'a'
        handler.process(self.file_command_iter(path, kind='symlink'))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(path,)],
            expected_added=[(path,)])
        self.assertSymlinkTarget(branch, revtree1, path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, path, "bbb")
        self.assertRevisionRoot(revtree1, path)
        self.assertRevisionRoot(revtree2, path)

    def test_delete_then_add_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        path = 'a/a'
        handler.process(self.file_command_iter(path, kind='symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), (path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(path,)],
            expected_added=[(path,)])
        self.assertSymlinkTarget(branch, revtree1, path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, path, "bbb")


class TestImportToPackDeleteDirectory(TestCaseForGenericProcessor):

    def file_command_iter(self, paths, dir):
        # Revno 1: create multiple files
        # Revno 2: delete a directory holding those files
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                for i, path in enumerate(paths):
                    yield commands.FileModifyCommand(path,
                        kind_to_mode('file', False), None, "aaa%d" % i)
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileDeleteCommand(dir)
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_delete_dir(self):
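        # Deleting the directory 'a/b' implicitly removes everything below
        # it, while its sibling 'a/g' and parent 'a' survive, as the
        # expected_removed list below shows.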
        handler, branch = self.get_handler()
        paths = ['a/b/c', 'a/b/d', 'a/b/e/f', 'a/g']
        dir = 'a/b'
        handler.process(self.file_command_iter(paths, dir))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[
                ('a',), ('a/b',), ('a/b/c',),
                ('a/b/d',), ('a/b/e',), ('a/b/e/f',),
                ('a/g',),
                ])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[
                ('a/b',), ('a/b/c',),
                ('a/b/d',), ('a/b/e',), ('a/b/e/f',),
                ])


class TestImportToPackDeleteDirectoryThenAddFile(TestCaseForGenericProcessor):
    """Test deleting a directory then adding a file in the same commit."""

    def file_command_iter(self, paths, dir, new_path, kind='file'):
        # Revno 1: create files in a directory
        # Revno 2: delete the directory, then add a file into it
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                for i, path in enumerate(paths):
                    yield commands.FileModifyCommand(path,
                        kind_to_mode(kind, False), None, "aaa%d" % i)
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileDeleteCommand(dir)
                yield commands.FileModifyCommand(new_path,
                    kind_to_mode(kind, False), None, "bbb")
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_delete_dir_then_add_file(self):
        handler, branch = self.get_handler()
        paths = ['a/b/c', 'a/b/d']
        dir = 'a/b'
        new_path = 'a/b/z'
        handler.process(self.file_command_iter(paths, dir, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), ('a/b',), ('a/b/c',), ('a/b/d',),])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[('a/b',), ('a/b/c',), ('a/b/d',)],
            expected_added=[('a/b',), ('a/b/z',)])
        self.assertContent(branch, revtree2, new_path, "bbb")

    def test_delete_dir_then_add_symlink(self):
        handler, branch = self.get_handler()
        paths = ['a/b/c', 'a/b/d']
        dir = 'a/b'
        new_path = 'a/b/z'
        handler.process(self.file_command_iter(paths, dir, new_path,
            'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), ('a/b',), ('a/b/c',), ('a/b/d',),])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[('a/b',), ('a/b/c',), ('a/b/d',)],
            expected_added=[('a/b',), ('a/b/z',)])
        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")


class TestImportToPackRename(TestCaseForGenericProcessor):

    def get_command_iter(self, old_path, new_path, kind='file'):
        # Revno 1: create a file or symlink
        # Revno 2: rename it
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(old_path,
                    kind_to_mode(kind, False), None, "aaa")
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileRenameCommand(old_path, new_path)
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_rename_file_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])
        self.assertRevisionRoot(revtree1, old_path)
        self.assertRevisionRoot(revtree2, new_path)
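    # In fast-import stream terms, FileRenameCommand(old, new) corresponds
    # to an 'R old new' filecommand inside a commit. An illustrative
    # fragment only, matching what get_command_iter() above builds
    # programmatically:
    #
    #   commit refs/heads/head
    #   mark :2
    #   ...
    #   from :1
    #   R a b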
    def test_rename_symlink_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])
        self.assertRevisionRoot(revtree1, old_path)
        self.assertRevisionRoot(revtree2, new_path)

    def test_rename_file_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'a/a'
        new_path = 'a/b'
        handler.process(self.get_command_iter(old_path, new_path))
        self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])

    def test_rename_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'a/a'
        new_path = 'a/b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])

    def test_rename_file_to_new_dir(self):
        handler, branch = self.get_handler()
        old_path = 'a/a'
        new_path = 'b/a'
        handler.process(self.get_command_iter(old_path, new_path))
        self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)],
            expected_added=[('b',)],
            expected_removed=[('a',)])

    def test_rename_symlink_to_new_dir(self):
        handler, branch = self.get_handler()
        old_path = 'a/a'
        new_path = 'b/a'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)],
            expected_added=[('b',)],
            expected_removed=[('a',)])


class TestImportToPackRenameNew(TestCaseForGenericProcessor):
    """Test rename of a newly added file."""

    def get_command_iter(self, old_path, new_path, kind='file'):
        # Revno 1: create a file and rename it
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(old_path,
                    kind_to_mode(kind, False), None, "aaa")
                yield commands.FileRenameCommand(old_path, new_path)
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
        return command_list

    def test_rename_new_file_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(new_path,)])
        self.assertRevisionRoot(revtree1, new_path)

    def test_rename_new_symlink_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(new_path,)])
        self.assertRevisionRoot(revtree1, new_path)

    def test_rename_new_file_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'a/a'
        new_path = 'a/b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), (new_path,)])

    def test_rename_new_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'a/a'
        new_path = 'a/b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), (new_path,)])


class TestImportToPackRenameToDeleted(TestCaseForGenericProcessor):
    """Test rename to a destination path deleted in this commit."""

    def get_command_iter(self, old_path, new_path, kind='file'):
        # Revno 1: create two files
        # Revno 2: delete one, rename the other one to that path
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(old_path,
                    kind_to_mode(kind, False), None, "aaa")
                yield commands.FileModifyCommand(new_path,
                    kind_to_mode(kind, False), None, "bbb")
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileDeleteCommand(new_path)
                yield commands.FileRenameCommand(old_path, new_path)
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_rename_to_deleted_file_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(old_path,), (new_path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree1, new_path, "bbb")
        self.assertContent(branch, revtree2, new_path, "aaa")
        self.assertRevisionRoot(revtree1, old_path)
        self.assertRevisionRoot(revtree1, new_path)

    def test_rename_to_deleted_symlink_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(old_path,), (new_path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, new_path, "bbb")
        self.assertSymlinkTarget(branch, revtree2, new_path, "aaa")
        self.assertRevisionRoot(revtree1, old_path)
        self.assertRevisionRoot(revtree1, new_path)

    def test_rename_to_deleted_file_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'd/a'
        new_path = 'd/b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d',), (old_path,), (new_path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree1, new_path, "bbb")
        self.assertContent(branch, revtree2, new_path, "aaa")

    def test_rename_to_deleted_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'd/a'
        new_path = 'd/b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d',), (old_path,), (new_path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, new_path, "bbb")
        self.assertSymlinkTarget(branch, revtree2, new_path, "aaa")

    def test_rename_to_deleted_file_in_new_dir(self):
        handler, branch = self.get_handler()
        old_path = 'd1/a'
        new_path = 'd2/b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d1',), (old_path,), ('d2',), (new_path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[('d1',), (new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree1, new_path, "bbb")
        self.assertContent(branch, revtree2, new_path, "aaa")

    def test_rename_to_deleted_symlink_in_new_dir(self):
        handler, branch = self.get_handler()
        old_path = 'd1/a'
        new_path = 'd2/b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d1',), (old_path,), ('d2',), (new_path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[('d1',), (new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, new_path, "bbb")
        self.assertSymlinkTarget(branch, revtree2, new_path, "aaa")


class TestImportToPackRenameModified(TestCaseForGenericProcessor):
    """Test rename of a path previously modified in this commit."""

    def get_command_iter(self, old_path, new_path, kind='file'):
        # Revno 1: create a file or symlink
        # Revno 2: modify then rename it
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(old_path,
                    kind_to_mode(kind, False), None, "aaa")
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileModifyCommand(old_path,
                    kind_to_mode(kind, False), None, "bbb")
                yield commands.FileRenameCommand(old_path, new_path)
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_rename_of_modified_file_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree2, new_path, "bbb")
        self.assertRevisionRoot(revtree1, old_path)
        self.assertRevisionRoot(revtree2, new_path)

    def test_rename_of_modified_symlink_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
        self.assertRevisionRoot(revtree1, old_path)
        self.assertRevisionRoot(revtree2, new_path)

    def test_rename_of_modified_file_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'd/a'
        new_path = 'd/b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d',), (old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree2, new_path, "bbb")

    def test_rename_of_modified_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'd/a'
        new_path = 'd/b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d',), (old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")

    def test_rename_of_modified_file_to_new_dir(self):
        handler, branch = self.get_handler()
        old_path = 'd1/a'
        new_path = 'd2/b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d1',), (old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)],
            expected_added=[('d2',)],
            expected_removed=[('d1',)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree2, new_path, "bbb")

    def test_rename_of_modified_symlink_to_new_dir(self):
        handler, branch = self.get_handler()
        old_path = 'd1/a'
        new_path = 'd2/b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d1',), (old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)],
            expected_added=[('d2',)],
            expected_removed=[('d1',)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")


class TestImportToPackRenameThenModify(TestCaseForGenericProcessor):
    """Test renaming a path, then modifying the new path, in the same
    commit."""

    def get_command_iter(self, old_path, new_path, kind='file'):
        # Revno 1: create a file or symlink
        # Revno 2: rename it, then modify the newly created path
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(old_path,
                    kind_to_mode(kind, False), None, "aaa")
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileRenameCommand(old_path, new_path)
                yield commands.FileModifyCommand(new_path,
                    kind_to_mode(kind, False), None, "bbb")
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_rename_then_modify_file_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree2, new_path, "bbb")
        self.assertRevisionRoot(revtree1, old_path)
        self.assertRevisionRoot(revtree2, new_path)

    def test_rename_then_modify_file_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'd/a'
        new_path = 'd/b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d',), (old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree2, new_path, "bbb")

    def test_rename_then_modify_file_in_new_dir(self):
        handler, branch = self.get_handler()
        old_path = 'd1/a'
        new_path = 'd2/b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d1',), (old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)],
            expected_added=[('d2',)],
            expected_removed=[('d1',)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree2, new_path, "bbb")

    def test_rename_then_modify_symlink_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
        self.assertRevisionRoot(revtree1, old_path)
        self.assertRevisionRoot(revtree2, new_path)

    def test_rename_then_modify_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'd/a'
        new_path = 'd/b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d',), (old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")

    def test_rename_then_modify_symlink_in_new_dir(self):
        handler, branch = self.get_handler()
        old_path = 'd1/a'
        new_path = 'd2/b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d1',), (old_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path, new_path)],
            expected_added=[('d2',)],
            expected_removed=[('d1',)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")


class TestImportToPackDeleteRenameThenModify(TestCaseForGenericProcessor):
    """Test renaming to a deleted path, then modifying the new path, in the
    same commit."""

    def get_command_iter(self, old_path, new_path, kind='file'):
        # Revno 1: create two files or symlinks
        # Revno 2: delete one, rename the other to it, then modify the
        # newly created path
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(old_path,
                    kind_to_mode(kind, False), None, "aaa")
                yield commands.FileModifyCommand(new_path,
                    kind_to_mode(kind, False), None, "zzz")
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileDeleteCommand(new_path)
                yield commands.FileRenameCommand(old_path, new_path)
                yield commands.FileModifyCommand(new_path,
                    kind_to_mode(kind, False), None, "bbb")
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_delete_rename_then_modify_file_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(old_path,), (new_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree1, new_path, "zzz")
        self.assertContent(branch, revtree2, new_path, "bbb")
        self.assertRevisionRoot(revtree1, old_path)
        self.assertRevisionRoot(revtree1, new_path)
        self.assertRevisionRoot(revtree2, new_path)

    def test_delete_rename_then_modify_file_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'd/a'
        new_path = 'd/b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d',), (old_path,), (new_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree1, new_path, "zzz")
        self.assertContent(branch, revtree2, new_path, "bbb")

    def test_delete_rename_then_modify_file_in_new_dir(self):
        handler, branch = self.get_handler()
        old_path = 'd1/a'
        new_path = 'd2/b'
        handler.process(self.get_command_iter(old_path, new_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d1',), ('d2',), (old_path,), (new_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
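        # 'd1' appears in expected_removed as well: renaming its only file
        # into 'd2' leaves it empty, so the import prunes it.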
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[('d1',), (new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertContent(branch, revtree1, old_path, "aaa")
        self.assertContent(branch, revtree1, new_path, "zzz")
        self.assertContent(branch, revtree2, new_path, "bbb")

    def test_delete_rename_then_modify_symlink_in_root(self):
        handler, branch = self.get_handler()
        old_path = 'a'
        new_path = 'b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(old_path,), (new_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, new_path, "zzz")
        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")
        self.assertRevisionRoot(revtree1, old_path)
        self.assertRevisionRoot(revtree1, new_path)
        self.assertRevisionRoot(revtree2, new_path)

    def test_delete_rename_then_modify_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        old_path = 'd/a'
        new_path = 'd/b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d',), (old_path,), (new_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, new_path, "zzz")
        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")

    def test_delete_rename_then_modify_symlink_in_new_dir(self):
        handler, branch = self.get_handler()
        old_path = 'd1/a'
        new_path = 'd2/b'
        handler.process(self.get_command_iter(old_path, new_path, 'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d1',), ('d2',), (old_path,), (new_path,)])
        # Note: the delta doesn't show the modification; the actual new
        # content is validated by the assertions that follow.
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[('d1',), (new_path,)],
            expected_renamed=[(old_path, new_path)])
        self.assertSymlinkTarget(branch, revtree1, old_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, new_path, "zzz")
        self.assertSymlinkTarget(branch, revtree2, new_path, "bbb")


class TestImportToPackRenameTricky(TestCaseForGenericProcessor):

    def file_command_iter(self, path1, old_path2, new_path2, kind='file'):
        # Revno 1: create two files or symlinks in a directory
        # Revno 2: rename the second file so that it implicitly deletes the
        # first one because either:
        # * the new file is in a directory with the old file's name
        # * the new file has the same name as the directory of the first
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(path1,
                    kind_to_mode(kind, False), None, "aaa")
                yield commands.FileModifyCommand(old_path2,
                    kind_to_mode(kind, False), None, "bbb")
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileRenameCommand(old_path2, new_path2)
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_rename_file_becomes_directory(self):
        handler, branch = self.get_handler()
        old_path2 = 'foo'
        path1 = 'a/b'
        new_path2 = 'a/b/c'
        handler.process(self.file_command_iter(path1, old_path2, new_path2))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), (path1,), (old_path2,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path2, new_path2)],
            expected_kind_changed=[(path1, 'file', 'directory')])
        self.assertContent(branch, revtree1, path1, "aaa")
        self.assertContent(branch, revtree2, new_path2, "bbb")

    def test_rename_directory_becomes_file(self):
        handler, branch = self.get_handler()
        old_path2 = 'foo'
        path1 = 'a/b/c'
        new_path2 = 'a/b'
        handler.process(self.file_command_iter(path1, old_path2, new_path2))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), ('a/b',), (path1,), (old_path2,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path2, new_path2)],
            expected_removed=[(path1,), (new_path2,)])
        self.assertContent(branch, revtree1, path1, "aaa")
        self.assertContent(branch, revtree2, new_path2, "bbb")

    def test_rename_symlink_becomes_directory(self):
        handler, branch = self.get_handler()
        old_path2 = 'foo'
        path1 = 'a/b'
        new_path2 = 'a/b/c'
        handler.process(self.file_command_iter(path1, old_path2, new_path2,
            'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), (path1,), (old_path2,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path2, new_path2)],
            expected_kind_changed=[(path1, 'symlink', 'directory')])
        self.assertSymlinkTarget(branch, revtree1, path1, "aaa")
        self.assertSymlinkTarget(branch, revtree2, new_path2, "bbb")
    def test_rename_directory_becomes_symlink(self):
        handler, branch = self.get_handler()
        old_path2 = 'foo'
        path1 = 'a/b/c'
        new_path2 = 'a/b'
        handler.process(self.file_command_iter(path1, old_path2, new_path2,
            'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), ('a/b',), (path1,), (old_path2,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_renamed=[(old_path2, new_path2)],
            expected_removed=[(path1,), (new_path2,)])
        self.assertSymlinkTarget(branch, revtree1, path1, "aaa")
        self.assertSymlinkTarget(branch, revtree2, new_path2, "bbb")


class TestImportToPackCopy(TestCaseForGenericProcessor):

    def file_command_iter(self, src_path, dest_path, kind='file'):
        # Revno 1: create a file or symlink
        # Revno 2: copy it
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(src_path,
                    kind_to_mode(kind, False), None, "aaa")
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileCopyCommand(src_path, dest_path)
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_copy_file_in_root(self):
        handler, branch = self.get_handler()
        src_path = 'a'
        dest_path = 'b'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_added=[(dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree2, src_path, "aaa")
        self.assertContent(branch, revtree2, dest_path, "aaa")
        self.assertRevisionRoot(revtree1, src_path)
        self.assertRevisionRoot(revtree2, dest_path)

    def test_copy_file_in_subdir(self):
        handler, branch = self.get_handler()
        src_path = 'a/a'
        dest_path = 'a/b'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_added=[(dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree2, src_path, "aaa")
        self.assertContent(branch, revtree2, dest_path, "aaa")

    def test_copy_file_to_new_dir(self):
        handler, branch = self.get_handler()
        src_path = 'a/a'
        dest_path = 'b/a'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_added=[('b',), (dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree2, src_path, "aaa")
        self.assertContent(branch, revtree2, dest_path, "aaa")

    def test_copy_symlink_in_root(self):
        handler, branch = self.get_handler()
        src_path = 'a'
        dest_path = 'b'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_added=[(dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")
        self.assertRevisionRoot(revtree1, src_path)
        self.assertRevisionRoot(revtree2, dest_path)

    def test_copy_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        src_path = 'a/a'
        dest_path = 'a/b'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_added=[(dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")

    def test_copy_symlink_to_new_dir(self):
        handler, branch = self.get_handler()
        src_path = 'a/a'
        dest_path = 'b/a'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_added=[('b',), (dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")
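# Note: FileCopyCommand maps to the 'C src dest' filecommand in a
# fast-import stream. As the assertions above show, the import records the
# destination as a plain add (expected_added) carrying the source's content,
# since bzr deltas have no distinct "copied" category.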
class TestImportToPackCopyNew(TestCaseForGenericProcessor):
    """Test copy of a newly added file."""

    def file_command_iter(self, src_path, dest_path, kind='file'):
        # Revno 1: create a file or symlink and copy it
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(src_path,
                    kind_to_mode(kind, False), None, "aaa")
                yield commands.FileCopyCommand(src_path, dest_path)
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
        return command_list

    def test_copy_new_file_in_root(self):
        handler, branch = self.get_handler()
        src_path = 'a'
        dest_path = 'b'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(src_path,), (dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree1, dest_path, "aaa")
        self.assertRevisionRoot(revtree1, src_path)
        self.assertRevisionRoot(revtree1, dest_path)

    def test_copy_new_file_in_subdir(self):
        handler, branch = self.get_handler()
        src_path = 'a/a'
        dest_path = 'a/b'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), (src_path,), (dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree1, dest_path, "aaa")

    def test_copy_new_file_to_new_dir(self):
        handler, branch = self.get_handler()
        src_path = 'a/a'
        dest_path = 'b/a'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), (src_path,), ('b',), (dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree1, dest_path, "aaa")

    def test_copy_new_symlink_in_root(self):
        handler, branch = self.get_handler()
        src_path = 'a'
        dest_path = 'b'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(src_path,), (dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa")
        self.assertRevisionRoot(revtree1, src_path)
        self.assertRevisionRoot(revtree1, dest_path)

    def test_copy_new_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        src_path = 'a/a'
        dest_path = 'a/b'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), (src_path,), (dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa")

    def test_copy_new_symlink_to_new_dir(self):
        handler, branch = self.get_handler()
        src_path = 'a/a'
        dest_path = 'b/a'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('a',), (src_path,), ('b',), (dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, dest_path, "aaa")


class TestImportToPackCopyToDeleted(TestCaseForGenericProcessor):

    def file_command_iter(self, src_path, dest_path, kind='file'):
        # Revno 1: create two files or symlinks
        # Revno 2: delete one and copy the other one to its path
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(src_path,
                    kind_to_mode(kind, False), None, "aaa")
                yield commands.FileModifyCommand(dest_path,
                    kind_to_mode(kind, False), None, "bbb")
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileDeleteCommand(dest_path)
                yield commands.FileCopyCommand(src_path, dest_path)
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_copy_to_deleted_file_in_root(self):
        handler, branch = self.get_handler()
        src_path = 'a'
        dest_path = 'b'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(src_path,), (dest_path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(dest_path,)],
            expected_added=[(dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree1, dest_path, "bbb")
        self.assertContent(branch, revtree2, src_path, "aaa")
        self.assertContent(branch, revtree2, dest_path, "aaa")
        self.assertRevisionRoot(revtree1, src_path)
        self.assertRevisionRoot(revtree1, dest_path)

    def test_copy_to_deleted_symlink_in_root(self):
        handler, branch = self.get_handler()
        src_path = 'a'
        dest_path = 'b'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[(src_path,), (dest_path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(dest_path,)],
            expected_added=[(dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, dest_path, "bbb")
        self.assertSymlinkTarget(branch, revtree2, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")
        self.assertRevisionRoot(revtree1, src_path)
        self.assertRevisionRoot(revtree1, dest_path)

    def test_copy_to_deleted_file_in_subdir(self):
        handler, branch = self.get_handler()
        src_path = 'd/a'
        dest_path = 'd/b'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d',), (src_path,), (dest_path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(dest_path,)],
            expected_added=[(dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree1, dest_path, "bbb")
        self.assertContent(branch, revtree2, src_path, "aaa")
        self.assertContent(branch, revtree2, dest_path, "aaa")

    def test_copy_to_deleted_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        src_path = 'd/a'
        dest_path = 'd/b'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree0, revtree1 = self.assertChanges(branch, 1,
            expected_added=[('d',), (src_path,), (dest_path,)])
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_removed=[(dest_path,)],
            expected_added=[(dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree1, dest_path, "bbb")
        self.assertSymlinkTarget(branch, revtree2, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, dest_path, "aaa")


class TestImportToPackCopyModified(TestCaseForGenericProcessor):
    """Test copy of file/symlink already modified in this commit."""

    def file_command_iter(self, src_path, dest_path, kind='file'):
        # Revno 1: create a file or symlink
        # Revno 2: modify and copy it
        def command_list():
            author = ['', 'bugs@a.com', time.time(), time.timezone]
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(src_path,
                    kind_to_mode(kind, False), None, "aaa")
            yield commands.CommitCommand('head', '1', author,
                committer, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileModifyCommand(src_path,
                    kind_to_mode(kind, False), None, "bbb")
                yield commands.FileCopyCommand(src_path, dest_path)
            yield commands.CommitCommand('head', '2', author,
                committer, "commit 2", ":1", [], files_two)
        return command_list

    def test_copy_of_modified_file_in_root(self):
        handler, branch = self.get_handler()
        src_path = 'a'
        dest_path = 'b'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_modified=[(src_path,)],
            expected_added=[(dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree2, src_path, "bbb")
        self.assertContent(branch, revtree2, dest_path, "bbb")
        self.assertRevisionRoot(revtree1, src_path)
        self.assertRevisionRoot(revtree2, dest_path)

    def test_copy_of_modified_file_in_subdir(self):
        handler, branch = self.get_handler()
        src_path = 'd/a'
        dest_path = 'd/b'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_modified=[(src_path,)],
            expected_added=[(dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree2, src_path, "bbb")
        self.assertContent(branch, revtree2, dest_path, "bbb")

    def test_copy_of_modified_file_to_new_dir(self):
        handler, branch = self.get_handler()
        src_path = 'd1/a'
        dest_path = 'd2/a'
        handler.process(self.file_command_iter(src_path, dest_path))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_modified=[(src_path,)],
            expected_added=[('d2',), (dest_path,)])
        self.assertContent(branch, revtree1, src_path, "aaa")
        self.assertContent(branch, revtree2, src_path, "bbb")
        self.assertContent(branch, revtree2, dest_path, "bbb")

    def test_copy_of_modified_symlink_in_root(self):
        handler, branch = self.get_handler()
        src_path = 'a'
        dest_path = 'b'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_modified=[(src_path,)],
            expected_added=[(dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, src_path, "bbb")
        self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb")
        self.assertRevisionRoot(revtree1, src_path)
        self.assertRevisionRoot(revtree2, dest_path)

    def test_copy_of_modified_symlink_in_subdir(self):
        handler, branch = self.get_handler()
        src_path = 'd/a'
        dest_path = 'd/b'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_modified=[(src_path,)],
            expected_added=[(dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, src_path, "bbb")
        self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb")

    def test_copy_of_modified_symlink_to_new_dir(self):
        handler, branch = self.get_handler()
        src_path = 'd1/a'
        dest_path = 'd2/a'
        handler.process(self.file_command_iter(src_path, dest_path,
            'symlink'))
        revtree1, revtree2 = self.assertChanges(branch, 2,
            expected_modified=[(src_path,)],
            expected_added=[('d2',), (dest_path,)])
        self.assertSymlinkTarget(branch, revtree1, src_path, "aaa")
        self.assertSymlinkTarget(branch, revtree2, src_path, "bbb")
        self.assertSymlinkTarget(branch, revtree2, dest_path, "bbb")
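# A reading aid for the kind_to_mode() calls used throughout these tests:
# the helper (from the fastimport library) maps a (kind, executable) pair to
# a git-style mode. Assuming the usual git mode conventions, that is roughly:
#
#   kind_to_mode('file', False)      -> 0100644
#   kind_to_mode('file', True)       -> 0100755
#   kind_to_mode('symlink', False)   -> 0120000
#   kind_to_mode('directory', False) -> 040000
#
# (These values are an assumption based on git's conventions, not verified
# against the library here.)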
class TestImportToPackFileKinds(TestCaseForGenericProcessor):

    def get_command_iter(self, path, kind, content):
        def command_list():
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(path,
                    kind_to_mode(kind, False), None, content)
            yield commands.CommitCommand('head', '1', None,
                committer, "commit 1", None, [], files_one)
        return command_list

    def test_import_plainfile(self):
        handler, branch = self.get_handler()
        handler.process(self.get_command_iter('foo', 'file', 'aaa'))

    def test_import_symlink(self):
        handler, branch = self.get_handler()
        handler.process(self.get_command_iter('foo', 'symlink', 'bar'))


class TestModifyRevertInBranch(TestCaseForGenericProcessor):

    def file_command_iter(self):
        # A     add 'foo'
        # |\
        # | B   modify 'foo'
        # | |
        # | C   revert 'foo' back to A
        # |/
        # D     merge 'foo'
        def command_list():
            committer_a = ['', 'a@elmer.com', time.time(), time.timezone]
            committer_b = ['', 'b@elmer.com', time.time(), time.timezone]
            committer_c = ['', 'c@elmer.com', time.time(), time.timezone]
            committer_d = ['', 'd@elmer.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand('foo',
                    kind_to_mode('file', False), None, "content A\n")
            yield commands.CommitCommand('head', '1', None,
                committer_a, "commit 1", None, [], files_one)
            def files_two():
                yield commands.FileModifyCommand('foo',
                    kind_to_mode('file', False), None, "content B\n")
            yield commands.CommitCommand('head', '2', None,
                committer_b, "commit 2", ":1", [], files_two)
            def files_three():
                yield commands.FileModifyCommand('foo',
                    kind_to_mode('file', False), None, "content A\n")
            yield commands.CommitCommand('head', '3', None,
                committer_c, "commit 3", ":2", [], files_three)
            yield commands.CommitCommand('head', '4', None,
                committer_d, "commit 4", ":1", [':3'], lambda: [])
        return command_list

    def test_modify_revert(self):
        handler, branch = self.get_handler()
        handler.process(self.file_command_iter())
        branch.lock_read()
        self.addCleanup(branch.unlock)
        rev_d = branch.last_revision()
        rev_a, rev_c = branch.repository.get_parent_map([rev_d])[rev_d]
        rev_b = branch.repository.get_parent_map([rev_c])[rev_c][0]
        rtree_a, rtree_b, rtree_c, rtree_d = branch.repository.revision_trees(
            [rev_a, rev_b, rev_c, rev_d])
        foo_id = rtree_a.path2id('foo')
        self.assertEqual(rev_a, rtree_a.get_file_revision(foo_id))
        self.assertEqual(rev_b, rtree_b.get_file_revision(foo_id))
        self.assertEqual(rev_c, rtree_c.get_file_revision(foo_id))
        self.assertEqual(rev_c, rtree_d.get_file_revision(foo_id))


class TestCommitCommands(TestCaseForGenericProcessor):

    def test_non_utf8_commit_message(self):
        handler, branch = self.get_handler()
        def files_one():
            yield commands.FileModifyCommand(
                'a', kind_to_mode('file', False), None, "data")
        def command_list():
            committer = ['', 'elmer@a.com', time.time(), time.timezone]
            yield commands.CommitCommand('head', '1', None,
                committer, 'This is a funky character: \x83', None, [],
                files_one)
        handler.process(command_list)
        rev = branch.repository.get_revision(branch.last_revision())
        self.assertEquals(u"This is a funky character: \ufffd", rev.message)


class TestAddNonUtf8InBranch(TestCaseForGenericProcessor):

    def file_command_iter(self):
        # A     add 'foo\x83'
        def command_list():
            committer_a = ['', 'a@elmer.com', time.time(), time.timezone]
            def files_one():
                yield commands.FileModifyCommand(
                    'foo\x83', kind_to_mode('file', False), None,
                    "content A\n")
            yield commands.CommitCommand('head', '1', None,
                committer_a, "commit 1", None, [], files_one)
        return command_list

    def test_add(self):
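        # The invalid byte \x83 in the path is expected to be replaced by
        # U+FFFD (the Unicode replacement character) during import, which is
        # why the lookup below uses u'foo\ufffd'.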
        handler, branch = self.get_handler()
        handler.process(self.file_command_iter())
        branch.lock_read()
        self.addCleanup(branch.unlock)
        rev_a = branch.last_revision()
        rtree_a = branch.repository.revision_tree(rev_a)
        foo_id = rtree_a.path2id(u'foo\ufffd')
        self.assertEqual(rev_a, rtree_a.get_file_revision(foo_id))

bzr-fastimport-0.13.0+bzr361/tests/test_head_tracking.py0000644000000000000000000001142411723157030021266 0ustar 00000000000000
# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Test tracking of heads"""

from cStringIO import StringIO

from fastimport import (
    commands,
    parser,
    )

import testtools

from bzrlib.plugins.fastimport.reftracker import (
    RefTracker,
    )


# A sample input stream that only adds files to a branch
_SAMPLE_MAINLINE = \
"""blob
mark :1
data 9
Welcome!
commit refs/heads/master
mark :100
committer a 1234798653 +0000
data 4
test
M 644 :1 doc/README.txt
blob
mark :2
data 17
Life is good ...
commit refs/heads/master
mark :101
committer a 1234798653 +0000
data 8
test ing
from :100
M 644 :2 NEWS
blob
mark :3
data 19
Welcome! my friend
blob
mark :4
data 11
== Docs ==
commit refs/heads/master
mark :102
committer d 1234798653 +0000
data 8
test ing
from :101
M 644 :3 doc/README.txt
M 644 :4 doc/index.txt
"""

# A sample input stream that adds files to two branches
_SAMPLE_TWO_HEADS = \
"""blob
mark :1
data 9
Welcome!
commit refs/heads/master
mark :100
committer a 1234798653 +0000
data 4
test
M 644 :1 doc/README.txt
blob
mark :2
data 17
Life is good ...
commit refs/heads/mybranch
mark :101
committer a 1234798653 +0000
data 8
test ing
from :100
M 644 :2 NEWS
blob
mark :3
data 19
Welcome! my friend
blob
mark :4
data 11
== Docs ==
commit refs/heads/master
mark :102
committer d 1234798653 +0000
data 8
test ing
from :100
M 644 :3 doc/README.txt
M 644 :4 doc/index.txt
"""

# A sample input stream that adds files to two branches and then merges them
_SAMPLE_TWO_BRANCHES_MERGED = \
"""blob
mark :1
data 9
Welcome!
commit refs/heads/master
mark :100
committer a 1234798653 +0000
data 4
test
M 644 :1 doc/README.txt
blob
mark :2
data 17
Life is good ...
commit refs/heads/mybranch
mark :101
committer a 1234798653 +0000
data 8
test ing
from :100
M 644 :2 NEWS
blob
mark :3
data 19
Welcome! my friend
blob
mark :4
data 11
== Docs ==
commit refs/heads/master
mark :102
committer d 1234798653 +0000
data 8
test ing
from :100
M 644 :3 doc/README.txt
M 644 :4 doc/index.txt
commit refs/heads/master
mark :103
committer d 1234798653 +0000
data 8
test ing
from :102
merge :101
D doc/index.txt
"""

# A sample input stream that contains a reset
_SAMPLE_RESET = \
"""blob
mark :1
data 9
Welcome!
commit refs/heads/master
mark :100
committer a 1234798653 +0000
data 4
test
M 644 :1 doc/README.txt
reset refs/remotes/origin/master
from :100
"""
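# In the stream above, the reset makes refs/remotes/origin/master a second
# name for the commit at :100, so both refs are expected to be tracked as
# heads of the same commit (see test_reset below).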
# A sample input stream that contains a reset and more commits
_SAMPLE_RESET_WITH_MORE_COMMITS = \
"""blob
mark :1
data 9
Welcome!
commit refs/heads/master
mark :100
committer a 1234798653 +0000
data 4
test
M 644 :1 doc/README.txt
reset refs/remotes/origin/master
from :100
commit refs/remotes/origin/master
mark :101
committer d 1234798653 +0000
data 8
test
ing
from :100
D doc/README.txt
"""


class TestHeadTracking(testtools.TestCase):

    def assertHeads(self, input, expected):
        s = StringIO(input)
        p = parser.ImportParser(s)
        reftracker = RefTracker()
        for cmd in p.iter_commands():
            if isinstance(cmd, commands.CommitCommand):
                reftracker.track_heads(cmd)
                # eat the file commands
                list(cmd.iter_files())
            elif isinstance(cmd, commands.ResetCommand):
                if cmd.from_ is not None:
                    reftracker.track_heads_for_ref(cmd.ref, cmd.from_)
        self.assertEqual(reftracker.heads, expected)

    def test_mainline(self):
        self.assertHeads(_SAMPLE_MAINLINE, {
            ':102': set(['refs/heads/master']),
            })

    def test_two_heads(self):
        self.assertHeads(_SAMPLE_TWO_HEADS, {
            ':101': set(['refs/heads/mybranch']),
            ':102': set(['refs/heads/master']),
            })

    def test_two_branches_merged(self):
        self.assertHeads(_SAMPLE_TWO_BRANCHES_MERGED, {
            ':103': set(['refs/heads/master']),
            })

    def test_reset(self):
        self.assertHeads(_SAMPLE_RESET, {
            ':100': set(['refs/heads/master', 'refs/remotes/origin/master']),
            })

    def test_reset_with_more_commits(self):
        self.assertHeads(_SAMPLE_RESET_WITH_MORE_COMMITS, {
            ':101': set(['refs/remotes/origin/master']),
            })
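# Editor's note (not part of the original file): test_reset and
# test_reset_with_more_commits pin down the reset semantics.  Assuming
# track_heads_for_ref(ref, mark) simply records `mark` as the current head
# of `ref` (which is all the expected values above require):
#
#   tracker = RefTracker()
#   tracker.track_heads_for_ref('refs/heads/master', ':100')
#   tracker.track_heads_for_ref('refs/remotes/origin/master', ':100')
#   # tracker.heads == {':100': set(['refs/heads/master',
#   #                                'refs/remotes/origin/master'])}
#
# so a reset with a from clause leaves the mark as a head of both refs,
# while a later commit on the reset ref moves that ref's head to the new
# mark, as test_reset_with_more_commits shows.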
"""Direct tests of the revision_store classes.""" from bzrlib import ( branch, errors, inventory, osutils, tests, ) from bzrlib.plugins.fastimport import ( revision_store, ) from bzrlib.plugins.fastimport.tests import ( FastimportFeature, ) class Test_TreeShim(tests.TestCase): _test_needs_features = [FastimportFeature] def invAddEntry(self, inv, path, file_id=None): if path.endswith('/'): path = path[:-1] kind = 'directory' else: kind = 'file' parent_path, basename = osutils.split(path) parent_id = inv.path2id(parent_path) inv.add(inventory.make_entry(kind, basename, parent_id, file_id)) def make_trivial_basis_inv(self): basis_inv = inventory.Inventory('TREE_ROOT') self.invAddEntry(basis_inv, 'foo', 'foo-id') self.invAddEntry(basis_inv, 'bar/', 'bar-id') self.invAddEntry(basis_inv, 'bar/baz', 'baz-id') return basis_inv def test_id2path_no_delta(self): basis_inv = self.make_trivial_basis_inv() shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, inv_delta=[], content_provider=None) self.assertEqual('', shim.id2path('TREE_ROOT')) self.assertEqual('foo', shim.id2path('foo-id')) self.assertEqual('bar', shim.id2path('bar-id')) self.assertEqual('bar/baz', shim.id2path('baz-id')) self.assertRaises(errors.NoSuchId, shim.id2path, 'qux-id') def test_id2path_with_delta(self): basis_inv = self.make_trivial_basis_inv() foo_entry = inventory.make_entry('file', 'foo2', 'TREE_ROOT', 'foo-id') inv_delta = [('foo', 'foo2', 'foo-id', foo_entry), ('bar/baz', None, 'baz-id', None), ] shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, inv_delta=inv_delta, content_provider=None) self.assertEqual('', shim.id2path('TREE_ROOT')) self.assertEqual('foo2', shim.id2path('foo-id')) self.assertEqual('bar', shim.id2path('bar-id')) self.assertRaises(errors.NoSuchId, shim.id2path, 'baz-id') def test_path2id(self): basis_inv = self.make_trivial_basis_inv() shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, inv_delta=[], content_provider=None) self.assertEqual('TREE_ROOT', shim.path2id('')) # We don't want to ever give a wrong value, so for now we just raise # NotImplementedError self.assertRaises(NotImplementedError, shim.path2id, 'bar') def test_get_file_with_stat_content_in_stream(self): basis_inv = self.make_trivial_basis_inv() def content_provider(file_id): return 'content of\n' + file_id + '\n' shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, inv_delta=[], content_provider=content_provider) f_obj, stat_val = shim.get_file_with_stat('baz-id') self.assertIs(None, stat_val) self.assertEqualDiff('content of\nbaz-id\n', f_obj.read()) # TODO: Test when the content isn't in the stream, and we fall back to the # repository that was passed in def test_get_symlink_target(self): basis_inv = self.make_trivial_basis_inv() ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id') ie.symlink_target = u'link-target' basis_inv.add(ie) shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, inv_delta=[], content_provider=None) self.assertEqual(u'link-target', shim.get_symlink_target('link-id')) def test_get_symlink_target_from_delta(self): basis_inv = self.make_trivial_basis_inv() ie = inventory.make_entry('symlink', 'link', 'TREE_ROOT', 'link-id') ie.symlink_target = u'link-target' inv_delta = [(None, 'link', 'link-id', ie)] shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv, inv_delta=inv_delta, content_provider=None) self.assertEqual(u'link-target', shim.get_symlink_target('link-id')) def test__delta_to_iter_changes(self): basis_inv = 
    def test__delta_to_iter_changes(self):
        basis_inv = self.make_trivial_basis_inv()
        foo_entry = inventory.make_entry('file', 'foo2', 'bar-id', 'foo-id')
        link_entry = inventory.make_entry('symlink', 'link', 'TREE_ROOT',
                                          'link-id')
        link_entry.symlink_target = u'link-target'
        inv_delta = [('foo', 'bar/foo2', 'foo-id', foo_entry),
                     ('bar/baz', None, 'baz-id', None),
                     (None, 'link', 'link-id', link_entry),
                    ]
        shim = revision_store._TreeShim(repo=None, basis_inv=basis_inv,
                                        inv_delta=inv_delta,
                                        content_provider=None)
        changes = list(shim._delta_to_iter_changes())
        expected = [('foo-id', ('foo', 'bar/foo2'), False, (True, True),
                     ('TREE_ROOT', 'bar-id'), ('foo', 'foo2'),
                     ('file', 'file'), (False, False)),
                    ('baz-id', ('bar/baz', None), True, (True, False),
                     ('bar-id', None), ('baz', None),
                     ('file', None), (False, None)),
                    ('link-id', (None, 'link'), True, (False, True),
                     (None, 'TREE_ROOT'), (None, 'link'),
                     (None, 'symlink'), (None, False)),
                   ]
        # from pprint import pformat
        # self.assertEqualDiff(pformat(expected), pformat(changes))
        self.assertEqual(expected, changes)
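    # Editor's note (not part of the original file): the `expected` tuples
    # above follow bzrlib's iter_changes convention:
    #
    #   (file_id,
    #    (old_path, new_path),
    #    changed_content,
    #    (old_versioned, new_versioned),
    #    (old_parent_id, new_parent_id),
    #    (old_name, new_name),
    #    (old_kind, new_kind),
    #    (old_executable, new_executable))
    #
    # e.g. the 'baz-id' row reads as "bar/baz, a non-executable versioned
    # file under bar-id, was deleted".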